sequence-based

2023-07-24 23:56:46 +01:00 · 2023-07-24 23:56:46 +01:00 · d0f0b21cb5
commit d0f0b21cb5
parent 80c5f2216d
3 changed files with 150 additions and 61 deletions
--- a/DeepEncode.py
+++ b/DeepEncode.py
@ -1,7 +1,7 @@
 import tensorflow as tf
 import numpy as np
 import cv2
-from video_compression_model import VideoCompressionModel
+from video_compression_model import NUM_FRAMES, PRESET_SPEED_CATEGORIES, VideoCompressionModel
 # Constants
 NUM_CHANNELS = 3
@ -10,7 +10,7 @@ NUM_CHANNELS = 3
 model = tf.keras.models.load_model('models/model.keras', custom_objects={'VideoCompressionModel': VideoCompressionModel})
 # Step 3: Load the uncompressed video
-UNCOMPRESSED_VIDEO_FILE = 'test_data/test_video.mkv'
+UNCOMPRESSED_VIDEO_FILE = 'test_data/training_video.mkv'
 def load_frames_from_video(video_file, num_frames = 0):
    print("Extracting video frames...")
@ -32,19 +32,40 @@ def load_frames_from_video(video_file, num_frames = 0):
    print("Extraction Complete")
    return frames
-uncompressed_frames = load_frames_from_video(UNCOMPRESSED_VIDEO_FILE, 200)
+uncompressed_frames = load_frames_from_video(UNCOMPRESSED_VIDEO_FILE, 100)
-if len(uncompressed_frames) == 0 or None:
+if not uncompressed_frames:
    print("IO ERROR!")
    exit()
 uncompressed_frames = np.array(uncompressed_frames) / 255.0
-if len(uncompressed_frames) == 0 or None:
+# Generate sequences of frames for prediction
-    print("np.array ERROR!")
+uncompressed_frame_sequences = []
-    exit()
+for i in range(len(uncompressed_frames) - NUM_FRAMES + 1):
    sequence = uncompressed_frames[i:i+NUM_FRAMES]
    uncompressed_frame_sequences.append(sequence)
 uncompressed_frame_sequences = np.array(uncompressed_frame_sequences)
 #for frame in uncompressed_frames:
 #    cv2.imshow('Frame', frame)
 #    cv2.waitKey(50)  # Display each frame for 50 ms
 # Step 4: Compress the video frames using the loaded model
-compressed_frames = model.predict(uncompressed_frames)
+crf_values = np.full((len(uncompressed_frame_sequences), 1), 25, dtype=np.float32)  # Added dtype argument
 preset_speed_index = PRESET_SPEED_CATEGORIES.index("fast")
 preset_speed_values = np.full((len(uncompressed_frame_sequences), 1), preset_speed_index, dtype=np.float32)  # Added dtype argument
 compressed_frame_sequences = model.predict({"frames": uncompressed_frame_sequences, "crf": crf_values, "preset_speed": preset_speed_values})
 # The model's call() already returns a single (last) frame per sequence, so no extra slicing is needed
 #compressed_frames = compressed_frame_sequences[:, -1]
 #for frame in compressed_frame_sequences:
 #    cv2.imshow('Compressed Frame', frame)
 #    cv2.waitKey(50)
 # Step 5: Save the compressed video frames
 COMPRESSED_VIDEO_FILE = 'compressed_video.mkv'
@ -60,5 +81,5 @@ def save_frames_as_video(frames, video_file):
        out.write(frame)
    out.release()
-save_frames_as_video(compressed_frames, COMPRESSED_VIDEO_FILE)
+save_frames_as_video(compressed_frame_sequences, COMPRESSED_VIDEO_FILE)
 print("Compression completed.")
--- a/train_model.py
+++ b/train_model.py
@ -1,81 +1,123 @@
 import os
 import json
 import tensorflow as tf
 import numpy as np
 import cv2
-from video_compression_model import VideoCompressionModel
+from video_compression_model import NUM_FRAMES, VideoCompressionModel, PRESET_SPEED_CATEGORIES
 # Constants
 NUM_CHANNELS = 3     # Number of color channels in the video frames (RGB images have 3 channels)
-BATCH_SIZE = 32       # Batch size used during training
+BATCH_SIZE = 16      # Batch size used during training
-EPOCHS = 20           # Number of training epochs
+EPOCHS = 1           # Number of training epochs
 TRAIN_SAMPLES = 1  # number of frames to extract
 # Step 1: Data Preparation
 TRAIN_VIDEO_FILE = 'test_data/native_video.mkv'  # The training video file name
 VAL_VIDEO_FILE = 'test_data/training_video.mkv'      # The validation video file name
 TRAIN_SAMPLES = 2  # Number of video frames used for training
 VAL_SAMPLES = 2     # Number of video frames used for validation
 def load_list(list_path):
    with open(list_path, "r") as json_file:
        video_details_list = json.load(json_file)
    return video_details_list
 # Load up to num_frames frames from a video as RGB arrays (resizing is currently disabled)
 def load_frames_from_video(video_file, num_frames):
    print("Extracting video frames...")
    cap = cv2.VideoCapture(video_file)
    frames = []
    count = 0
    frame_width, frame_height = None, None  # Initialize the frame dimensions
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_width is None or frame_height is None:
            frame_height, frame_width = frame.shape[:2]  # Get the frame dimensions from the first frame
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        #frame = cv2.resize(frame, (target_width, target_height))
        frames.append(frame)
        count += 1
        if count >= num_frames:
            break
    cap.release()
-    return frames, frame_width, frame_height  # Return frames and frame dimensions
+    width, height = frame.shape[:2]
-
+    return frames, width, height
 train_frames, FRAME_WIDTH, FRAME_HEIGHT = load_frames_from_video(TRAIN_VIDEO_FILE, num_frames=TRAIN_SAMPLES)
 val_frames, _, _ = load_frames_from_video(VAL_VIDEO_FILE, num_frames=VAL_SAMPLES)
 print("Number of training frames:", len(train_frames))
 print("Number of validation frames:", len(val_frames))
 def preprocess(frames):
-    frames = np.array(frames) / 255.0
+    return np.array(frames) / 255.0
    return frames
-train_frames = preprocess(train_frames)
+def save_model(model, file):
-val_frames = preprocess(val_frames)
+    os.makedirs("models", exist_ok=True)
    model.save(os.path.join("models/", file))
    print("Model saved successfully!")
-print("training frames:", len(train_frames))
+# Load the frames and encoding details (CRF, preset speed) for every video listed in a JSON file
-print("validation frames:", len(val_frames))
+def load_video_from_list(list_path):
    details_list = load_list(list_path)
    all_frames = []
    all_details = []
    for video_details in details_list:
        VIDEO_FILE = video_details["video_file"]
        CRF = video_details['crf'] / 63.0
        PRESET_SPEED = PRESET_SPEED_CATEGORIES.index(video_details['preset_speed'])
        video_details['preset_speed'] = PRESET_SPEED
-# Step 2: Model Architecture
+        # Update load_frames_from_video calls with target_width and target_height
-model = VideoCompressionModel()
+        #train_frames, w, h = load_frames_from_video(os.path.join("test_data/", VIDEO_FILE), TRAIN_SAMPLES, target_width, target_height)
        train_frames, w, h = load_frames_from_video(os.path.join("test_data/", VIDEO_FILE), NUM_FRAMES * TRAIN_SAMPLES)
        all_frames.extend(train_frames)
        all_details.append({
            "frames": train_frames,
            "width": w,
            "height": h,
            "crf": CRF,
            "preset_speed": PRESET_SPEED,
            "video_file": VIDEO_FILE
        })
    return all_details
-model.compile(loss='mean_squared_error', optimizer='adam', run_eagerly=True)
+def generate_frame_sequences(frames):
    # Generate sequences of frames for the model
    sequences = []
    labels = []
    for i in range(len(frames) - NUM_FRAMES + 1):
        sequence = frames[i:i+NUM_FRAMES]
        sequences.append(sequence)
        # Use the last frame of the sequence as the label
        labels.append(sequence[-1])
    return np.array(sequences), np.array(labels)
 # Adjusting the input shape for training and validation
 frame_height, frame_width = train_frames[0].shape[:2]
-# Use the resized frames as target data
+def main():
-train_targets = train_frames
+    #target_width = 640  # Choose a fixed width for the frames
-val_targets = val_frames
+    #target_height = 360  # Choose a fixed height for the frames
-# Create the "models" directory if it doesn't exist
+    all_video_details = load_video_from_list("test_data/training.json")
 os.makedirs("models", exist_ok=True)
-print("\nTraining the model...")
+    model = VideoCompressionModel(NUM_CHANNELS, NUM_FRAMES)
-model.fit(
+    model.compile(loss='mean_squared_error', optimizer='adam')
    train_frames, [train_targets, tf.zeros_like(train_targets)],
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(val_frames, [val_targets, tf.zeros_like(val_targets)])
 )
 print("\nTraining completed.")
-# Step 3: Save the trained model
+    for video_details in all_video_details:
-model.save('models/model.keras')
+        train_frames = video_details["frames"]
-print("Model saved successfully!")
+        val_frames = train_frames.copy()  # For simplicity, using the same frames for validation
        train_frames = preprocess(train_frames)
        val_frames = preprocess(val_frames)
        train_sequences, train_labels = generate_frame_sequences(train_frames)
        val_sequences, val_labels = generate_frame_sequences(val_frames)
        num_sequences = len(train_sequences)
        crf_array = np.full((num_sequences, 1), video_details['crf'])
        preset_speed_array = np.full((num_sequences, 1), video_details['preset_speed'])
        print("\nTraining the model for video:", video_details["video_file"])
        model.fit(
            {"frames": train_sequences, "crf": crf_array, "preset_speed": preset_speed_array},
            train_labels,  # Use train_labels as the ground truth
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
            validation_data=({"frames": val_sequences, "crf": crf_array, "preset_speed": preset_speed_array},
                             val_labels)  # Use val_labels as the ground truth for validation
        )
        print("\nTraining completed for video:", video_details["video_file"])
    save_model(model, 'model.keras')
 if __name__ == "__main__":
    main()
--- a/video_compression_model.py
+++ b/video_compression_model.py
@ -1,27 +1,53 @@
 import tensorflow as tf
 PRESET_SPEED_CATEGORIES = ["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"]
 NUM_PRESET_SPEEDS = len(PRESET_SPEED_CATEGORIES)
 NUM_FRAMES = 5       # Number of consecutive frames in a sequence
 class VideoCompressionModel(tf.keras.Model):
-    def __init__(self, NUM_CHANNELS=3):
+    def __init__(self, NUM_CHANNELS=3, NUM_FRAMES=5):
        super(VideoCompressionModel, self).__init__()
        self.NUM_CHANNELS = NUM_CHANNELS
        self.NUM_FRAMES = NUM_FRAMES
        # Embedding layer for preset_speed
        self.preset_embedding = tf.keras.layers.Embedding(NUM_PRESET_SPEEDS, 16)
        # Encoder layers
        self.encoder = tf.keras.Sequential([
-            tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(None, None, NUM_CHANNELS)),
+            tf.keras.layers.Conv3D(32, (3, 3, 3), activation='relu', padding='same', input_shape=(None, None, None, NUM_CHANNELS + 1 + 16)), # Notice the adjusted channel number
            tf.keras.layers.MaxPooling3D((2, 2, 2)),
            # Add more encoder layers as needed
        ])
        # Decoder layers
        self.decoder = tf.keras.Sequential([
-            tf.keras.layers.Conv2DTranspose(32, (3, 3), activation='relu', padding='same'),
+            tf.keras.layers.Conv3DTranspose(32, (3, 3, 3), activation='relu', padding='same'),
            tf.keras.layers.UpSampling3D((2, 2, 2)),
            # Add more decoder layers as needed
-            tf.keras.layers.Conv2D(NUM_CHANNELS, (3, 3), activation='sigmoid', padding='same')  # Output layer for video frames
+            tf.keras.layers.Conv3D(NUM_CHANNELS, (3, 3, 3), activation='sigmoid', padding='same')  # Output layer for video frames
        ])
    def call(self, inputs):
        frames = inputs["frames"]
        crf = tf.expand_dims(inputs["crf"], -1)
        preset_speed = inputs["preset_speed"]
        # Convert preset_speed to embeddings
        preset_embedding = self.preset_embedding(preset_speed)
        preset_embedding = tf.keras.layers.Flatten()(preset_embedding)
        # Concatenate crf and preset_embedding to frames
        frames_shape = tf.shape(frames)
        repeated_crf = tf.tile(tf.reshape(crf, (-1, 1, 1, 1, 1)), [1, frames_shape[1], frames_shape[2], frames_shape[3], 1])
        repeated_preset = tf.tile(tf.reshape(preset_embedding, (-1, 1, 1, 1, 16)), [1, frames_shape[1], frames_shape[2], frames_shape[3], 1])
        frames = tf.concat([frames, repeated_crf, repeated_preset], axis=-1)
        # Encoding the video frames
-        compressed_representation = self.encoder(inputs)
+        compressed_representation = self.encoder(frames)
        # Decoding to generate compressed video frames
        reconstructed_frames = self.decoder(compressed_representation)
-
+        return reconstructed_frames[:,-1,:,:,:]
        return reconstructed_frames