sequenced based

2023-07-24 23:56:46 +01:00 · 2023-07-24 23:56:46 +01:00 · d0f0b21cb5
commit d0f0b21cb5
parent 80c5f2216d
3 changed files with 150 additions and 61 deletions
--- a/train_model.py
+++ b/train_model.py
@ -1,81 +1,123 @@
 import os
+import json
 import tensorflow as tf
 import numpy as np
 import cv2
-from video_compression_model import VideoCompressionModel
+from video_compression_model import NUM_FRAMES, VideoCompressionModel, PRESET_SPEED_CATEGORIES

 # Constants
 NUM_CHANNELS = 3     # Number of color channels in the video frames (RGB images have 3 channels)
-BATCH_SIZE = 32       # Batch size used during training
-EPOCHS = 20           # Number of training epochs
+BATCH_SIZE = 16      # Batch size used during training
+EPOCHS = 1           # Number of training epochs
+TRAIN_SAMPLES = 1  # Frame-count multiplier: NUM_FRAMES * TRAIN_SAMPLES frames are read from each video

 # Step 1: Data Preparation
-TRAIN_VIDEO_FILE = 'test_data/native_video.mkv'  # The training video file name
-VAL_VIDEO_FILE = 'test_data/training_video.mkv'      # The validation video file name
-TRAIN_SAMPLES = 2  # Number of video frames used for training
-VAL_SAMPLES = 2     # Number of video frames used for validation

+def load_list(list_path):
+    """Load the video description list from a JSON file.
+
+    Args:
+        list_path: Path of the JSON file to read.
+
+    Returns:
+        The decoded JSON document. Callers in this file iterate it as a
+        list of per-video dictionaries.
+    """
+    # JSON text is UTF-8; do not depend on the platform's default encoding.
+    with open(list_path, "r", encoding="utf-8") as json_file:
+        return json.load(json_file)
+
+# Read up to `num_frames` frames from a video file; frames keep their native size (no resizing)
 def load_frames_from_video(video_file, num_frames):
+    """Read the leading frames of a video as RGB images.
+
+    Args:
+        video_file: Path of the video, passed to cv2.VideoCapture.
+        num_frames: Reading stops once this many frames have been collected.
+            Fewer frames are returned when the video ends earlier.
+
+    Returns:
+        A tuple (frames, width, height): the list of RGB frames, followed by
+        the width and height taken from the first frame.
+
+    Raises:
+        ValueError: If not a single frame could be read from the video.
+    """
    print("Extracting video frames...")
    cap = cv2.VideoCapture(video_file)
    frames = []
    count = 0
-    frame_width, frame_height = None, None  # Initialize the frame dimensions
    while True:
        ret, frame = cap.read()
        if not ret:
            break
-        if frame_width is None or frame_height is None:
-            frame_height, frame_width = frame.shape[:2]  # Get the frame dimensions from the first frame
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(frame)
        count += 1
        if count >= num_frames:
            break
    cap.release()
-    return frames, frame_width, frame_height  # Return frames and frame dimensions
-
-train_frames, FRAME_WIDTH, FRAME_HEIGHT = load_frames_from_video(TRAIN_VIDEO_FILE, num_frames=TRAIN_SAMPLES)
-val_frames, _, _ = load_frames_from_video(VAL_VIDEO_FILE, num_frames=VAL_SAMPLES)
-
-
-print("Number of training frames:", len(train_frames))
-print("Number of validation frames:", len(val_frames))
+    if not frames:
+        # Missing, unreadable or empty video: fail here with a clear message
+        raise ValueError("No frames could be read from %r" % (video_file,))
+    # Read the size from a stored frame, not the loop variable: after a failed
+    # read `frame` is no longer an image. Image arrays are indexed
+    # (row, column), so shape[:2] is (height, width).
+    height, width = frames[0].shape[:2]
+    return frames, width, height

 def preprocess(frames):
-    frames = np.array(frames) / 255.0
-    return frames
+    """Convert `frames` to a numpy array and divide every value by 255.0."""
+    pixel_array = np.array(frames)
+    return pixel_array / 255.0

-train_frames = preprocess(train_frames)
-val_frames = preprocess(val_frames)
+def save_model(model, file):
+    """Save `model` under the "models" directory.
+
+    Args:
+        model: Object exposing a save(path) method.
+        file: File name to use inside the "models" directory.
+    """
+    # Create the output directory on first use
+    os.makedirs("models", exist_ok=True)
+    destination = os.path.join("models/", file)
+    model.save(destination)
+    print("Model saved successfully!")

-print("training frames:", len(train_frames))
-print("validation frames:", len(val_frames))
+# Load the training frames and encoder settings of every video named in a JSON list
+def load_video_from_list(list_path):
+    """Load frames and encoding details for each video in the list file.
+
+    Args:
+        list_path: Path of the JSON list read with load_list. Each entry
+            must provide "video_file", "crf" and "preset_speed".
+
+    Returns:
+        A list with one dictionary per video holding "frames", "width",
+        "height", "crf" (the listed value divided by 63.0), "preset_speed"
+        (the position of the listed name in PRESET_SPEED_CATEGORIES) and
+        "video_file".
+    """
+    all_details = []
+    for video_details in load_list(list_path):
+        video_file = video_details["video_file"]
+        # NOTE(review): 63.0 is presumably the encoder's maximum CRF -- confirm
+        crf = video_details['crf'] / 63.0
+        preset_speed = PRESET_SPEED_CATEGORIES.index(video_details['preset_speed'])

-# Step 2: Model Architecture
-model = VideoCompressionModel()
+        # Video files are looked up relative to the test_data directory
+        train_frames, w, h = load_frames_from_video(
+            os.path.join("test_data/", video_file), NUM_FRAMES * TRAIN_SAMPLES)
+        all_details.append({
+            "frames": train_frames,
+            "width": w,
+            "height": h,
+            "crf": crf,
+            "preset_speed": preset_speed,
+            "video_file": video_file
+        })
+    return all_details

-model.compile(loss='mean_squared_error', optimizer='adam', run_eagerly=True)
+def generate_frame_sequences(frames):
+    """Build overlapping NUM_FRAMES-long windows and their target frames.
+
+    Each window starts one frame after the previous one. The target paired
+    with a window is that window's own final frame.
+
+    Args:
+        frames: Sequence of frames supporting len() and slicing.
+
+    Returns:
+        Two numpy arrays: the windows and the matching targets.
+    """
+    window_count = len(frames) - NUM_FRAMES + 1
+    windows = [frames[start:start + NUM_FRAMES] for start in range(window_count)]
+    # The last frame of every window serves as its label
+    targets = [window[-1] for window in windows]
+    return np.array(windows), np.array(targets)

-# Adjusting the input shape for training and validation
-frame_height, frame_width = train_frames[0].shape[:2]

-# Use the resized frames as target data
-train_targets = train_frames
-val_targets = val_frames
+def main():
+    """Train one model on every listed video in turn, then save it.
+
+    The videos come from test_data/training.json. fit() is called once per
+    video on the same model instance, and the model is written to
+    models/model.keras after the last video.
+    """

-# Create the "models" directory if it doesn't exist
-os.makedirs("models", exist_ok=True)
+    all_video_details = load_video_from_list("test_data/training.json")

-print("\nTraining the model...")
-model.fit(
-    train_frames, [train_targets, tf.zeros_like(train_targets)],
-    batch_size=BATCH_SIZE,
-    epochs=EPOCHS,
-    validation_data=(val_frames, [val_targets, tf.zeros_like(val_targets)])
-)
-print("\nTraining completed.")
+    model = VideoCompressionModel(NUM_CHANNELS, NUM_FRAMES)
+    model.compile(loss='mean_squared_error', optimizer='adam')

-# Step 3: Save the trained model
-model.save('models/model.keras')
-print("Model saved successfully!")
+    for video_details in all_video_details:
+        frames = preprocess(video_details["frames"])
+        train_sequences, train_labels = generate_frame_sequences(frames)
+
+        # Every sequence of a video shares that video's CRF and preset speed
+        num_sequences = len(train_sequences)
+        train_inputs = {
+            "frames": train_sequences,
+            "crf": np.full((num_sequences, 1), video_details['crf']),
+            "preset_speed": np.full((num_sequences, 1), video_details['preset_speed']),
+        }
+
+        # For simplicity the training data doubles as validation data. The
+        # arrays are reused directly: rebuilding them from a copy of the
+        # frames yields identical values at twice the work and memory.
+        val_inputs, val_labels = train_inputs, train_labels
+
+        print("\nTraining the model for video:", video_details["video_file"])
+        model.fit(
+            train_inputs,
+            train_labels,  # Ground truth produced by generate_frame_sequences
+            batch_size=BATCH_SIZE,
+            epochs=EPOCHS,
+            validation_data=(val_inputs, val_labels),
+        )
+        print("\nTraining completed for video:", video_details["video_file"])
+
+    save_model(model, 'model.keras')
+
+if __name__ == "__main__":
+    main()