optimisation

2023-07-25 01:42:16 +01:00 · 2023-07-25 01:42:16 +01:00 · b97293d7ca
commit b97293d7ca
parent d0f0b21cb5
3 changed files with 112 additions and 97 deletions
--- a/train_model.py
+++ b/train_model.py
@@ -3,22 +3,19 @@ import json
 import tensorflow as tf
 import numpy as np
 import cv2
-from video_compression_model import NUM_FRAMES, VideoCompressionModel, PRESET_SPEED_CATEGORIES
+from video_compression_model import NUM_CHANNELS, NUM_FRAMES, VideoCompressionModel, PRESET_SPEED_CATEGORIES
+from tensorflow.keras.callbacks import EarlyStopping

 # Constants
-NUM_CHANNELS = 3     # Number of color channels in the video frames (RGB images have 3 channels)
-BATCH_SIZE = 16      # Batch size used during training
-EPOCHS = 1           # Number of training epochs
-TRAIN_SAMPLES = 1  # number of frames to extract
-
-# Step 1: Data Preparation
+BATCH_SIZE = 16
+EPOCHS = 1
+TRAIN_SAMPLES = 1

 def load_list(list_path):
    with open(list_path, "r") as json_file:
        video_details_list = json.load(json_file)
    return video_details_list

-# Update load_frames_from_video function to resize frames
 def load_frames_from_video(video_file, num_frames):
    print("Extracting video frames...")
    cap = cv2.VideoCapture(video_file)
@@ -29,7 +26,6 @@ def load_frames_from_video(video_file, num_frames):
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        #frame = cv2.resize(frame, (target_width, target_height))
        frames.append(frame)
        count += 1
        if count >= num_frames:
@@ -46,7 +42,6 @@ def save_model(model, file):
    model.save(os.path.join("models/", file))
    print("Model saved successfully!")

-# Update load_video_from_list function to provide target_width and target_height
 def load_video_from_list(list_path):
    details_list = load_list(list_path)
    all_frames = []
@@ -57,8 +52,6 @@ def load_video_from_list(list_path):
        PRESET_SPEED = PRESET_SPEED_CATEGORIES.index(video_details['preset_speed'])
        video_details['preset_speed'] = PRESET_SPEED

-        # Update load_frames_from_video calls with target_width and target_height
-        #train_frames, w, h = load_frames_from_video(os.path.join("test_data/", VIDEO_FILE), TRAIN_SAMPLES, target_width, target_height)
        train_frames, w, h = load_frames_from_video(os.path.join("test_data/", VIDEO_FILE), NUM_FRAMES * TRAIN_SAMPLES)
        all_frames.extend(train_frames)
        all_details.append({
@@ -72,52 +65,61 @@ def load_video_from_list(list_path):
    return all_details

 def generate_frame_sequences(frames):
-    # Generate sequences of frames for the model
    sequences = []
    labels = []
-    for i in range(len(frames) - NUM_FRAMES + 1):
-        sequence = frames[i:i+NUM_FRAMES]
+    for i in range(len(frames) - NUM_FRAMES + 2):
+        sequence = frames[i:i+NUM_FRAMES-1]
        sequences.append(sequence)
-        # Use the last frame of the sequence as the label
        labels.append(sequence[-1])
    return np.array(sequences), np.array(labels)

+def frame_difference(frames):
+    differences = []
+    for i in range(1, len(frames)):
+        differences.append(cv2.absdiff(frames[i], frames[i-1]))
+    return differences

 def main():
-    #target_width = 640  # Choose a fixed width for the frames
-    #target_height = 360  # Choose a fixed height for the frames
-
-    all_video_details = load_video_from_list("test_data/training.json")
+    all_video_details_train = load_video_from_list("test_data/training.json")
+    all_video_details_val = load_video_from_list("test_data/validation.json")

    model = VideoCompressionModel(NUM_CHANNELS, NUM_FRAMES)
    model.compile(loss='mean_squared_error', optimizer='adam')

-    for video_details in all_video_details:
-        train_frames = video_details["frames"]
-        val_frames = train_frames.copy()  # For simplicity, using the same frames for validation
+    early_stop = EarlyStopping(monitor='val_loss', patience=3, verbose=1, restore_best_weights=True)

-        train_frames = preprocess(train_frames)
-        val_frames = preprocess(val_frames)
+    for video_details_train, video_details_val in zip(all_video_details_train, all_video_details_val):
+        train_frames = video_details_train["frames"]
+        val_frames = video_details_val["frames"]

-        train_sequences, train_labels = generate_frame_sequences(train_frames)
-        val_sequences, val_labels = generate_frame_sequences(val_frames)
+        train_differences = frame_difference(preprocess(train_frames))
+        val_differences = frame_difference(preprocess(val_frames))

-        num_sequences = len(train_sequences)
-        crf_array = np.full((num_sequences, 1), video_details['crf'])
-        preset_speed_array = np.full((num_sequences, 1), video_details['preset_speed'])
+        train_sequences, train_labels = generate_frame_sequences(train_differences)
+        val_sequences, val_labels = generate_frame_sequences(val_differences)

-        print("\nTraining the model for video:", video_details["video_file"])
+        num_sequences_train = len(train_sequences)
+        num_sequences_val = len(val_sequences)
+        crf_array_train = np.full((num_sequences_train, 1), video_details_train['crf'])
+        crf_array_val = np.full((num_sequences_val, 1), video_details_val['crf'])
+        preset_speed_array_train = np.full((num_sequences_train, 1), video_details_train['preset_speed'])
+        preset_speed_array_val = np.full((num_sequences_val, 1), video_details_val['preset_speed'])
+
+        print(len(train_sequences))
+        print(len(val_sequences))
+
+        print("\nTraining the model for video:", video_details_train["video_file"])
        model.fit(
-            {"frames": train_sequences, "crf": crf_array, "preset_speed": preset_speed_array},
-            train_labels,  # Use train_labels as the ground truth
+            {"frames": train_sequences, "crf": crf_array_train, "preset_speed": preset_speed_array_train},
+            train_labels,
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
-            validation_data=({"frames": val_sequences, "crf": crf_array, "preset_speed": preset_speed_array},
-                             val_labels)  # Use val_labels as the ground truth for validation
+            validation_data=({"frames": val_sequences, "crf": crf_array_val, "preset_speed": preset_speed_array_val}, val_labels),
+            callbacks=[early_stop]
        )
-        print("\nTraining completed for video:", video_details["video_file"])
+        print("\nTraining completed for video:", video_details_train["video_file"])

-    save_model(model, 'model.keras')
+    save_model(model, 'model_differencing.keras')

 if __name__ == "__main__":
    main()