import os
import json

import cv2
import numpy as np
import tensorflow as tf

from video_compression_model import NUM_FRAMES, VideoCompressionModel, PRESET_SPEED_CATEGORIES

# Constants
NUM_CHANNELS = 3    # Number of color channels in the video frames (RGB images have 3 channels)
BATCH_SIZE = 16     # Batch size used during training
EPOCHS = 1          # Number of training epochs
TRAIN_SAMPLES = 1   # Multiplier on NUM_FRAMES: how many frames to extract per video
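
# This script reads a JSON manifest at "test_data/training.json", trains the
# compression model on frames extracted from the listed videos, and saves the
# result to "models/model.keras".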

# Step 1: Data Preparation

def load_list(list_path):
    with open(list_path, "r") as json_file:
        video_details_list = json.load(json_file)
    return video_details_list
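
# Example entry in the loaded JSON list (illustrative values; only the keys
# "video_file", "crf" and "preset_speed" are read by this script, and
# "preset_speed" must be one of PRESET_SPEED_CATEGORIES):
#
# [
#     {"video_file": "example.mp4", "crf": 25, "preset_speed": "fast"}
# ]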

# Extract up to num_frames RGB frames from a video file.
def load_frames_from_video(video_file, num_frames):
    print("Extracting video frames...")
    cap = cv2.VideoCapture(video_file)
    frames = []
    count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Frames could be resized to a fixed resolution here if needed, e.g.:
        # frame = cv2.resize(frame, (target_width, target_height))
        frames.append(frame)
        count += 1
        if count >= num_frames:
            break
    cap.release()
    if not frames:
        raise ValueError(f"No frames could be read from {video_file}")
    # frame.shape is (height, width, channels)
    height, width = frames[-1].shape[:2]
    return frames, width, height

def preprocess(frames):
    # Scale pixel values from [0, 255] to [0, 1].
    return np.array(frames) / 255.0

def save_model(model, file):
    os.makedirs("models", exist_ok=True)
    model.save(os.path.join("models", file))
    print("Model saved successfully!")

# Load frames and encoding settings for every video in the JSON list.
def load_video_from_list(list_path):
    details_list = load_list(list_path)
    all_frames = []
    all_details = []
    for video_details in details_list:
        video_file = video_details["video_file"]
        crf = video_details["crf"] / 63.0  # Normalize CRF to the [0, 1] range
        preset_speed = PRESET_SPEED_CATEGORIES.index(video_details["preset_speed"])
        video_details["preset_speed"] = preset_speed

        train_frames, w, h = load_frames_from_video(os.path.join("test_data/", video_file), NUM_FRAMES * TRAIN_SAMPLES)
        all_frames.extend(train_frames)
        all_details.append({
            "frames": train_frames,
            "width": w,
            "height": h,
            "crf": crf,
            "preset_speed": preset_speed,
            "video_file": video_file
        })
    return all_details

# Generate overlapping sequences of NUM_FRAMES frames for the model; the label
# for each sequence is its last frame.
def generate_frame_sequences(frames):
    sequences = []
    labels = []
    for i in range(len(frames) - NUM_FRAMES + 1):
        sequence = frames[i:i + NUM_FRAMES]
        sequences.append(sequence)
        # Use the last frame of the sequence as the label
        labels.append(sequence[-1])
    return np.array(sequences), np.array(labels)
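
# Illustrative example (assuming NUM_FRAMES == 5): 12 preprocessed frames of
# shape (H, W, 3) yield sequences of shape (8, 5, H, W, 3) and labels of shape
# (8, H, W, 3), where each label is the final frame of its window.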

def main():
    all_video_details = load_video_from_list("test_data/training.json")

    model = VideoCompressionModel(NUM_CHANNELS, NUM_FRAMES)
    model.compile(loss='mean_squared_error', optimizer='adam')

    for video_details in all_video_details:
        train_frames = video_details["frames"]
        val_frames = train_frames.copy()  # For simplicity, use the same frames for validation

        train_frames = preprocess(train_frames)
        val_frames = preprocess(val_frames)

        train_sequences, train_labels = generate_frame_sequences(train_frames)
        val_sequences, val_labels = generate_frame_sequences(val_frames)

        # Repeat the per-video CRF and preset speed for every training sequence
        num_sequences = len(train_sequences)
        crf_array = np.full((num_sequences, 1), video_details['crf'])
        preset_speed_array = np.full((num_sequences, 1), video_details['preset_speed'])

        print("\nTraining the model for video:", video_details["video_file"])
        model.fit(
            {"frames": train_sequences, "crf": crf_array, "preset_speed": preset_speed_array},
            train_labels,  # Use train_labels as the ground truth
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
            validation_data=({"frames": val_sequences, "crf": crf_array, "preset_speed": preset_speed_array},
                             val_labels)  # Use val_labels as the ground truth for validation
        )
        print("\nTraining completed for video:", video_details["video_file"])

    save_model(model, 'model.keras')


if __name__ == "__main__":
    main()