import os
import json

import cv2
import numpy as np
import tensorflow as tf

from video_compression_model import NUM_FRAMES, VideoCompressionModel, PRESET_SPEED_CATEGORIES

# Constants
NUM_CHANNELS = 3    # Number of color channels in the video frames (RGB images have 3 channels)
BATCH_SIZE = 16     # Batch size used during training
EPOCHS = 1          # Number of training epochs
TRAIN_SAMPLES = 1   # Multiplier on NUM_FRAMES: how many frames to extract per video
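
# This script reads a JSON manifest at "test_data/training.json", trains the
# compression model on frames extracted from the listed videos, and saves the
# result to "models/model.keras".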

# Step 1: Data Preparation

def load_list(list_path):
    with open(list_path, "r") as json_file:
        video_details_list = json.load(json_file)
    return video_details_list
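
# Example entry in the loaded JSON list (illustrative values; only the keys
# "video_file", "crf" and "preset_speed" are read by this script, and
# "preset_speed" must be one of PRESET_SPEED_CATEGORIES):
#
# [
#     {"video_file": "example.mp4", "crf": 25, "preset_speed": "fast"}
# ]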

# Extract up to num_frames RGB frames from a video file.
def load_frames_from_video(video_file, num_frames):
    print("Extracting video frames...")
    cap = cv2.VideoCapture(video_file)
    frames = []
    count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Frames could be resized to a fixed resolution here if needed, e.g.:
        # frame = cv2.resize(frame, (target_width, target_height))
        frames.append(frame)
        count += 1
        if count >= num_frames:
            break
    cap.release()
    if not frames:
        raise ValueError(f"No frames could be read from {video_file}")
    # frame.shape is (height, width, channels)
    height, width = frames[-1].shape[:2]
    return frames, width, height

def preprocess(frames):
    # Scale pixel values from [0, 255] to [0, 1].
    return np.array(frames) / 255.0

def save_model(model, file):
    os.makedirs("models", exist_ok=True)
    model.save(os.path.join("models", file))
    print("Model saved successfully!")

# Load frames and encoding settings for every video in the JSON list.
def load_video_from_list(list_path):
    details_list = load_list(list_path)
    all_frames = []
    all_details = []
    for video_details in details_list:
        video_file = video_details["video_file"]
        crf = video_details["crf"] / 63.0  # Normalize CRF to the [0, 1] range
        preset_speed = PRESET_SPEED_CATEGORIES.index(video_details["preset_speed"])
        video_details["preset_speed"] = preset_speed

        train_frames, w, h = load_frames_from_video(os.path.join("test_data/", video_file), NUM_FRAMES * TRAIN_SAMPLES)
        all_frames.extend(train_frames)
        all_details.append({
            "frames": train_frames,
            "width": w,
            "height": h,
            "crf": crf,
            "preset_speed": preset_speed,
            "video_file": video_file
        })
    return all_details

# Generate overlapping sequences of NUM_FRAMES frames for the model; the label
# for each sequence is its last frame.
def generate_frame_sequences(frames):
    sequences = []
    labels = []
    for i in range(len(frames) - NUM_FRAMES + 1):
        sequence = frames[i:i + NUM_FRAMES]
        sequences.append(sequence)
        # Use the last frame of the sequence as the label
        labels.append(sequence[-1])
    return np.array(sequences), np.array(labels)
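
# Illustrative example (assuming NUM_FRAMES == 5): 12 preprocessed frames of
# shape (H, W, 3) yield sequences of shape (8, 5, H, W, 3) and labels of shape
# (8, H, W, 3), where each label is the final frame of its window.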

def main():
    all_video_details = load_video_from_list("test_data/training.json")

    model = VideoCompressionModel(NUM_CHANNELS, NUM_FRAMES)
    model.compile(loss='mean_squared_error', optimizer='adam')

    for video_details in all_video_details:
        train_frames = video_details["frames"]
        val_frames = train_frames.copy()  # For simplicity, use the same frames for validation

        train_frames = preprocess(train_frames)
        val_frames = preprocess(val_frames)

        train_sequences, train_labels = generate_frame_sequences(train_frames)
        val_sequences, val_labels = generate_frame_sequences(val_frames)

        # Repeat the per-video CRF and preset speed for every training sequence
        num_sequences = len(train_sequences)
        crf_array = np.full((num_sequences, 1), video_details['crf'])
        preset_speed_array = np.full((num_sequences, 1), video_details['preset_speed'])

        print("\nTraining the model for video:", video_details["video_file"])
        model.fit(
            {"frames": train_sequences, "crf": crf_array, "preset_speed": preset_speed_array},
            train_labels,  # Use train_labels as the ground truth
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
            validation_data=({"frames": val_sequences, "crf": crf_array, "preset_speed": preset_speed_array},
                             val_labels)  # Use val_labels as the ground truth for validation
        )
        print("\nTraining completed for video:", video_details["video_file"])

    save_model(model, 'model.keras')


if __name__ == "__main__":
    main()