sequenced based

This commit is contained in:
Jordon Brooks 2023-07-24 23:56:46 +01:00
parent 80c5f2216d
commit d0f0b21cb5
3 changed files with 150 additions and 61 deletions

View file

@@ -1,81 +1,123 @@
import os
import json
import tensorflow as tf
import numpy as np
import cv2
from video_compression_model import VideoCompressionModel
from video_compression_model import NUM_FRAMES, VideoCompressionModel, PRESET_SPEED_CATEGORIES
# Constants
# NOTE(review): BATCH_SIZE, EPOCHS and TRAIN_SAMPLES are each assigned twice in
# this section. Executed top to bottom, the later assignment wins
# (BATCH_SIZE = 16, EPOCHS = 1, TRAIN_SAMPLES = 2) - confirm which values are intended.
NUM_CHANNELS = 3 # Number of color channels in the video frames (RGB images have 3 channels)
BATCH_SIZE = 32 # Batch size used during training (reassigned below)
EPOCHS = 20 # Number of training epochs (reassigned below)
BATCH_SIZE = 16 # Batch size passed to model.fit
EPOCHS = 1 # Number of epochs passed to model.fit
TRAIN_SAMPLES = 1 # Frame-count setting; load_video_from_list reads NUM_FRAMES * TRAIN_SAMPLES frames per video (reassigned below)
# Step 1: Data Preparation
TRAIN_VIDEO_FILE = 'test_data/native_video.mkv' # The training video file name
VAL_VIDEO_FILE = 'test_data/training_video.mkv' # The validation video file name
TRAIN_SAMPLES = 2 # Number of video frames used for training when passed directly as num_frames
VAL_SAMPLES = 2 # Number of video frames used for validation
def load_list(list_path):
    """Load the video-details list from a JSON file.

    Args:
        list_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content, returned as-is (no validation happens here).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; being explicit keeps decoding independent
    # of the platform's locale encoding.
    with open(list_path, "r", encoding="utf-8") as json_file:
        return json.load(json_file)
def load_frames_from_video(video_file, num_frames):
    """Read up to ``num_frames`` frames from the start of a video.

    Frames are converted from BGR to RGB and kept at their native
    resolution; no resizing is performed.

    Args:
        video_file: Path handed to ``cv2.VideoCapture``.
        num_frames: Maximum number of frames to read. Values <= 0 yield no
            frames.

    Returns:
        A ``(frames, frame_width, frame_height)`` tuple. ``frames`` is the
        list of converted frames in reading order. The width and height are
        taken from the first frame that was read and are both ``None`` when
        no frame could be read (e.g. the file could not be opened).
    """
    print("Extracting video frames...")
    cap = cv2.VideoCapture(video_file)
    frames = []
    frame_width, frame_height = None, None
    try:
        while len(frames) < num_frames:
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure: return what was collected.
                break
            if frame_width is None:
                # shape[:2] is (rows, columns), i.e. height comes first.
                frame_height, frame_width = frame.shape[:2]
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture even if conversion raises.
        cap.release()
    return frames, frame_width, frame_height
# NOTE(review): these statements sit after the function's `return`, so they
# appear to run at module scope (at import time) - confirm against the real file layout.
# Load the training frames; the last two returned values are stored as
# FRAME_WIDTH / FRAME_HEIGHT.
train_frames, FRAME_WIDTH, FRAME_HEIGHT = load_frames_from_video(TRAIN_VIDEO_FILE, num_frames=TRAIN_SAMPLES)
# Load the validation frames; their dimensions are discarded.
val_frames, _, _ = load_frames_from_video(VAL_VIDEO_FILE, num_frames=VAL_SAMPLES)
# Report how many frames were actually read (may be fewer than requested).
print("Number of training frames:", len(train_frames))
print("Number of validation frames:", len(val_frames))
width, height = frame.shape[:2]
return frames, width, height
def preprocess(frames):
    """Convert frames to a NumPy array and scale the values by 1/255.

    Args:
        frames: Array-like pixel data, e.g. a list of equally shaped frames.

    Returns:
        ``np.array(frames) / 255.0`` as a float array; 8-bit input values
        (0-255) therefore map to the range [0, 1].
    """
    return np.array(frames) / 255.0
# Replace both frame lists with their preprocess() result
# (NumPy arrays with every value divided by 255.0).
train_frames = preprocess(train_frames)
val_frames = preprocess(val_frames)
def save_model(model, file):
    """Store ``model`` inside the ``models`` directory under the name ``file``.

    The directory is created when it does not exist yet. ``model`` only
    needs to provide a ``save(path)`` method.
    """
    os.makedirs("models", exist_ok=True)
    destination = os.path.join("models/", file)
    model.save(destination)
    print("Model saved successfully!")
# Report the number of entries in each frame collection.
print("training frames:", len(train_frames))
print("validation frames:", len(val_frames))
def load_video_from_list(list_path):
    """Load frames and encoding settings for every video in a JSON list.

    Each list entry must provide the keys ``"video_file"`` (a name relative
    to ``test_data/``), ``"crf"`` and ``"preset_speed"``.

    Args:
        list_path: Path of the JSON list, read with ``load_list``.

    Returns:
        A list with one dict per entry, holding:
            ``frames``: frames from ``load_frames_from_video`` (at most
                ``NUM_FRAMES * TRAIN_SAMPLES`` of them),
            ``width`` / ``height``: the second and third values returned by
                ``load_frames_from_video``,
            ``crf``: the entry's CRF divided by 63.0,
            ``preset_speed``: index of the entry's preset name in
                ``PRESET_SPEED_CATEGORIES``,
            ``video_file``: the entry's file name, unchanged.

    Raises:
        KeyError: If an entry lacks one of the required keys.
        ValueError: Presumably raised by ``.index`` when the preset name is
            not in ``PRESET_SPEED_CATEGORIES`` (assumes it is a list/tuple).
    """
    all_details = []
    for video_details in load_list(list_path):
        video_file = video_details["video_file"]
        # NOTE(review): 63 looks like the encoder's maximum CRF, which would
        # normalize the value to [0, 1] - confirm.
        crf = video_details["crf"] / 63.0
        # The preset name is encoded as its position in the category list.
        preset_speed = PRESET_SPEED_CATEGORIES.index(video_details["preset_speed"])

        frames, width, height = load_frames_from_video(
            os.path.join("test_data/", video_file),
            NUM_FRAMES * TRAIN_SAMPLES,
        )

        all_details.append({
            "frames": frames,
            "width": width,
            "height": height,
            "crf": crf,
            "preset_speed": preset_speed,
            "video_file": video_file,
        })
    return all_details
# Compile with mean-squared-error loss and the Adam optimizer; run_eagerly=True
# is passed through to Keras.
# NOTE(review): `model` does not appear to be bound at module scope at this
# point in the visible code - this line looks like a leftover from an earlier
# revision; confirm.
model.compile(loss='mean_squared_error', optimizer='adam', run_eagerly=True)
def generate_frame_sequences(frames, num_frames=None):
    """Build sliding windows of consecutive frames plus one label per window.

    Window ``i`` is ``frames[i:i + num_frames]`` and its label is the last
    frame of that same window.

    Args:
        frames: Sequence of frames (list or array) supporting ``len`` and
            slicing.
        num_frames: Window length. Defaults to the module-level
            ``NUM_FRAMES`` when omitted.

    Returns:
        A ``(sequences, labels)`` tuple of NumPy arrays with
        ``len(frames) - num_frames + 1`` entries each. Both arrays are empty
        when ``frames`` holds fewer than ``num_frames`` items.

    Raises:
        ValueError: If ``num_frames`` is smaller than 1.
    """
    if num_frames is None:
        # Resolved at call time so the default always follows the model config.
        num_frames = NUM_FRAMES
    if num_frames < 1:
        raise ValueError("num_frames must be at least 1")

    window_count = len(frames) - num_frames + 1  # <= 0 produces no windows
    sequences = [frames[start:start + num_frames] for start in range(window_count)]
    # The label of each window is its final frame.
    labels = [window[-1] for window in sequences]
    return np.array(sequences), np.array(labels)
# Read height and width from the first training frame (shape[:2] is height, width).
frame_height, frame_width = train_frames[0].shape[:2]
# Use the input frames themselves as target data: both names refer to the same
# objects, no copy is made.
# NOTE(review): the earlier wording mentioned "resized" frames, but no resize
# is applied in the visible code - confirm.
train_targets = train_frames
val_targets = val_frames
def main():
    """Train one VideoCompressionModel on every video in the training list.

    The videos are read from ``test_data/training.json``. For each video the
    frames are preprocessed, turned into sliding-window sequences and fed to
    ``model.fit`` together with per-sequence ``crf`` and ``preset_speed``
    inputs. The same model instance is trained on all videos in turn and is
    written to ``models/model.keras`` after each one.
    """
    # Create the output directory up front so a filesystem problem shows up
    # before any training time is spent.
    os.makedirs("models", exist_ok=True)

    all_video_details = load_video_from_list("test_data/training.json")

    model = VideoCompressionModel(NUM_CHANNELS, NUM_FRAMES)
    model.compile(loss='mean_squared_error', optimizer='adam')

    for video_details in all_video_details:
        frames = preprocess(video_details["frames"])
        sequences, labels = generate_frame_sequences(frames)

        num_sequences = len(sequences)
        if num_sequences == 0:
            # Too few frames to form a single window; fit() cannot train on
            # empty arrays.
            print("\nSkipping video with too few frames:", video_details["video_file"])
            continue

        # crf and preset_speed are constant per video, so they are repeated
        # once per sequence as (num_sequences, 1) columns.
        inputs = {
            "frames": sequences,
            "crf": np.full((num_sequences, 1), video_details['crf']),
            "preset_speed": np.full((num_sequences, 1), video_details['preset_speed']),
        }

        print("\nTraining the model for video:", video_details["video_file"])
        model.fit(
            inputs,
            labels,  # Ground truth: the last frame of each sequence
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
            # For simplicity, validation reuses the training data, so the
            # validation loss is not an independent estimate.
            validation_data=(inputs, labels),
        )
        print("\nTraining completed for video:", video_details["video_file"])

        # Written after every video so progress survives a later failure.
        save_model(model, 'model.keras')


if __name__ == "__main__":
    main()