diff --git a/DeepEncode.py b/DeepEncode.py index 8c20ecd..283da48 100644 --- a/DeepEncode.py +++ b/DeepEncode.py @@ -1,7 +1,7 @@ import tensorflow as tf import numpy as np import cv2 -from video_compression_model import VideoCompressionModel +from video_compression_model import NUM_FRAMES, PRESET_SPEED_CATEGORIES, VideoCompressionModel # Constants NUM_CHANNELS = 3 @@ -10,7 +10,7 @@ NUM_CHANNELS = 3 model = tf.keras.models.load_model('models/model.keras', custom_objects={'VideoCompressionModel': VideoCompressionModel}) # Step 3: Load the uncompressed video -UNCOMPRESSED_VIDEO_FILE = 'test_data/test_video.mkv' +UNCOMPRESSED_VIDEO_FILE = 'test_data/training_video.mkv' def load_frames_from_video(video_file, num_frames = 0): print("Extracting video frames...") @@ -32,19 +32,40 @@ def load_frames_from_video(video_file, num_frames = 0): print("Extraction Complete") return frames -uncompressed_frames = load_frames_from_video(UNCOMPRESSED_VIDEO_FILE, 200) -if len(uncompressed_frames) == 0 or None: +uncompressed_frames = load_frames_from_video(UNCOMPRESSED_VIDEO_FILE, 100) +if not uncompressed_frames: print("IO ERROR!") exit() uncompressed_frames = np.array(uncompressed_frames) / 255.0 -if len(uncompressed_frames) == 0 or None: - print("np.array ERROR!") - exit() +# Generate sequences of frames for prediction +uncompressed_frame_sequences = [] +for i in range(len(uncompressed_frames) - NUM_FRAMES + 1): + sequence = uncompressed_frames[i:i+NUM_FRAMES] + uncompressed_frame_sequences.append(sequence) +uncompressed_frame_sequences = np.array(uncompressed_frame_sequences) + +#for frame in uncompressed_frames: +# cv2.imshow('Frame', frame) +# cv2.waitKey(50) # Display each frame for 1 second + # Step 4: Compress the video frames using the loaded model -compressed_frames = model.predict(uncompressed_frames) +crf_values = np.full((len(uncompressed_frame_sequences), 1), 25, dtype=np.float32) # Added dtype argument + +preset_speed_index = PRESET_SPEED_CATEGORIES.index("fast") +preset_speed_values = np.full((len(uncompressed_frame_sequences), 1), preset_speed_index, dtype=np.float32) # Added dtype argument + +compressed_frame_sequences = model.predict({"frames": uncompressed_frame_sequences, "crf": crf_values, "preset_speed": preset_speed_values}) + +# We'll use the last frame of each sequence as the compressed frame +#compressed_frames = compressed_frame_sequences[:, -1] + +#for frame in compressed_frame_sequences: +# cv2.imshow('Compressed Frame', frame) +# cv2.waitKey(50) + # Step 5: Save the compressed video frames COMPRESSED_VIDEO_FILE = 'compressed_video.mkv' @@ -60,5 +81,5 @@ def save_frames_as_video(frames, video_file): out.write(frame) out.release() -save_frames_as_video(compressed_frames, COMPRESSED_VIDEO_FILE) +save_frames_as_video(compressed_frame_sequences, COMPRESSED_VIDEO_FILE) print("Compression completed.") diff --git a/train_model.py b/train_model.py index 304c483..d410e1c 100644 --- a/train_model.py +++ b/train_model.py @@ -1,81 +1,123 @@ import os +import json import tensorflow as tf import numpy as np import cv2 -from video_compression_model import VideoCompressionModel +from video_compression_model import NUM_FRAMES, VideoCompressionModel, PRESET_SPEED_CATEGORIES # Constants NUM_CHANNELS = 3 # Number of color channels in the video frames (RGB images have 3 channels) -BATCH_SIZE = 32 # Batch size used during training -EPOCHS = 20 # Number of training epochs +BATCH_SIZE = 16 # Batch size used during training +EPOCHS = 1 # Number of training epochs +TRAIN_SAMPLES = 1 # number of frames to extract # Step 1: Data Preparation -TRAIN_VIDEO_FILE = 'test_data/native_video.mkv' # The training video file name -VAL_VIDEO_FILE = 'test_data/training_video.mkv' # The validation video file name -TRAIN_SAMPLES = 2 # Number of video frames used for training -VAL_SAMPLES = 2 # Number of video frames used for validation +def load_list(list_path): + with open(list_path, "r") as json_file: + video_details_list = json.load(json_file) + return video_details_list + +# Update load_frames_from_video function to resize frames def load_frames_from_video(video_file, num_frames): print("Extracting video frames...") cap = cv2.VideoCapture(video_file) frames = [] count = 0 - frame_width, frame_height = None, None # Initialize the frame dimensions while True: ret, frame = cap.read() if not ret: break - if frame_width is None or frame_height is None: - frame_height, frame_width = frame.shape[:2] # Get the frame dimensions from the first frame frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + #frame = cv2.resize(frame, (target_width, target_height)) frames.append(frame) count += 1 if count >= num_frames: break cap.release() - return frames, frame_width, frame_height # Return frames and frame dimensions - -train_frames, FRAME_WIDTH, FRAME_HEIGHT = load_frames_from_video(TRAIN_VIDEO_FILE, num_frames=TRAIN_SAMPLES) -val_frames, _, _ = load_frames_from_video(VAL_VIDEO_FILE, num_frames=VAL_SAMPLES) - - -print("Number of training frames:", len(train_frames)) -print("Number of validation frames:", len(val_frames)) + width, height = frame.shape[:2] + return frames, width, height def preprocess(frames): - frames = np.array(frames) / 255.0 - return frames + return np.array(frames) / 255.0 -train_frames = preprocess(train_frames) -val_frames = preprocess(val_frames) +def save_model(model, file): + os.makedirs("models", exist_ok=True) + model.save(os.path.join("models/", file)) + print("Model saved successfully!") -print("training frames:", len(train_frames)) -print("validation frames:", len(val_frames)) +# Update load_video_from_list function to provide target_width and target_height +def load_video_from_list(list_path): + details_list = load_list(list_path) + all_frames = [] + all_details = [] + for video_details in details_list: + VIDEO_FILE = video_details["video_file"] + CRF = video_details['crf'] / 63.0 + PRESET_SPEED = PRESET_SPEED_CATEGORIES.index(video_details['preset_speed']) + video_details['preset_speed'] = PRESET_SPEED -# Step 2: Model Architecture -model = VideoCompressionModel() + # Update load_frames_from_video calls with target_width and target_height + #train_frames, w, h = load_frames_from_video(os.path.join("test_data/", VIDEO_FILE), TRAIN_SAMPLES, target_width, target_height) + train_frames, w, h = load_frames_from_video(os.path.join("test_data/", VIDEO_FILE), NUM_FRAMES * TRAIN_SAMPLES) + all_frames.extend(train_frames) + all_details.append({ + "frames": train_frames, + "width": w, + "height": h, + "crf": CRF, + "preset_speed": PRESET_SPEED, + "video_file": VIDEO_FILE + }) + return all_details -model.compile(loss='mean_squared_error', optimizer='adam', run_eagerly=True) +def generate_frame_sequences(frames): + # Generate sequences of frames for the model + sequences = [] + labels = [] + for i in range(len(frames) - NUM_FRAMES + 1): + sequence = frames[i:i+NUM_FRAMES] + sequences.append(sequence) + # Use the last frame of the sequence as the label + labels.append(sequence[-1]) + return np.array(sequences), np.array(labels) -# Adjusting the input shape for training and validation -frame_height, frame_width = train_frames[0].shape[:2] -# Use the resized frames as target data -train_targets = train_frames -val_targets = val_frames +def main(): + #target_width = 640 # Choose a fixed width for the frames + #target_height = 360 # Choose a fixed height for the frames -# Create the "models" directory if it doesn't exist -os.makedirs("models", exist_ok=True) + all_video_details = load_video_from_list("test_data/training.json") -print("\nTraining the model...") -model.fit( - train_frames, [train_targets, tf.zeros_like(train_targets)], - batch_size=BATCH_SIZE, - epochs=EPOCHS, - validation_data=(val_frames, [val_targets, tf.zeros_like(val_targets)]) -) -print("\nTraining completed.") + model = VideoCompressionModel(NUM_CHANNELS, NUM_FRAMES) + model.compile(loss='mean_squared_error', optimizer='adam') -# Step 3: Save the trained model -model.save('models/model.keras') -print("Model saved successfully!") + for video_details in all_video_details: + train_frames = video_details["frames"] + val_frames = train_frames.copy() # For simplicity, using the same frames for validation + + train_frames = preprocess(train_frames) + val_frames = preprocess(val_frames) + + train_sequences, train_labels = generate_frame_sequences(train_frames) + val_sequences, val_labels = generate_frame_sequences(val_frames) + + num_sequences = len(train_sequences) + crf_array = np.full((num_sequences, 1), video_details['crf']) + preset_speed_array = np.full((num_sequences, 1), video_details['preset_speed']) + + print("\nTraining the model for video:", video_details["video_file"]) + model.fit( + {"frames": train_sequences, "crf": crf_array, "preset_speed": preset_speed_array}, + train_labels, # Use train_labels as the ground truth + batch_size=BATCH_SIZE, + epochs=EPOCHS, + validation_data=({"frames": val_sequences, "crf": crf_array, "preset_speed": preset_speed_array}, + val_labels) # Use val_labels as the ground truth for validation + ) + print("\nTraining completed for video:", video_details["video_file"]) + + save_model(model, 'model.keras') + +if __name__ == "__main__": + main() diff --git a/video_compression_model.py b/video_compression_model.py index 7f49848..47cc0b8 100644 --- a/video_compression_model.py +++ b/video_compression_model.py @@ -1,27 +1,53 @@ import tensorflow as tf +PRESET_SPEED_CATEGORIES = ["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"] +NUM_PRESET_SPEEDS = len(PRESET_SPEED_CATEGORIES) +NUM_FRAMES = 5 # Number of consecutive frames in a sequence + class VideoCompressionModel(tf.keras.Model): - def __init__(self, NUM_CHANNELS=3): + def __init__(self, NUM_CHANNELS=3, NUM_FRAMES=5): super(VideoCompressionModel, self).__init__() + + self.NUM_CHANNELS = NUM_CHANNELS + self.NUM_FRAMES = NUM_FRAMES + + # Embedding layer for preset_speed + self.preset_embedding = tf.keras.layers.Embedding(NUM_PRESET_SPEEDS, 16) # Encoder layers self.encoder = tf.keras.Sequential([ - tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(None, None, NUM_CHANNELS)), + tf.keras.layers.Conv3D(32, (3, 3, 3), activation='relu', padding='same', input_shape=(None, None, None, NUM_CHANNELS + 1 + 16)), # Notice the adjusted channel number + tf.keras.layers.MaxPooling3D((2, 2, 2)), # Add more encoder layers as needed ]) # Decoder layers self.decoder = tf.keras.Sequential([ - tf.keras.layers.Conv2DTranspose(32, (3, 3), activation='relu', padding='same'), + tf.keras.layers.Conv3DTranspose(32, (3, 3, 3), activation='relu', padding='same'), + tf.keras.layers.UpSampling3D((2, 2, 2)), # Add more decoder layers as needed - tf.keras.layers.Conv2D(NUM_CHANNELS, (3, 3), activation='sigmoid', padding='same') # Output layer for video frames + tf.keras.layers.Conv3D(NUM_CHANNELS, (3, 3, 3), activation='sigmoid', padding='same') # Output layer for video frames ]) def call(self, inputs): + frames = inputs["frames"] + crf = tf.expand_dims(inputs["crf"], -1) + preset_speed = inputs["preset_speed"] + + # Convert preset_speed to embeddings + preset_embedding = self.preset_embedding(preset_speed) + preset_embedding = tf.keras.layers.Flatten()(preset_embedding) + + # Concatenate crf and preset_embedding to frames + frames_shape = tf.shape(frames) + repeated_crf = tf.tile(tf.reshape(crf, (-1, 1, 1, 1, 1)), [1, frames_shape[1], frames_shape[2], frames_shape[3], 1]) + repeated_preset = tf.tile(tf.reshape(preset_embedding, (-1, 1, 1, 1, 16)), [1, frames_shape[1], frames_shape[2], frames_shape[3], 1]) + + frames = tf.concat([frames, repeated_crf, repeated_preset], axis=-1) + # Encoding the video frames - compressed_representation = self.encoder(inputs) + compressed_representation = self.encoder(frames) # Decoding to generate compressed video frames reconstructed_frames = self.decoder(compressed_representation) - - return reconstructed_frames \ No newline at end of file + return reconstructed_frames[:,-1,:,:,:]