diff --git a/DeepEncode.py b/DeepEncode.py index af55038..a4df1bd 100644 --- a/DeepEncode.py +++ b/DeepEncode.py @@ -2,7 +2,7 @@ import os -from featureExtraction import preprocess_frame, psnr +from featureExtraction import preprocess_frame, psnr, scale_crf, scale_speed_preset from globalVars import PRESET_SPEED_CATEGORIES os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' @@ -10,12 +10,12 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' import tensorflow as tf import numpy as np import cv2 -from video_compression_model import VideoCompressionModel +from video_compression_model import VideoCompressionModel, combine_batch # Constants COMPRESSED_VIDEO_FILE = 'compressed_video.avi' MAX_FRAMES = 0 # Limit the number of frames processed -CRF = 51 +CRF = 0 SPEED = PRESET_SPEED_CATEGORIES.index("ultrafast") # Load the trained model @@ -38,10 +38,13 @@ def predict_frame(uncompressed_frame): #display_frame = np.clip(cv2.cvtColor(uncompressed_frame, cv2.COLOR_BGR2RGB) * 255.0, 0, 255).astype(np.uint8) #cv2.imshow("uncomp", uncompressed_frame) + scaled_crf = scale_crf(CRF) + scaled_speed = scale_speed_preset(SPEED) - frame = preprocess_frame(uncompressed_frame, CRF, SPEED) + frame = combine_batch(uncompressed_frame, scaled_crf, scaled_speed) compressed_frame = MODEL.predict([np.expand_dims(frame, axis=0)])[0] + compressed_frame = compressed_frame[:, :, :3] # Keep only the first 3 channels (BGR) display_frame = np.clip(compressed_frame * 255.0, 0, 255).astype(np.uint8) diff --git a/featureExtraction.py b/featureExtraction.py index 9d6e277..01189a3 100644 --- a/featureExtraction.py +++ b/featureExtraction.py @@ -10,6 +10,12 @@ from tensorflow.keras import backend as K from globalVars import HEIGHT, NUM_PRESET_SPEEDS, WIDTH +def scale_crf(crf): + return crf / 51 + +def scale_speed_preset(speed): + return speed / NUM_PRESET_SPEEDS + def extract_edge_features(frame): """ diff --git a/video_compression_model.py b/video_compression_model.py index 03bfeb4..34b2ef2 100644 --- a/video_compression_model.py +++ b/video_compression_model.py @@ -4,7 +4,7 @@ import os import cv2 import numpy as np import tensorflow as tf -from featureExtraction import preprocess_frame +from featureExtraction import preprocess_frame, scale_crf, scale_speed_preset from globalVars import HEIGHT, LOGGER, NUM_COLOUR_CHANNELS, NUM_PRESET_SPEEDS, PRESET_SPEED_CATEGORIES, WIDTH @@ -13,10 +13,13 @@ from globalVars import HEIGHT, LOGGER, NUM_COLOUR_CHANNELS, NUM_PRESET_SPEEDS, P #policy = Policy('mixed_float16') #tf.keras.mixed_precision.set_global_policy(policy) -def combine_batch(frame, crf_array, speed_array): +def combine_batch(frame, crf, speed): # Preprocess the compressed frame (target) processed_frame = preprocess_frame(frame) + crf_array = np.full((HEIGHT, WIDTH, 1), crf) # Note the added dimension + speed_array = np.full((HEIGHT, WIDTH, 1), speed) # Note the added dimension + # Combine the frames with the CRF and SPEED images combined = np.concatenate([processed_frame, crf_array, speed_array], axis=-1) @@ -32,16 +35,8 @@ def data_generator(videos, batch_size): video_path = os.path.join(base_dir, video_details["compressed_video_file"]) uncompressed_video_path = os.path.join(base_dir, video_details["original_video_file"]) - CRF = video_details["crf"] / 51 - SPEED = PRESET_SPEED_CATEGORIES.index(video_details["preset_speed"]) / NUM_PRESET_SPEEDS - - # Create images with the CRF and SPEED values, filling extra channels - compressed_crf_array = np.full((HEIGHT, WIDTH, 1), CRF) # Note the added dimension - compressed_speed_array = np.full((HEIGHT, WIDTH, 1), SPEED) # Note the added dimension - - # Create images with the CRF and SPEED values, filling extra channels - uncompressed_crf_array = np.full((HEIGHT, WIDTH, 1), 0) # Note the added dimension - uncompressed_speed_array = np.full((HEIGHT, WIDTH, 1), PRESET_SPEED_CATEGORIES.index("veryslow") / NUM_PRESET_SPEEDS) # Note the added dimension + CRF = scale_crf(video_details["crf"]) + SPEED = scale_speed_preset(PRESET_SPEED_CATEGORIES.index(video_details["preset_speed"])) # Open the video files cap_compressed = cv2.VideoCapture(video_path) @@ -58,9 +53,9 @@ def data_generator(videos, batch_size): if not ret_compressed or not ret_uncompressed: break - compressed_combined = combine_batch(compressed_frame, compressed_crf_array, compressed_speed_array) + compressed_combined = combine_batch(compressed_frame, CRF, SPEED) - uncompressed_combined = combine_batch(uncompressed_frame, uncompressed_crf_array, uncompressed_speed_array) + uncompressed_combined = combine_batch(uncompressed_frame, 0, scale_speed_preset(PRESET_SPEED_CATEGORIES.index("veryslow"))) # Append processed frames to batches compressed_frame_batch.append(compressed_combined) @@ -103,7 +98,7 @@ class VideoCompressionModel(tf.keras.Model): tf.keras.layers.UpSampling2D((2, 2)), tf.keras.layers.Conv2DTranspose(64, (3, 3), activation='relu', padding='same'), tf.keras.layers.UpSampling2D((2, 2)), - tf.keras.layers.Conv2DTranspose(NUM_COLOUR_CHANNELS + 2, (3, 3), activation='sigmoid', padding='same') + tf.keras.layers.Conv2DTranspose(NUM_COLOUR_CHANNELS, (3, 3), activation='sigmoid', padding='same') ]) def call(self, inputs):