"""Run a trained video-compression model over a video and write the result.

The script reads frames from an input video with OpenCV, passes each frame
through a Keras model together with scaled CRF / speed-preset values, and
writes the predicted frames to an XVID-encoded output file.
"""
import argparse
import os
import sys

import cv2
import numpy as np

# Set the TensorFlow log level before TensorFlow itself is imported.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
import tensorflow as tf

from featureExtraction import combined, combined_loss, combined_loss_weighted_psnr, psnr, scale_crf, scale_speed_preset, ssim
from globalVars import PRESET_SPEED_CATEGORIES, clear_screen
from video_compression_model import VideoCompressionModel, combine_batch

# Constants (used as defaults; parse_arguments() overwrites them from the CLI)
COMPRESSED_VIDEO_FILE = 'compressed_video.avi'
MAX_FRAMES = 0  # Maximum number of frames to process; 0 (or less) means no limit
CRF = 10
SPEED = "ultrafast"
MODEL_PATH = 'models/model.tf'
UNCOMPRESSED_VIDEO_FILE = 'test_data/x264_crf-5_preset-veryslow.mkv'
DISPLAY_OUTPUT = False
CROP_DIMENSIONS = None  # (x_min, y_min, x_max, y_max) when cropping is enabled


def parse_arguments():
    """Parse the command line and store the results in the module globals.

    The current values of the module-level constants are used as argument
    defaults. When cropping is requested, ``CROP_DIMENSIONS`` is computed from
    the input video via ``find_crop_dimensions``.
    """
    global COMPRESSED_VIDEO_FILE, MAX_FRAMES, CRF, SPEED, MODEL_PATH, UNCOMPRESSED_VIDEO_FILE, DISPLAY_OUTPUT, CROP_DIMENSIONS

    parser = argparse.ArgumentParser(description='Deep Encoding of Videos')
    parser.add_argument('-o', '--compressed_video_file', default=COMPRESSED_VIDEO_FILE, help='Path to the compressed video file')
    parser.add_argument('-m', '--max_frames', type=int, default=MAX_FRAMES, help='Maximum number of frames to process')
    parser.add_argument('-c', '--crf', type=int, default=CRF, help='CRF value for video compression')
    parser.add_argument('-s', '--speed', default=SPEED, choices=PRESET_SPEED_CATEGORIES, help='Video compression speed category')
    parser.add_argument('-p', '--model_path', default=MODEL_PATH, help='Path to the trained model')
    parser.add_argument('-i', '--uncompressed_video_file', default=UNCOMPRESSED_VIDEO_FILE, help='Path to the uncompressed video file')
    parser.add_argument('-d', '--display_output', action='store_true', default=DISPLAY_OUTPUT, help='Display real-time output to screen')
    # NOTE(review): `store_false` with default=True means that *passing*
    # --keep_black_bars sets the value to False, which enables cropping below,
    # while omitting the flag leaves the bars in place. That looks inverted
    # relative to the flag name and help text. It is preserved here so the CLI
    # behaviour does not change silently; confirm the intended default first.
    parser.add_argument('--keep_black_bars', action='store_false', help='Keep black bars from the video', default=True)

    args = parser.parse_args()

    COMPRESSED_VIDEO_FILE = args.compressed_video_file
    MAX_FRAMES = args.max_frames
    CRF = args.crf
    SPEED = args.speed
    MODEL_PATH = args.model_path
    UNCOMPRESSED_VIDEO_FILE = args.uncompressed_video_file
    DISPLAY_OUTPUT = args.display_output

    if not args.keep_black_bars:
        CROP_DIMENSIONS = find_crop_dimensions(UNCOMPRESSED_VIDEO_FILE)


def crop_black_bars(frame):
    """Return the bounding box of the non-black content of ``frame``.

    Args:
        frame: Image as read by OpenCV; it is converted with ``COLOR_BGR2GRAY``.

    Returns:
        Tuple ``(x_min, y_min, x_max, y_max)`` of ints usable as slice bounds
        (``frame[y_min:y_max, x_min:x_max]``). If no pixel exceeds the
        threshold, the full frame extent is returned.
    """
    # Convert to grayscale for easier processing
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Pixels with a gray value above 1 become white, everything else black
    _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)

    # Find the contours of the white regions. findContours returns
    # (contours, hierarchy) on OpenCV 4.x but (image, contours, hierarchy) on
    # 3.x; the contours are the second-to-last element in both cases.
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    if len(contours) == 0:
        # Nothing above the threshold: fall back to the whole frame instead of
        # returning infinite bounds that cannot be used for slicing.
        height, width = gray.shape[:2]
        return 0, 0, width, height

    # Bounding box that contains all the contours
    boxes = [cv2.boundingRect(contour) for contour in contours]
    x_min = min(x for x, _, _, _ in boxes)
    y_min = min(y for _, y, _, _ in boxes)
    x_max = max(x + w for x, _, w, _ in boxes)
    y_max = max(y + h for _, y, _, h in boxes)

    return int(x_min), int(y_min), int(x_max), int(y_max)


def find_crop_dimensions(video_file):
    """Compute crop bounds from the first frame of ``video_file`` that is not entirely black.

    Args:
        video_file: Path of the video to inspect.

    Returns:
        Tuple ``(x_min, y_min, x_max, y_max)`` as produced by
        ``crop_black_bars`` for the first frame containing a non-zero pixel.

    Raises:
        SystemExit: If the video ends (or cannot be read) before such a frame
            is found.
    """
    cap = cv2.VideoCapture(video_file)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Unable to find a non-black frame.", file=sys.stderr)
                sys.exit(1)

            # Skip frames that are entirely black
            if np.any(frame > 0):
                return crop_black_bars(frame)
    finally:
        # Runs on both the return and the exit path
        cap.release()


def load_frame_from_video(video_file, frame_num):
    """Read a single frame from ``video_file`` by index.

    Opens the file, seeks to ``frame_num`` and reads one frame. This is costly
    when called once per frame; sequential readers should keep one
    ``cv2.VideoCapture`` open instead.

    Args:
        video_file: Path of the video to read.
        frame_num: Zero-based frame position passed to ``CAP_PROP_POS_FRAMES``.

    Returns:
        The frame, or ``None`` if it could not be read.
    """
    cap = cv2.VideoCapture(video_file)
    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
        ret, frame = cap.read()
    finally:
        cap.release()
    return frame if ret else None


def predict_frame(uncompressed_frame, model, crf, speed):
    """Run the model on one frame and return the prediction as a uint8 image.

    Args:
        uncompressed_frame: Input frame, passed to ``combine_batch`` with
            ``resize=False`` for preprocessing.
        model: Object exposing ``predict``; it is called with a dict holding
            the keys ``'image'``, ``'CRF'`` and ``'Speed'``.
        crf: CRF value, scaled with ``scale_crf`` before being fed to the model.
        speed: Preset name; must be an entry of ``PRESET_SPEED_CATEGORIES``.

    Returns:
        The first element of the model output, multiplied by 255, clipped to
        ``[0, 255]`` and cast to ``np.uint8``.

    Raises:
        ValueError: If ``speed`` is not in ``PRESET_SPEED_CATEGORIES``.
    """
    # Scale the CRF and Speed values
    scaled_crf = scale_crf(crf)
    scaled_speed = scale_speed_preset(PRESET_SPEED_CATEGORIES.index(speed))

    # Preprocess the frame
    frame = combine_batch(uncompressed_frame, resize=False)

    # Predict using the model (a batch containing a single frame)
    inputs = {
        'image': np.expand_dims(frame, axis=0),
        'CRF': np.array([scaled_crf]),
        'Speed': np.array([scaled_speed]),
    }
    compressed_frame = model.predict(inputs)[0]

    # Post-process the output frame
    # NOTE(review): the * 255 scaling presumes the model emits values in [0, 1] - confirm.
    return np.clip(compressed_frame * 255.0, 0, 255).astype(np.uint8)


def main():
    """Load the model, process the input video frame by frame and write the output.

    Frames are read sequentially from a single capture. Each frame is
    optionally cropped to ``CROP_DIMENSIONS``, passed through ``predict_frame``,
    resized back to the source resolution, converted with ``COLOR_RGB2BGR`` and
    written to ``COMPRESSED_VIDEO_FILE``. Processing stops after
    ``MAX_FRAMES`` frames (when positive), at the first frame that cannot be
    read, or when ``q`` is pressed while the preview is displayed.

    Raises:
        SystemExit: If the input video or the output writer cannot be opened.
    """
    model = tf.keras.models.load_model(MODEL_PATH, custom_objects={
        'VideoCompressionModel': VideoCompressionModel,
        'psnr': psnr,
        'ssim': ssim,
        'combined': combined,
        'combined_loss': combined_loss,
        'combined_loss_weighted_psnr': combined_loss_weighted_psnr,
    })

    cap = cv2.VideoCapture(UNCOMPRESSED_VIDEO_FILE)
    out = None
    try:
        if not cap.isOpened():
            print(f"Error: Unable to open video file '{UNCOMPRESSED_VIDEO_FILE}'.", file=sys.stderr)
            sys.exit(1)

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if MAX_FRAMES > 0:
            total_frames = min(total_frames, MAX_FRAMES)

        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would
        # change the duration of the output video.
        fps = cap.get(cv2.CAP_PROP_FPS)

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(COMPRESSED_VIDEO_FILE, fourcc, fps, (width, height), True)

        if not out.isOpened():
            print("Error: VideoWriter could not be opened.", file=sys.stderr)
            sys.exit(1)

        for _ in range(total_frames):
            # Read sequentially from the open capture rather than re-opening
            # the file and seeking for every frame.
            ret, uncompressed_frame = cap.read()
            if not ret:
                # The reported frame count can exceed the readable frames.
                break

            if CROP_DIMENSIONS is not None:
                x_min, y_min, x_max, y_max = CROP_DIMENSIONS
                uncompressed_frame = uncompressed_frame[y_min:y_max, x_min:x_max]

            compressed_frame = predict_frame(uncompressed_frame, model, CRF, SPEED)
            # The writer only accepts frames of the size it was created with
            compressed_frame = cv2.resize(compressed_frame, (width, height))
            compressed_frame = cv2.cvtColor(compressed_frame, cv2.COLOR_RGB2BGR)

            out.write(compressed_frame)

            if DISPLAY_OUTPUT:
                cv2.imshow('Compressed Video', compressed_frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Release resources even if prediction or writing raises
        cap.release()
        if out is not None:
            out.release()
        if DISPLAY_OUTPUT:
            cv2.destroyAllWindows()

    print("Compression completed.")


if __name__ == '__main__':
    clear_screen()
    parse_arguments()
    main()