163 lines
6.1 KiB
Python
163 lines
6.1 KiB
Python
import os
|
|
import argparse
|
|
import cv2
|
|
import numpy as np
|
|
|
|
# Set the TensorFlow log level before TensorFlow is imported
|
|
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
|
|
|
|
import tensorflow as tf
|
|
from featureExtraction import combined, combined_loss, combined_loss_weighted_psnr, psnr, scale_crf, scale_speed_preset, ssim
|
|
from globalVars import PRESET_SPEED_CATEGORIES, clear_screen
|
|
from video_compression_model import VideoCompressionModel, combine_batch
|
|
|
|
# Default settings. parse_arguments() overwrites these module-level values
# with whatever is supplied on the command line.
UNCOMPRESSED_VIDEO_FILE = 'test_data/x264_crf-5_preset-veryslow.mkv'  # input video
COMPRESSED_VIDEO_FILE = 'compressed_video.avi'  # output video
MODEL_PATH = 'models/model.tf'  # trained model loaded by main()
CRF = 10  # CRF value passed to predict_frame()
SPEED = "ultrafast"  # preset name; looked up in PRESET_SPEED_CATEGORIES
MAX_FRAMES = 0  # frame cap; values <= 0 mean "process every frame"
DISPLAY_OUTPUT = False  # show each predicted frame in a window while running
CROP_DIMENSIONS = None  # (x_min, y_min, x_max, y_max) crop box, or None for no crop
|
|
|
|
|
|
|
|
def parse_arguments():
    """Parse command-line options into the module-level settings.

    Every option defaults to the current value of the matching module-level
    variable, so options that are not given leave that setting unchanged.
    When black-bar cropping is requested, ``CROP_DIMENSIONS`` is computed by
    scanning the input video with ``find_crop_dimensions``.
    """
    global COMPRESSED_VIDEO_FILE, MAX_FRAMES, CRF, SPEED, MODEL_PATH, UNCOMPRESSED_VIDEO_FILE, DISPLAY_OUTPUT, CROP_DIMENSIONS

    parser = argparse.ArgumentParser(description='Deep Encoding of Videos')
    parser.add_argument('-o', '--compressed_video_file', default=COMPRESSED_VIDEO_FILE, help='Path to the compressed video file')
    parser.add_argument('-m', '--max_frames', type=int, default=MAX_FRAMES, help='Maximum number of frames to process')
    parser.add_argument('-c', '--crf', type=int, default=CRF, help='CRF value for video compression')
    parser.add_argument('-s', '--speed', default=SPEED, choices=PRESET_SPEED_CATEGORIES, help='Video compression speed category')
    parser.add_argument('-p', '--model_path', default=MODEL_PATH, help='Path to the trained model')
    parser.add_argument('-i', '--uncompressed_video_file', default=UNCOMPRESSED_VIDEO_FILE, help='Path to the uncompressed video file')
    parser.add_argument('-d', '--display_output', action='store_true', default=DISPLAY_OUTPUT, help='Display real-time output to screen')
    parser.add_argument('--crop_black_bars', action='store_true', help='Detect black bars in the input video and crop them before prediction')
    # NOTE: because this option is ``store_false`` and cropping is triggered
    # when the value is False, passing --keep_black_bars has always ENABLED
    # cropping. That runtime behaviour is preserved for existing callers;
    # only the misleading help text is corrected.
    parser.add_argument('--keep_black_bars', action='store_false', default=True, help='Deprecated alias of --crop_black_bars: despite its name, this flag enables black-bar cropping (bars are kept by default)')

    args = parser.parse_args()

    COMPRESSED_VIDEO_FILE = args.compressed_video_file
    MAX_FRAMES = args.max_frames
    CRF = args.crf
    SPEED = args.speed
    MODEL_PATH = args.model_path
    UNCOMPRESSED_VIDEO_FILE = args.uncompressed_video_file
    DISPLAY_OUTPUT = args.display_output

    # The crop box is derived from the input file selected above.
    if args.crop_black_bars or not args.keep_black_bars:
        CROP_DIMENSIONS = find_crop_dimensions(UNCOMPRESSED_VIDEO_FILE)
|
|
|
|
def crop_black_bars(frame):
    """Return the bounding box of the non-black area of a frame.

    Args:
        frame: Colour image array; it is converted to grayscale with
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        Tuple ``(x_min, y_min, x_max, y_max)`` enclosing every region whose
        gray level is above 1. The max values are exclusive, so the tuple
        can be used directly as ``frame[y_min:y_max, x_min:x_max]``. If no
        pixel is above the threshold, the full-frame box
        ``(0, 0, width, height)`` is returned.
    """
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Gray levels 0 and 1 are treated as black; everything brighter becomes 255.
    _, mask = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)

    # [-2] selects the contour list under both findContours return
    # conventions (3-tuple in OpenCV 3, 2-tuple in OpenCV 2 and 4).
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    if len(contours) == 0:
        # Previously this case returned (inf, inf, 0, 0), which cannot be
        # used as slice bounds. Fall back to "nothing to crop".
        height, width = gray.shape[:2]
        return 0, 0, width, height

    # Union of the bounding rectangles of all external contours.
    boxes = [cv2.boundingRect(contour) for contour in contours]
    x_min = min(x for x, _, _, _ in boxes)
    y_min = min(y for _, y, _, _ in boxes)
    x_max = max(x + w for x, _, w, _ in boxes)
    y_max = max(y + h for _, y, _, h in boxes)

    return x_min, y_min, x_max, y_max
|
|
|
|
def find_crop_dimensions(video_file):
    """Find the crop box of the first usable frame in a video.

    Frames are read in order until one contains a non-zero pixel and
    ``crop_black_bars`` yields a non-empty box for it.

    Args:
        video_file: Path passed to ``cv2.VideoCapture``.

    Returns:
        Tuple ``(x_min, y_min, x_max, y_max)`` from ``crop_black_bars``.

    Raises:
        SystemExit: With status 1, after printing an error, when the video
            ends (or cannot be read) before a usable frame is found.
    """
    cap = cv2.VideoCapture(video_file)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Unable to find a non-black frame.")
                # The builtin exit() comes from the site module and exits
                # with status 0; signal the failure explicitly instead.
                raise SystemExit(1)

            # Skip frames that are entirely black.
            if not np.any(frame > 0):
                continue

            x_min, y_min, x_max, y_max = crop_black_bars(frame)

            # A frame can have non-zero pixels that are all below the
            # black-bar threshold; such a frame gives no usable box.
            if x_max > x_min and y_max > y_min:
                return x_min, y_min, x_max, y_max
    finally:
        # Release the capture on every exit path, including errors.
        cap.release()
|
|
|
|
|
|
def load_frame_from_video(video_file, frame_num):
    """Read one frame from a video by index.

    Args:
        video_file: Path passed to ``cv2.VideoCapture``.
        frame_num: Value written to ``cv2.CAP_PROP_POS_FRAMES`` before reading.

    Returns:
        The frame returned by ``read()``, or ``None`` if the read failed.
    """
    capture = cv2.VideoCapture(video_file)
    capture.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
    success, image = capture.read()
    capture.release()

    if not success:
        return None
    return image
|
|
|
|
|
|
def predict_frame(uncompressed_frame, model, crf, speed):
    """Run the model on one frame and return the result as a uint8 image.

    Args:
        uncompressed_frame: Frame handed to ``combine_batch(..., resize=False)``.
        model: Object whose ``predict`` accepts a dict with the keys
            ``'image'``, ``'CRF'`` and ``'Speed'``.
        crf: CRF value; passed through ``scale_crf``.
        speed: Preset name; its index in ``PRESET_SPEED_CATEGORIES`` is
            passed through ``scale_speed_preset``.

    Returns:
        The first element of the model's prediction, multiplied by 255,
        clipped to [0, 255] and cast to ``np.uint8``.
    """
    # Condition inputs: scaled CRF and scaled preset index.
    crf_value = scale_crf(crf)
    preset_index = PRESET_SPEED_CATEGORIES.index(speed)
    speed_value = scale_speed_preset(preset_index)

    # Preprocess the frame and add a leading batch axis of size 1.
    prepared = combine_batch(uncompressed_frame, resize=False)
    batch = np.expand_dims(prepared, axis=0)

    prediction = model.predict({
        'image': batch,
        'CRF': np.array([crf_value]),
        'Speed': np.array([speed_value]),
    })
    first_output = prediction[0]

    # Presumably the model emits values in [0, 1] - TODO confirm.
    scaled = first_output * 255.0
    return np.clip(scaled, 0, 255).astype(np.uint8)
|
|
|
|
|
|
|
|
def main():
    """Run the model over the input video and write the predicted frames.

    Loads the model from ``MODEL_PATH``, reads up to ``MAX_FRAMES`` frames
    (all frames when ``MAX_FRAMES <= 0``) from ``UNCOMPRESSED_VIDEO_FILE``,
    optionally crops each frame to ``CROP_DIMENSIONS``, predicts the output
    frame, resizes it back to the source resolution and writes it to
    ``COMPRESSED_VIDEO_FILE``. With ``DISPLAY_OUTPUT`` set, every frame is
    also shown in a window and pressing ``q`` stops processing.

    Raises:
        SystemExit: With status 1 when the input video or the output writer
            cannot be opened.
    """
    model = tf.keras.models.load_model(MODEL_PATH, custom_objects={'VideoCompressionModel': VideoCompressionModel, 'psnr': psnr, 'ssim': ssim, 'combined': combined, 'combined_loss': combined_loss, 'combined_loss_weighted_psnr': combined_loss_weighted_psnr})

    cap = cv2.VideoCapture(UNCOMPRESSED_VIDEO_FILE)
    if not cap.isOpened():
        print("Error: Unable to open video file '{}'.".format(UNCOMPRESSED_VIDEO_FILE))
        raise SystemExit(1)

    out = None
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if MAX_FRAMES > 0:
            total_frames = min(total_frames, MAX_FRAMES)

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the frame rate as a float: truncating e.g. 29.97 to 29
        # changes the playback speed of the output file.
        fps = cap.get(cv2.CAP_PROP_FPS)

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(COMPRESSED_VIDEO_FILE, fourcc, fps, (width, height), True)

        if not out.isOpened():
            print("Error: VideoWriter could not be opened.")
            raise SystemExit(1)

        # Read sequentially from the single open capture instead of
        # reopening and seeking the file once per frame.
        for _ in range(total_frames):
            ret, uncompressed_frame = cap.read()
            if not ret:
                # The reported frame count can exceed the number of frames
                # that can actually be decoded; stop at the real end.
                break

            if CROP_DIMENSIONS:
                x_min, y_min, x_max, y_max = CROP_DIMENSIONS
                uncompressed_frame = uncompressed_frame[y_min:y_max, x_min:x_max]

            compressed_frame = predict_frame(uncompressed_frame, model, CRF, SPEED)
            # Restore the source resolution expected by the writer.
            compressed_frame = cv2.resize(compressed_frame, (width, height))
            # NOTE(review): assumes the model output is RGB - confirm
            # against combine_batch.
            compressed_frame = cv2.cvtColor(compressed_frame, cv2.COLOR_RGB2BGR)

            out.write(compressed_frame)

            if DISPLAY_OUTPUT:
                cv2.imshow('Compressed Video', compressed_frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Release resources on every exit path so the output file is
        # finalised even if prediction fails midway.
        cap.release()
        if out is not None:
            out.release()
        if DISPLAY_OUTPUT:
            cv2.destroyAllWindows()

    print("Compression completed.")
|
|
|
|
|
|
if __name__ == '__main__':
    # Order matters: parse_arguments() must fill in the module-level
    # settings before main() reads them.
    clear_screen()
    parse_arguments()
    main()
|