sequenced based
This commit is contained in:
parent
80c5f2216d
commit
d0f0b21cb5
3 changed files with 150 additions and 61 deletions
|
@ -1,7 +1,7 @@
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import cv2
|
import cv2
|
||||||
from video_compression_model import VideoCompressionModel
|
from video_compression_model import NUM_FRAMES, PRESET_SPEED_CATEGORIES, VideoCompressionModel
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
NUM_CHANNELS = 3
|
NUM_CHANNELS = 3
|
||||||
|
@ -10,7 +10,7 @@ NUM_CHANNELS = 3
|
||||||
model = tf.keras.models.load_model('models/model.keras', custom_objects={'VideoCompressionModel': VideoCompressionModel})
|
model = tf.keras.models.load_model('models/model.keras', custom_objects={'VideoCompressionModel': VideoCompressionModel})
|
||||||
|
|
||||||
# Step 3: Load the uncompressed video
|
# Step 3: Load the uncompressed video
|
||||||
UNCOMPRESSED_VIDEO_FILE = 'test_data/test_video.mkv'
|
UNCOMPRESSED_VIDEO_FILE = 'test_data/training_video.mkv'
|
||||||
|
|
||||||
def load_frames_from_video(video_file, num_frames = 0):
|
def load_frames_from_video(video_file, num_frames = 0):
|
||||||
print("Extracting video frames...")
|
print("Extracting video frames...")
|
||||||
|
@ -32,19 +32,40 @@ def load_frames_from_video(video_file, num_frames = 0):
|
||||||
print("Extraction Complete")
|
print("Extraction Complete")
|
||||||
return frames
|
return frames
|
||||||
|
|
||||||
uncompressed_frames = load_frames_from_video(UNCOMPRESSED_VIDEO_FILE, 200)
|
uncompressed_frames = load_frames_from_video(UNCOMPRESSED_VIDEO_FILE, 100)
|
||||||
if len(uncompressed_frames) == 0 or None:
|
if not uncompressed_frames:
|
||||||
print("IO ERROR!")
|
print("IO ERROR!")
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
uncompressed_frames = np.array(uncompressed_frames) / 255.0
|
uncompressed_frames = np.array(uncompressed_frames) / 255.0
|
||||||
|
|
||||||
if len(uncompressed_frames) == 0 or None:
|
# Generate sequences of frames for prediction
|
||||||
print("np.array ERROR!")
|
uncompressed_frame_sequences = []
|
||||||
exit()
|
for i in range(len(uncompressed_frames) - NUM_FRAMES + 1):
|
||||||
|
sequence = uncompressed_frames[i:i+NUM_FRAMES]
|
||||||
|
uncompressed_frame_sequences.append(sequence)
|
||||||
|
uncompressed_frame_sequences = np.array(uncompressed_frame_sequences)
|
||||||
|
|
||||||
|
#for frame in uncompressed_frames:
|
||||||
|
# cv2.imshow('Frame', frame)
|
||||||
|
# cv2.waitKey(50) # Display each frame for 50 ms
|
||||||
|
|
||||||
|
|
||||||
# Step 4: Compress the video frames using the loaded model
|
# Step 4: Compress the video frames using the loaded model
|
||||||
compressed_frames = model.predict(uncompressed_frames)
|
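crf_values = np.full((len(uncompressed_frame_sequences), 1), 25, dtype=np.float32) # Added dtype argument. NOTE(review): train_model.py feeds crf divided by 63.0, but the raw value 25 is passed here - confirm the intended scale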
|
||||||
|
|
||||||
|
preset_speed_index = PRESET_SPEED_CATEGORIES.index("fast")
|
||||||
|
preset_speed_values = np.full((len(uncompressed_frame_sequences), 1), preset_speed_index, dtype=np.float32) # Added dtype argument
|
||||||
|
|
||||||
|
compressed_frame_sequences = model.predict({"frames": uncompressed_frame_sequences, "crf": crf_values, "preset_speed": preset_speed_values})
|
||||||
|
|
||||||
|
# The model's call() already returns only the last reconstructed frame of each sequence, so the slice below is left disabled
|
||||||
|
#compressed_frames = compressed_frame_sequences[:, -1]
|
||||||
|
|
||||||
|
#for frame in compressed_frame_sequences:
|
||||||
|
# cv2.imshow('Compressed Frame', frame)
|
||||||
|
# cv2.waitKey(50)
|
||||||
|
|
||||||
|
|
||||||
# Step 5: Save the compressed video frames
|
# Step 5: Save the compressed video frames
|
||||||
COMPRESSED_VIDEO_FILE = 'compressed_video.mkv'
|
COMPRESSED_VIDEO_FILE = 'compressed_video.mkv'
|
||||||
|
@ -60,5 +81,5 @@ def save_frames_as_video(frames, video_file):
|
||||||
out.write(frame)
|
out.write(frame)
|
||||||
out.release()
|
out.release()
|
||||||
|
|
||||||
save_frames_as_video(compressed_frames, COMPRESSED_VIDEO_FILE)
|
save_frames_as_video(compressed_frame_sequences, COMPRESSED_VIDEO_FILE)
|
||||||
print("Compression completed.")
|
print("Compression completed.")
|
||||||
|
|
132
train_model.py
132
train_model.py
|
@ -1,81 +1,123 @@
|
||||||
import os
|
import os
|
||||||
|
import json
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import cv2
|
import cv2
|
||||||
from video_compression_model import VideoCompressionModel
|
from video_compression_model import NUM_FRAMES, VideoCompressionModel, PRESET_SPEED_CATEGORIES
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
NUM_CHANNELS = 3 # Number of color channels in the video frames (RGB images have 3 channels)
|
NUM_CHANNELS = 3 # Number of color channels in the video frames (RGB images have 3 channels)
|
||||||
BATCH_SIZE = 32 # Batch size used during training
|
BATCH_SIZE = 16 # Batch size used during training
|
||||||
EPOCHS = 20 # Number of training epochs
|
EPOCHS = 1 # Number of training epochs
|
||||||
|
TRAIN_SAMPLES = 1 # Number of NUM_FRAMES-long frame groups to extract per video (NUM_FRAMES * TRAIN_SAMPLES frames are read)
|
||||||
|
|
||||||
# Step 1: Data Preparation
|
# Step 1: Data Preparation
|
||||||
TRAIN_VIDEO_FILE = 'test_data/native_video.mkv' # The training video file name
|
|
||||||
VAL_VIDEO_FILE = 'test_data/training_video.mkv' # The validation video file name
|
|
||||||
TRAIN_SAMPLES = 2 # Number of video frames used for training
|
|
||||||
VAL_SAMPLES = 2 # Number of video frames used for validation
|
|
||||||
|
|
||||||
|
def load_list(list_path):
|
||||||
|
with open(list_path, "r") as json_file:
|
||||||
|
video_details_list = json.load(json_file)
|
||||||
|
return video_details_list
|
||||||
|
|
||||||
|
# Load up to num_frames RGB frames from a video file (resizing is currently disabled; the cv2.resize call is commented out)
|
||||||
def load_frames_from_video(video_file, num_frames):
|
def load_frames_from_video(video_file, num_frames):
|
||||||
print("Extracting video frames...")
|
print("Extracting video frames...")
|
||||||
cap = cv2.VideoCapture(video_file)
|
cap = cv2.VideoCapture(video_file)
|
||||||
frames = []
|
frames = []
|
||||||
count = 0
|
count = 0
|
||||||
frame_width, frame_height = None, None # Initialize the frame dimensions
|
|
||||||
while True:
|
while True:
|
||||||
ret, frame = cap.read()
|
ret, frame = cap.read()
|
||||||
if not ret:
|
if not ret:
|
||||||
break
|
break
|
||||||
if frame_width is None or frame_height is None:
|
|
||||||
frame_height, frame_width = frame.shape[:2] # Get the frame dimensions from the first frame
|
|
||||||
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
||||||
|
#frame = cv2.resize(frame, (target_width, target_height))
|
||||||
frames.append(frame)
|
frames.append(frame)
|
||||||
count += 1
|
count += 1
|
||||||
if count >= num_frames:
|
if count >= num_frames:
|
||||||
break
|
break
|
||||||
cap.release()
|
cap.release()
|
||||||
return frames, frame_width, frame_height # Return frames and frame dimensions
|
width, height = frame.shape[:2]
|
||||||
|
return frames, width, height
|
||||||
train_frames, FRAME_WIDTH, FRAME_HEIGHT = load_frames_from_video(TRAIN_VIDEO_FILE, num_frames=TRAIN_SAMPLES)
|
|
||||||
val_frames, _, _ = load_frames_from_video(VAL_VIDEO_FILE, num_frames=VAL_SAMPLES)
|
|
||||||
|
|
||||||
|
|
||||||
print("Number of training frames:", len(train_frames))
|
|
||||||
print("Number of validation frames:", len(val_frames))
|
|
||||||
|
|
||||||
def preprocess(frames):
|
def preprocess(frames):
|
||||||
frames = np.array(frames) / 255.0
|
return np.array(frames) / 255.0
|
||||||
return frames
|
|
||||||
|
|
||||||
train_frames = preprocess(train_frames)
|
def save_model(model, file):
|
||||||
val_frames = preprocess(val_frames)
|
os.makedirs("models", exist_ok=True)
|
||||||
|
model.save(os.path.join("models/", file))
|
||||||
|
print("Model saved successfully!")
|
||||||
|
|
||||||
print("training frames:", len(train_frames))
|
# Load the frames and encoding details (crf, preset_speed) for every video listed in the JSON file
|
||||||
print("validation frames:", len(val_frames))
|
def load_video_from_list(list_path):
|
||||||
|
details_list = load_list(list_path)
|
||||||
|
all_frames = []
|
||||||
|
all_details = []
|
||||||
|
for video_details in details_list:
|
||||||
|
VIDEO_FILE = video_details["video_file"]
|
||||||
|
CRF = video_details['crf'] / 63.0
|
||||||
|
PRESET_SPEED = PRESET_SPEED_CATEGORIES.index(video_details['preset_speed'])
|
||||||
|
video_details['preset_speed'] = PRESET_SPEED
|
||||||
|
|
||||||
# Step 2: Model Architecture
|
# Load NUM_FRAMES * TRAIN_SAMPLES frames from this video (target_width/target_height resizing is not applied)
|
||||||
model = VideoCompressionModel()
|
#train_frames, w, h = load_frames_from_video(os.path.join("test_data/", VIDEO_FILE), TRAIN_SAMPLES, target_width, target_height)
|
||||||
|
train_frames, w, h = load_frames_from_video(os.path.join("test_data/", VIDEO_FILE), NUM_FRAMES * TRAIN_SAMPLES)
|
||||||
|
all_frames.extend(train_frames)
|
||||||
|
all_details.append({
|
||||||
|
"frames": train_frames,
|
||||||
|
"width": w,
|
||||||
|
"height": h,
|
||||||
|
"crf": CRF,
|
||||||
|
"preset_speed": PRESET_SPEED,
|
||||||
|
"video_file": VIDEO_FILE
|
||||||
|
})
|
||||||
|
return all_details
|
||||||
|
|
||||||
model.compile(loss='mean_squared_error', optimizer='adam', run_eagerly=True)
|
def generate_frame_sequences(frames):
|
||||||
|
# Generate sequences of frames for the model
|
||||||
|
sequences = []
|
||||||
|
labels = []
|
||||||
|
for i in range(len(frames) - NUM_FRAMES + 1):
|
||||||
|
sequence = frames[i:i+NUM_FRAMES]
|
||||||
|
sequences.append(sequence)
|
||||||
|
# Use the last frame of the sequence as the label
|
||||||
|
labels.append(sequence[-1])
|
||||||
|
return np.array(sequences), np.array(labels)
|
||||||
|
|
||||||
# Adjusting the input shape for training and validation
|
|
||||||
frame_height, frame_width = train_frames[0].shape[:2]
|
|
||||||
|
|
||||||
# Use the resized frames as target data
|
def main():
|
||||||
train_targets = train_frames
|
#target_width = 640 # Choose a fixed width for the frames
|
||||||
val_targets = val_frames
|
#target_height = 360 # Choose a fixed height for the frames
|
||||||
|
|
||||||
# Create the "models" directory if it doesn't exist
|
all_video_details = load_video_from_list("test_data/training.json")
|
||||||
os.makedirs("models", exist_ok=True)
|
|
||||||
|
|
||||||
print("\nTraining the model...")
|
model = VideoCompressionModel(NUM_CHANNELS, NUM_FRAMES)
|
||||||
model.fit(
|
model.compile(loss='mean_squared_error', optimizer='adam')
|
||||||
train_frames, [train_targets, tf.zeros_like(train_targets)],
|
|
||||||
batch_size=BATCH_SIZE,
|
|
||||||
epochs=EPOCHS,
|
|
||||||
validation_data=(val_frames, [val_targets, tf.zeros_like(val_targets)])
|
|
||||||
)
|
|
||||||
print("\nTraining completed.")
|
|
||||||
|
|
||||||
# Step 3: Save the trained model
|
for video_details in all_video_details:
|
||||||
model.save('models/model.keras')
|
train_frames = video_details["frames"]
|
||||||
print("Model saved successfully!")
|
val_frames = train_frames.copy() # For simplicity, using the same frames for validation
|
||||||
|
|
||||||
|
train_frames = preprocess(train_frames)
|
||||||
|
val_frames = preprocess(val_frames)
|
||||||
|
|
||||||
|
train_sequences, train_labels = generate_frame_sequences(train_frames)
|
||||||
|
val_sequences, val_labels = generate_frame_sequences(val_frames)
|
||||||
|
|
||||||
|
num_sequences = len(train_sequences)
|
||||||
|
crf_array = np.full((num_sequences, 1), video_details['crf'])
|
||||||
|
preset_speed_array = np.full((num_sequences, 1), video_details['preset_speed'])
|
||||||
|
|
||||||
|
print("\nTraining the model for video:", video_details["video_file"])
|
||||||
|
model.fit(
|
||||||
|
{"frames": train_sequences, "crf": crf_array, "preset_speed": preset_speed_array},
|
||||||
|
train_labels, # Use train_labels as the ground truth
|
||||||
|
batch_size=BATCH_SIZE,
|
||||||
|
epochs=EPOCHS,
|
||||||
|
validation_data=({"frames": val_sequences, "crf": crf_array, "preset_speed": preset_speed_array},
|
||||||
|
val_labels) # Use val_labels as the ground truth for validation
|
||||||
|
)
|
||||||
|
print("\nTraining completed for video:", video_details["video_file"])
|
||||||
|
|
||||||
|
save_model(model, 'model.keras')
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
|
@ -1,27 +1,53 @@
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
|
PRESET_SPEED_CATEGORIES = ["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"]
|
||||||
|
NUM_PRESET_SPEEDS = len(PRESET_SPEED_CATEGORIES)
|
||||||
|
NUM_FRAMES = 5 # Number of consecutive frames in a sequence
|
||||||
|
|
||||||
class VideoCompressionModel(tf.keras.Model):
|
class VideoCompressionModel(tf.keras.Model):
|
||||||
def __init__(self, NUM_CHANNELS=3):
|
def __init__(self, NUM_CHANNELS=3, NUM_FRAMES=5):
|
||||||
super(VideoCompressionModel, self).__init__()
|
super(VideoCompressionModel, self).__init__()
|
||||||
|
|
||||||
|
self.NUM_CHANNELS = NUM_CHANNELS
|
||||||
|
self.NUM_FRAMES = NUM_FRAMES
|
||||||
|
|
||||||
|
# Embedding layer for preset_speed
|
||||||
|
self.preset_embedding = tf.keras.layers.Embedding(NUM_PRESET_SPEEDS, 16)
|
||||||
|
|
||||||
# Encoder layers
|
# Encoder layers
|
||||||
self.encoder = tf.keras.Sequential([
|
self.encoder = tf.keras.Sequential([
|
||||||
tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(None, None, NUM_CHANNELS)),
|
tf.keras.layers.Conv3D(32, (3, 3, 3), activation='relu', padding='same', input_shape=(None, None, None, NUM_CHANNELS + 1 + 16)), # Channels: NUM_CHANNELS frame channels + 1 tiled crf channel + 16 tiled preset-embedding channels
|
||||||
|
tf.keras.layers.MaxPooling3D((2, 2, 2)),
|
||||||
# Add more encoder layers as needed
|
# Add more encoder layers as needed
|
||||||
])
|
])
|
||||||
|
|
||||||
# Decoder layers
|
# Decoder layers
|
||||||
self.decoder = tf.keras.Sequential([
|
self.decoder = tf.keras.Sequential([
|
||||||
tf.keras.layers.Conv2DTranspose(32, (3, 3), activation='relu', padding='same'),
|
tf.keras.layers.Conv3DTranspose(32, (3, 3, 3), activation='relu', padding='same'),
|
||||||
|
tf.keras.layers.UpSampling3D((2, 2, 2)),
|
||||||
# Add more decoder layers as needed
|
# Add more decoder layers as needed
|
||||||
tf.keras.layers.Conv2D(NUM_CHANNELS, (3, 3), activation='sigmoid', padding='same') # Output layer for video frames
|
tf.keras.layers.Conv3D(NUM_CHANNELS, (3, 3, 3), activation='sigmoid', padding='same') # Output layer for video frames
|
||||||
])
|
])
|
||||||
|
|
||||||
def call(self, inputs):
|
def call(self, inputs):
|
||||||
|
frames = inputs["frames"]
|
||||||
|
crf = tf.expand_dims(inputs["crf"], -1)
|
||||||
|
preset_speed = inputs["preset_speed"]
|
||||||
|
|
||||||
|
# Convert preset_speed to embeddings
|
||||||
|
preset_embedding = self.preset_embedding(preset_speed)
|
||||||
|
preset_embedding = tf.keras.layers.Flatten()(preset_embedding)
|
||||||
|
|
||||||
|
# Concatenate crf and preset_embedding to frames
|
||||||
|
frames_shape = tf.shape(frames)
|
||||||
|
repeated_crf = tf.tile(tf.reshape(crf, (-1, 1, 1, 1, 1)), [1, frames_shape[1], frames_shape[2], frames_shape[3], 1])
|
||||||
|
repeated_preset = tf.tile(tf.reshape(preset_embedding, (-1, 1, 1, 1, 16)), [1, frames_shape[1], frames_shape[2], frames_shape[3], 1])
|
||||||
|
|
||||||
|
frames = tf.concat([frames, repeated_crf, repeated_preset], axis=-1)
|
||||||
|
|
||||||
# Encoding the video frames
|
# Encoding the video frames
|
||||||
compressed_representation = self.encoder(inputs)
|
compressed_representation = self.encoder(frames)
|
||||||
|
|
||||||
# Decoding to generate compressed video frames
|
# Decoding to generate compressed video frames
|
||||||
reconstructed_frames = self.decoder(compressed_representation)
|
reconstructed_frames = self.decoder(compressed_representation)
|
||||||
|
return reconstructed_frames[:,-1,:,:,:]
|
||||||
return reconstructed_frames
|
|
||||||
|
|
Reference in a new issue