test
This commit is contained in:
parent
8c5001166d
commit
5085c87300
3 changed files with 96 additions and 173 deletions
|
@ -1,91 +1,68 @@
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import cv2
|
import cv2
|
||||||
from video_compression_model import NUM_FRAMES, PRESET_SPEED_CATEGORIES, VideoCompressionModel
|
from video_compression_model import VideoCompressionModel
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
MAX_FRAMES = 24
|
|
||||||
CHUNK_SIZE = 24 # Adjust based on available memory and video resolution
|
CHUNK_SIZE = 24 # Adjust based on available memory and video resolution
|
||||||
COMPRESSED_VIDEO_FILE = 'compressed_video.mkv'
|
COMPRESSED_VIDEO_FILE = 'compressed_video.mp4'
|
||||||
|
MAX_FRAMES = 24 # Upper limit on the number of frames processed; 0 disables the limit
|
||||||
|
|
||||||
|
# Load the trained model
|
||||||
|
model = tf.keras.models.load_model('models/model.keras', custom_objects={'VideoCompressionModel': VideoCompressionModel})
|
||||||
|
|
||||||
# Step 2: Load the trained model
|
# Load the uncompressed video
|
||||||
model = tf.keras.models.load_model('models/model_differencing.keras', custom_objects={'VideoCompressionModel': VideoCompressionModel})
|
|
||||||
|
|
||||||
# Step 3: Load the uncompressed video
|
|
||||||
UNCOMPRESSED_VIDEO_FILE = 'test_data/training_video.mkv'
|
UNCOMPRESSED_VIDEO_FILE = 'test_data/training_video.mkv'
|
||||||
|
|
||||||
def load_frames_from_video(video_file, start_frame=0, num_frames=CHUNK_SIZE):
|
def load_frame_from_video(video_file, frame_num):
|
||||||
cap = cv2.VideoCapture(video_file)
|
cap = cv2.VideoCapture(video_file)
|
||||||
frames = []
|
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
|
||||||
cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
|
|
||||||
|
|
||||||
for _ in range(num_frames):
|
|
||||||
ret, frame = cap.read()
|
ret, frame = cap.read()
|
||||||
if not ret:
|
if not ret:
|
||||||
break
|
return None
|
||||||
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0 # Normalize and convert to float32
|
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0 # Normalize and convert to float32
|
||||||
frames.append(frame)
|
|
||||||
cap.release()
|
cap.release()
|
||||||
return frames
|
|
||||||
|
|
||||||
def predict_in_chunks(uncompressed_frames, model, crf_values, preset_speed_values):
|
#display_frame = np.clip(frame * 255.0, 0, 255).astype(np.uint8)
|
||||||
num_sequences = len(uncompressed_frames) - NUM_FRAMES + 1
|
#cv2.imshow("uncomp", display_frame)
|
||||||
compressed_frames = []
|
#cv2.waitKey(0) # Add this line to hold the display window until a key is pressed
|
||||||
|
|
||||||
#for frame in uncompressed_frames:
|
|
||||||
# cv2.imshow("frame", frame)
|
|
||||||
# cv2.waitKey(50)
|
|
||||||
|
|
||||||
for start in range(0, num_sequences, CHUNK_SIZE):
|
return frame
|
||||||
end = min(start + CHUNK_SIZE, num_sequences)
|
|
||||||
frame_chunk = uncompressed_frames[start:end + NUM_FRAMES - 1]
|
|
||||||
crf_chunk = crf_values[start:end]
|
|
||||||
speed_chunk = preset_speed_values[start:end]
|
|
||||||
|
|
||||||
frame_sequences = []
|
def predict_frame(uncompressed_frame, model, crf_value, preset_speed_value):
|
||||||
for i in range(len(frame_chunk) - NUM_FRAMES + 1):
|
crf_array = np.array([crf_value])
|
||||||
sequence = frame_chunk[i:i + NUM_FRAMES]
|
preset_speed_array = np.array([preset_speed_value])
|
||||||
frame_sequences.append(sequence)
|
|
||||||
|
|
||||||
frame_sequences = np.array(frame_sequences)
|
|
||||||
|
|
||||||
compressed_chunk = model.predict({"frames": frame_sequences, "crf": crf_chunk, "preset_speed": speed_chunk})
|
|
||||||
compressed_frames.extend(compressed_chunk)
|
|
||||||
|
|
||||||
return compressed_frames
|
|
||||||
|
|
||||||
def save_frames_chunk(frames, video_writer):
|
|
||||||
for frame in frames:
|
|
||||||
frame = np.clip(frame * 255.0, 0, 255).astype(np.uint8)
|
|
||||||
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
|
|
||||||
video_writer.write(frame)
|
|
||||||
|
|
||||||
|
compressed_frame = model.predict({
|
||||||
|
"frame": np.array([uncompressed_frame]),
|
||||||
|
"crf": crf_array,
|
||||||
|
"preset_speed": preset_speed_array
|
||||||
|
})
|
||||||
|
return compressed_frame[0]
|
||||||
|
|
||||||
cap = cv2.VideoCapture(UNCOMPRESSED_VIDEO_FILE)
|
cap = cv2.VideoCapture(UNCOMPRESSED_VIDEO_FILE)
|
||||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||||
|
cap.release()
|
||||||
|
|
||||||
if MAX_FRAMES != 0 and total_frames > MAX_FRAMES:
|
if MAX_FRAMES != 0 and total_frames > MAX_FRAMES:
|
||||||
total_frames = MAX_FRAMES
|
total_frames = MAX_FRAMES
|
||||||
|
|
||||||
cap.release()
|
crf_value = 25.0 # Example CRF value
|
||||||
|
preset_speed_value = 2 # Index into PRESET_SPEED_CATEGORIES; NOTE(review): index 2 is "veryfast" there, "fast" is index 4 - confirm which preset is intended
|
||||||
|
|
||||||
crf_values = np.full((CHUNK_SIZE + NUM_FRAMES - 1, 1), 25, dtype=np.float32) # Chunk size + look-ahead frames
|
height, width = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||||
preset_speed_index = PRESET_SPEED_CATEGORIES.index("fast")
|
fourcc = cv2.VideoWriter_fourcc(*'H264')
|
||||||
preset_speed_values = np.full((CHUNK_SIZE + NUM_FRAMES - 1, 1), preset_speed_index, dtype=np.float32)
|
out = cv2.VideoWriter(COMPRESSED_VIDEO_FILE, fourcc, 24.0, (width, height))
|
||||||
|
|
||||||
out = None # Video writer instance
|
for i in range(total_frames):
|
||||||
for i in range(0, total_frames, CHUNK_SIZE):
|
uncompressed_frame = load_frame_from_video(UNCOMPRESSED_VIDEO_FILE, frame_num=i)
|
||||||
uncompressed_frames_chunk = load_frames_from_video(UNCOMPRESSED_VIDEO_FILE, start_frame=i)
|
compressed_frame = predict_frame(uncompressed_frame, model, crf_value, preset_speed_value)
|
||||||
compressed_frames_chunk = predict_in_chunks(uncompressed_frames_chunk, model, crf_values, preset_speed_values)
|
|
||||||
|
|
||||||
# Initialize video writer if it's the first chunk
|
compressed_frame = np.clip(compressed_frame * 255.0, 0, 255).astype(np.uint8)
|
||||||
if out is None:
|
compressed_frame = cv2.cvtColor(compressed_frame, cv2.COLOR_RGB2BGR)
|
||||||
height, width = compressed_frames_chunk[0].shape[:2]
|
out.write(compressed_frame)
|
||||||
fourcc = cv2.VideoWriter_fourcc(*'XVID')
|
cv2.imshow("output", compressed_frame)
|
||||||
out = cv2.VideoWriter(COMPRESSED_VIDEO_FILE, fourcc, 24.0, (width, height))
|
|
||||||
|
|
||||||
save_frames_chunk(compressed_frames_chunk, out)
|
|
||||||
|
|
||||||
out.release()
|
out.release()
|
||||||
print("Compression completed.")
|
print("Compression completed.")
|
||||||
|
|
115
train_model.py
115
train_model.py
|
@ -1,16 +1,16 @@
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import tensorflow as tf
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import cv2
|
import cv2
|
||||||
from video_compression_model import NUM_CHANNELS, NUM_FRAMES, VideoCompressionModel, PRESET_SPEED_CATEGORIES
|
import tensorflow as tf
|
||||||
|
from video_compression_model import NUM_CHANNELS, VideoCompressionModel, PRESET_SPEED_CATEGORIES
|
||||||
from tensorflow.keras.callbacks import EarlyStopping
|
from tensorflow.keras.callbacks import EarlyStopping
|
||||||
|
|
||||||
print(tf.config.list_physical_devices('GPU'))
|
print(tf.config.list_physical_devices('GPU'))
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
BATCH_SIZE = 8
|
BATCH_SIZE = 8
|
||||||
EPOCHS = 5
|
EPOCHS = 50
|
||||||
TRAIN_SAMPLES = 5
|
TRAIN_SAMPLES = 5
|
||||||
|
|
||||||
def load_list(list_path):
|
def load_list(list_path):
|
||||||
|
@ -18,26 +18,18 @@ def load_list(list_path):
|
||||||
video_details_list = json.load(json_file)
|
video_details_list = json.load(json_file)
|
||||||
return video_details_list
|
return video_details_list
|
||||||
|
|
||||||
def load_frames_from_video(video_file, num_frames):
|
def load_frame_from_video(video_file):
|
||||||
print("Extracting video frames...")
|
print("Extracting video frame...")
|
||||||
cap = cv2.VideoCapture(video_file)
|
cap = cv2.VideoCapture(video_file)
|
||||||
frames = []
|
|
||||||
count = 0
|
|
||||||
while True:
|
|
||||||
ret, frame = cap.read()
|
ret, frame = cap.read()
|
||||||
if not ret:
|
if not ret:
|
||||||
break
|
return None
|
||||||
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
||||||
frames.append(frame)
|
|
||||||
count += 1
|
|
||||||
if count >= num_frames:
|
|
||||||
break
|
|
||||||
cap.release()
|
cap.release()
|
||||||
width, height = frame.shape[:2]
|
return frame
|
||||||
return frames, width, height
|
|
||||||
|
|
||||||
def preprocess(frames):
|
def preprocess(frame):
|
||||||
return np.array(frames) / 255.0
|
return frame / 255.0
|
||||||
|
|
||||||
def save_model(model, file):
|
def save_model(model, file):
|
||||||
os.makedirs("models", exist_ok=True)
|
os.makedirs("models", exist_ok=True)
|
||||||
|
@ -54,109 +46,62 @@ def load_video_from_list(list_path):
|
||||||
PRESET_SPEED = PRESET_SPEED_CATEGORIES.index(video_details['preset_speed'])
|
PRESET_SPEED = PRESET_SPEED_CATEGORIES.index(video_details['preset_speed'])
|
||||||
video_details['preset_speed'] = PRESET_SPEED
|
video_details['preset_speed'] = PRESET_SPEED
|
||||||
|
|
||||||
train_frames, w, h = load_frames_from_video(os.path.join("test_data/", VIDEO_FILE), NUM_FRAMES * TRAIN_SAMPLES)
|
frame = load_frame_from_video(os.path.join("test_data/", VIDEO_FILE))
|
||||||
|
|
||||||
all_frames.extend(train_frames)
|
if frame is not None:
|
||||||
|
all_frames.append(preprocess(frame))
|
||||||
all_details.append({
|
all_details.append({
|
||||||
"frames": train_frames,
|
"frame": frame,
|
||||||
"width": w,
|
|
||||||
"height": h,
|
|
||||||
"crf": CRF,
|
"crf": CRF,
|
||||||
"preset_speed": PRESET_SPEED,
|
"preset_speed": PRESET_SPEED,
|
||||||
"video_file": VIDEO_FILE
|
"video_file": VIDEO_FILE
|
||||||
})
|
})
|
||||||
return all_details
|
return all_details
|
||||||
|
|
||||||
def generate_frame_sequences(frames):
|
|
||||||
sequences = []
|
|
||||||
labels = []
|
|
||||||
for i in range(len(frames) - NUM_FRAMES + 1):
|
|
||||||
sequence = frames[i:i+NUM_FRAMES-1]
|
|
||||||
sequences.append(sequence)
|
|
||||||
labels.append(sequence[-1])
|
|
||||||
return np.array(sequences), np.array(labels)
|
|
||||||
|
|
||||||
def frame_difference(frames):
|
|
||||||
differences = []
|
|
||||||
for i in range(1, len(frames)):
|
|
||||||
differences.append(cv2.absdiff(frames[i], frames[i-1]))
|
|
||||||
return differences
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
all_video_details_train = load_video_from_list("test_data/training.json")
|
all_video_details_train = load_video_from_list("test_data/training.json")
|
||||||
all_video_details_val = load_video_from_list("test_data/validation.json")
|
all_video_details_val = load_video_from_list("test_data/validation.json")
|
||||||
|
|
||||||
model = VideoCompressionModel(NUM_CHANNELS, NUM_FRAMES)
|
model = VideoCompressionModel(NUM_CHANNELS)
|
||||||
model.compile(loss='mean_squared_error', optimizer='adam')
|
model.compile(loss='mean_squared_error', optimizer='adam')
|
||||||
early_stop = EarlyStopping(monitor='val_loss', patience=3, verbose=1, restore_best_weights=True)
|
early_stop = EarlyStopping(monitor='val_loss', patience=3, verbose=1, restore_best_weights=True)
|
||||||
|
|
||||||
# Load and concatenate all sequences and labels
|
# Prepare data
|
||||||
all_train_sequences = []
|
all_train_frames = []
|
||||||
all_val_sequences = []
|
all_val_frames = []
|
||||||
all_train_labels = []
|
|
||||||
all_val_labels = []
|
|
||||||
all_crf_train = []
|
all_crf_train = []
|
||||||
all_crf_val = []
|
all_crf_val = []
|
||||||
all_preset_speed_train = []
|
all_preset_speed_train = []
|
||||||
all_preset_speed_val = []
|
all_preset_speed_val = []
|
||||||
|
|
||||||
for video_details_train, video_details_val in zip(all_video_details_train, all_video_details_val):
|
for video_details_train, video_details_val in zip(all_video_details_train, all_video_details_val):
|
||||||
train_frames = video_details_train["frames"]
|
all_train_frames.append(video_details_train["frame"])
|
||||||
val_frames = video_details_val["frames"]
|
all_val_frames.append(video_details_val["frame"])
|
||||||
|
all_crf_train.append(video_details_train['crf'])
|
||||||
train_differences = frame_difference(preprocess(train_frames))
|
all_crf_val.append(video_details_val['crf'])
|
||||||
val_differences = frame_difference(preprocess(val_frames))
|
all_preset_speed_train.append(video_details_train['preset_speed'])
|
||||||
|
all_preset_speed_val.append(video_details_val['preset_speed'])
|
||||||
#print(len(train_differences), train_differences[0].shape)
|
|
||||||
|
|
||||||
train_sequences, train_labels = generate_frame_sequences(train_differences)
|
|
||||||
val_sequences, val_labels = generate_frame_sequences(val_differences)
|
|
||||||
|
|
||||||
crf_array_train = np.full((len(train_sequences), 1), video_details_train['crf'])
|
|
||||||
crf_array_val = np.full((len(val_sequences), 1), video_details_val['crf'])
|
|
||||||
preset_speed_array_train = np.full((len(train_sequences), 1), video_details_train['preset_speed'])
|
|
||||||
preset_speed_array_val = np.full((len(val_sequences), 1), video_details_val['preset_speed'])
|
|
||||||
|
|
||||||
all_train_sequences.extend(train_sequences)
|
|
||||||
all_val_sequences.extend(val_sequences)
|
|
||||||
all_train_labels.extend(train_labels)
|
|
||||||
all_val_labels.extend(val_labels)
|
|
||||||
all_crf_train.extend(crf_array_train)
|
|
||||||
all_crf_val.extend(crf_array_val)
|
|
||||||
all_preset_speed_train.extend(preset_speed_array_train)
|
|
||||||
all_preset_speed_val.extend(preset_speed_array_val)
|
|
||||||
|
|
||||||
# Convert lists to numpy arrays
|
# Convert lists to numpy arrays
|
||||||
all_train_sequences = np.array(all_train_sequences)
|
all_train_frames = np.array(all_train_frames)
|
||||||
all_val_sequences = np.array(all_val_sequences)
|
all_val_frames = np.array(all_val_frames)
|
||||||
all_train_labels = np.array(all_train_labels)
|
|
||||||
all_val_labels = np.array(all_val_labels)
|
|
||||||
all_crf_train = np.array(all_crf_train)
|
all_crf_train = np.array(all_crf_train)
|
||||||
all_crf_val = np.array(all_crf_val)
|
all_crf_val = np.array(all_crf_val)
|
||||||
all_preset_speed_train = np.array(all_preset_speed_train)
|
all_preset_speed_train = np.array(all_preset_speed_train)
|
||||||
all_preset_speed_val = np.array(all_preset_speed_val)
|
all_preset_speed_val = np.array(all_preset_speed_val)
|
||||||
|
|
||||||
# Shuffle the training data
|
print("\nTraining the model on frame pairs...")
|
||||||
indices_train = np.arange(all_train_sequences.shape[0])
|
|
||||||
np.random.shuffle(indices_train)
|
|
||||||
|
|
||||||
all_train_sequences = all_train_sequences[indices_train]
|
|
||||||
all_train_labels = all_train_labels[indices_train]
|
|
||||||
all_crf_train = all_crf_train[indices_train]
|
|
||||||
all_preset_speed_train = all_preset_speed_train[indices_train]
|
|
||||||
|
|
||||||
print("\nTraining the model on mixed sequences...")
|
|
||||||
model.fit(
|
model.fit(
|
||||||
{"frames": all_train_sequences, "crf": all_crf_train, "preset_speed": all_preset_speed_train},
|
{"frame": all_train_frames, "crf": all_crf_train, "preset_speed": all_preset_speed_train},
|
||||||
all_train_labels,
|
all_val_frames, # Target is the compressed frame
|
||||||
batch_size=BATCH_SIZE,
|
batch_size=BATCH_SIZE,
|
||||||
epochs=EPOCHS,
|
epochs=EPOCHS,
|
||||||
validation_data=({"frames": all_val_sequences, "crf": all_crf_val, "preset_speed": all_preset_speed_val}, all_val_labels),
|
validation_data=({"frame": all_val_frames, "crf": all_crf_val, "preset_speed": all_preset_speed_val}, all_val_frames),
|
||||||
callbacks=[early_stop]
|
callbacks=[early_stop]
|
||||||
)
|
)
|
||||||
print("\nTraining completed!")
|
print("\nTraining completed!")
|
||||||
|
|
||||||
save_model(model, 'model_differencing.keras')
|
save_model(model, 'model.keras')
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
@ -2,7 +2,6 @@ import tensorflow as tf
|
||||||
|
|
||||||
PRESET_SPEED_CATEGORIES = ["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"]
|
PRESET_SPEED_CATEGORIES = ["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"]
|
||||||
NUM_PRESET_SPEEDS = len(PRESET_SPEED_CATEGORIES)
|
NUM_PRESET_SPEEDS = len(PRESET_SPEED_CATEGORIES)
|
||||||
NUM_FRAMES = 5 # Number of consecutive frames in a sequence
|
|
||||||
NUM_CHANNELS = 3 # Number of color channels in the video frames (RGB images have 3 channels)
|
NUM_CHANNELS = 3 # Number of color channels in the video frames (RGB images have 3 channels)
|
||||||
|
|
||||||
#policy = tf.keras.mixed_precision.Policy('mixed_float16')
|
#policy = tf.keras.mixed_precision.Policy('mixed_float16')
|
||||||
|
@ -13,7 +12,6 @@ class VideoCompressionModel(tf.keras.Model):
|
||||||
super(VideoCompressionModel, self).__init__()
|
super(VideoCompressionModel, self).__init__()
|
||||||
|
|
||||||
self.NUM_CHANNELS = NUM_CHANNELS
|
self.NUM_CHANNELS = NUM_CHANNELS
|
||||||
self.NUM_FRAMES = NUM_FRAMES
|
|
||||||
|
|
||||||
# Regularization
|
# Regularization
|
||||||
self.regularizer = tf.keras.regularizers.l2(regularization_factor)
|
self.regularizer = tf.keras.regularizers.l2(regularization_factor)
|
||||||
|
@ -23,21 +21,24 @@ class VideoCompressionModel(tf.keras.Model):
|
||||||
|
|
||||||
# Encoder layers
|
# Encoder layers
|
||||||
self.encoder = tf.keras.Sequential([
|
self.encoder = tf.keras.Sequential([
|
||||||
tf.keras.layers.Conv3D(32, (3, 3, 3), activation='relu', padding='same', input_shape=(None, None, None, NUM_CHANNELS + 1 + 16), kernel_regularizer=self.regularizer),
|
tf.keras.layers.ZeroPadding2D(padding=((1, 1), (1, 1))), # Padding to preserve spatial dimensions
|
||||||
tf.keras.layers.MaxPooling3D((2, 2, 2)),
|
tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same', kernel_regularizer=self.regularizer),
|
||||||
|
tf.keras.layers.MaxPooling2D((2, 2)),
|
||||||
# Add more encoder layers as needed
|
# Add more encoder layers as needed
|
||||||
])
|
])
|
||||||
|
|
||||||
# Decoder layers
|
# Decoder layers
|
||||||
self.decoder = tf.keras.Sequential([
|
self.decoder = tf.keras.Sequential([
|
||||||
tf.keras.layers.Conv3DTranspose(32, (3, 3, 3), activation='relu', padding='same', kernel_regularizer=self.regularizer),
|
tf.keras.layers.Conv2DTranspose(32, (3, 3), activation='relu', padding='same', kernel_regularizer=self.regularizer),
|
||||||
tf.keras.layers.UpSampling3D((2, 2, 2)),
|
tf.keras.layers.UpSampling2D((2, 2)),
|
||||||
# Add more decoder layers as needed
|
# Add more decoder layers as needed
|
||||||
tf.keras.layers.Conv3D(NUM_CHANNELS, (3, 3, 3), activation='sigmoid', padding='same', kernel_regularizer=self.regularizer) # Output layer for video frames
|
tf.keras.layers.Conv2D(NUM_CHANNELS, (3, 3), activation='sigmoid', padding='same', kernel_regularizer=self.regularizer), # Output layer for video frames
|
||||||
|
tf.keras.layers.Cropping2D(cropping=((1, 1), (1, 1))) # Adjust cropping to ensure dimensions match
|
||||||
|
|
||||||
])
|
])
|
||||||
|
|
||||||
def call(self, inputs):
|
def call(self, inputs):
|
||||||
frames = inputs["frames"]
|
frame = inputs["frame"]
|
||||||
crf = tf.expand_dims(inputs["crf"], -1)
|
crf = tf.expand_dims(inputs["crf"], -1)
|
||||||
preset_speed = inputs["preset_speed"]
|
preset_speed = inputs["preset_speed"]
|
||||||
|
|
||||||
|
@ -46,15 +47,15 @@ class VideoCompressionModel(tf.keras.Model):
|
||||||
preset_embedding = tf.keras.layers.Flatten()(preset_embedding)
|
preset_embedding = tf.keras.layers.Flatten()(preset_embedding)
|
||||||
|
|
||||||
# Concatenate crf and preset_embedding to frames
|
# Concatenate crf and preset_embedding to the frame along the last (channel) axis
|
||||||
frames_shape = tf.shape(frames)
|
frame_shape = tf.shape(frame)
|
||||||
repeated_crf = tf.tile(tf.reshape(crf, (-1, 1, 1, 1, 1)), [1, frames_shape[1], frames_shape[2], frames_shape[3], 1])
|
repeated_crf = tf.tile(tf.reshape(crf, (-1, 1, 1, 1)), [1, frame_shape[1], frame_shape[2], 1])
|
||||||
repeated_preset = tf.tile(tf.reshape(preset_embedding, (-1, 1, 1, 1, 16)), [1, frames_shape[1], frames_shape[2], frames_shape[3], 1])
|
repeated_preset = tf.tile(tf.reshape(preset_embedding, (-1, 1, 1, 16)), [1, frame_shape[1], frame_shape[2], 1])
|
||||||
|
|
||||||
frames = tf.concat([frames, repeated_crf, repeated_preset], axis=-1)
|
frame = tf.concat([tf.cast(frame, tf.float32), repeated_crf, repeated_preset], axis=-1)
|
||||||
|
|
||||||
# Encoding the video frames
|
# Encoding the frame
|
||||||
compressed_representation = self.encoder(frames)
|
compressed_representation = self.encoder(frame)
|
||||||
|
|
||||||
# Decoding to generate compressed video frames
|
# Decoding to generate compressed frame
|
||||||
reconstructed_frames = self.decoder(compressed_representation)
|
reconstructed_frame = self.decoder(compressed_representation)
|
||||||
return reconstructed_frames[:,-1,:,:,:]
|
return reconstructed_frame
|
||||||
|
|
Reference in a new issue