Skip to content

Commit be44674

Browse files
NERF-0000: Adding Random Seed Frame Processing
1 parent f86dbe6 commit be44674

File tree

2 files changed: 17 additions and 4 deletions.

2 files changed

+17
-4
lines changed

nerfstudio/process_data/process_data_utils.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,7 @@ def convert_video_to_images(
126126
verbose: bool = False,
127127
image_prefix: str = "frame_",
128128
keep_image_dir: bool = False,
129+
random_seed: Optional[int] = None
129130
) -> Tuple[List[str], int]:
130131
"""Converts a video into a sequence of images.
131132
@@ -177,9 +178,7 @@ def convert_video_to_images(
177178
start_x = crop_factor[2]
178179
start_y = crop_factor[0]
179180
crop_cmd = f"crop=w=iw*{width}:h=ih*{height}:x=iw*{start_x}:y=ih*{start_y},"
180-
181-
spacing = num_frames // num_frames_target
182-
181+
183182
downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}[out{i}]" for i in range(num_downscales + 1)]
184183
downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]
185184
downscale_paths = [downscale_dirs[i] / f"{image_prefix}%05d.png" for i in range(num_downscales + 1)]
@@ -196,7 +195,14 @@ def convert_video_to_images(
196195

197196
ffmpeg_cmd += " -vsync vfr"
198197

199-
if spacing > 1:
198+
# Evenly distribute frame selection if random seed does not exist
199+
spacing = num_frames // num_frames_target
200+
if random_seed:
201+
random.seed(random_seed)
202+
frame_indices = sorted(random.sample(range(num_frames), num_frames_target))
203+
select_cmd = f"select=\'" + "+".join([f"eq(n\,{idx})" for idx in frame_indices]) + "\',setpts=N/TB,"
204+
CONSOLE.print(f"Extracting {num_frames_target} frames using seed-based random selection.")
205+
elif spacing > 1:
200206
CONSOLE.print("Number of frames to extract:", math.ceil(num_frames / spacing))
201207
select_cmd = f"thumbnail={spacing},setpts=N/TB,"
202208
else:

nerfstudio/process_data/video_to_nerfstudio_dataset.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,10 @@ class VideoToNerfstudioDataset(ColmapConverterToNerfstudioDataset):
4141
"""Feature matching method to use. Vocab tree is recommended for a balance of speed
4242
and accuracy. Exhaustive is slower but more accurate. Sequential is faster but
4343
should only be used for videos."""
44+
random_seed: int = None
45+
"""Random seed to select video frames"""
46+
eval_random_seed: int = None
47+
"""Random seed to select video frames for eval set"""
4448

4549
def main(self) -> None:
4650
"""Process video into a nerfstudio dataset."""
@@ -59,6 +63,7 @@ def main(self) -> None:
5963
num_downscales=0,
6064
crop_factor=(0.0, 0.0, 0.0, 0.0),
6165
verbose=self.verbose,
66+
random_seed = self.random_seed
6267
)
6368
else:
6469
# If we're not dealing with equirects we can downscale in one step.
@@ -71,6 +76,7 @@ def main(self) -> None:
7176
verbose=self.verbose,
7277
image_prefix="frame_train_" if self.eval_data is not None else "frame_",
7378
keep_image_dir=False,
79+
random_seed = self.random_seed
7480
)
7581
if self.eval_data is not None:
7682
summary_log_eval, num_extracted_frames_eval = process_data_utils.convert_video_to_images(
@@ -82,6 +88,7 @@ def main(self) -> None:
8288
verbose=self.verbose,
8389
image_prefix="frame_eval_",
8490
keep_image_dir=True,
91+
random_seed = self.eval_random_seed
8592
)
8693
summary_log += summary_log_eval
8794
num_extracted_frames += num_extracted_frames_eval

0 commit comments

Comments
 (0)