Skip to content

Commit 3e638da

Browse files
Adding Random Seed for Frame Processing (#3416)
* Adding Random Seed for Frame Processing
* Added Unit Tests
* Updating Unit Tests for Ffmpeg
* Make Logs More Detailed

---------

Co-authored-by: Anthony-Tafoya <anthonytafoya@berkeley.edu>
Co-authored-by: J.Y. <132313008+jb-ye@users.noreply.github.com>
1 parent 27b8e14 commit 3e638da

File tree

3 files changed

+133
-6
lines changed

3 files changed

+133
-6
lines changed

nerfstudio/process_data/process_data_utils.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"""Helper utils for processing data into the nerfstudio format."""
1616

1717
import math
18+
import random
1819
import re
1920
import shutil
2021
import sys
@@ -126,6 +127,7 @@ def convert_video_to_images(
126127
verbose: bool = False,
127128
image_prefix: str = "frame_",
128129
keep_image_dir: bool = False,
130+
random_seed: Optional[int] = None,
129131
) -> Tuple[List[str], int]:
130132
"""Converts a video into a sequence of images.
131133
@@ -138,6 +140,7 @@ def convert_video_to_images(
138140
verbose: If True, logs the output of the command.
139141
image_prefix: Prefix to use for the image filenames.
140142
keep_image_dir: If True, don't delete the output directory if it already exists.
143+
random_seed: If set, seeds a random sample of num_frames_target frame indices to extract, instead of selecting frames at evenly spaced intervals.
141144
Returns:
142145
A tuple containing summary of the conversion and the number of extracted frames.
143146
"""
@@ -178,8 +181,6 @@ def convert_video_to_images(
178181
start_y = crop_factor[0]
179182
crop_cmd = f"crop=w=iw*{width}:h=ih*{height}:x=iw*{start_x}:y=ih*{start_y},"
180183

181-
spacing = num_frames // num_frames_target
182-
183184
downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}[out{i}]" for i in range(num_downscales + 1)]
184185
downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]
185186
downscale_paths = [downscale_dirs[i] / f"{image_prefix}%05d.png" for i in range(num_downscales + 1)]
@@ -196,8 +197,15 @@ def convert_video_to_images(
196197

197198
ffmpeg_cmd += " -vsync vfr"
198199

199-
if spacing > 1:
200-
CONSOLE.print("Number of frames to extract:", math.ceil(num_frames / spacing))
200+
# Sample frames at random when a seed is given; otherwise fall back to evenly spaced frame selection
201+
spacing = num_frames // num_frames_target
202+
if random_seed:
203+
random.seed(random_seed)
204+
frame_indices = sorted(random.sample(range(num_frames), num_frames_target))
205+
select_cmd = "select='" + "+".join([f"eq(n\,{idx})" for idx in frame_indices]) + "',setpts=N/TB,"
206+
CONSOLE.print(f"Extracting {num_frames_target} frames using seed {random_seed} random selection.")
207+
elif spacing > 1:
208+
CONSOLE.print(f"Extracting {math.ceil(num_frames / spacing)} frames in evenly spaced intervals")
201209
select_cmd = f"thumbnail={spacing},setpts=N/TB,"
202210
else:
203211
CONSOLE.print("[bold red]Can't satisfy requested number of frames. Extracting all frames.")

nerfstudio/process_data/video_to_nerfstudio_dataset.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import shutil
1818
from dataclasses import dataclass
19-
from typing import Literal
19+
from typing import Literal, Optional
2020

2121
from nerfstudio.process_data import equirect_utils, process_data_utils
2222
from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import ColmapConverterToNerfstudioDataset
@@ -41,6 +41,10 @@ class VideoToNerfstudioDataset(ColmapConverterToNerfstudioDataset):
4141
"""Feature matching method to use. Vocab tree is recommended for a balance of speed
4242
and accuracy. Exhaustive is slower but more accurate. Sequential is faster but
4343
should only be used for videos."""
44+
random_seed: Optional[int] = None
45+
"""Random seed to select video frames for training set"""
46+
eval_random_seed: Optional[int] = None
47+
"""Random seed to select video frames for eval set"""
4448

4549
def main(self) -> None:
4650
"""Process video into a nerfstudio dataset."""
@@ -59,6 +63,7 @@ def main(self) -> None:
5963
num_downscales=0,
6064
crop_factor=(0.0, 0.0, 0.0, 0.0),
6165
verbose=self.verbose,
66+
random_seed=self.random_seed,
6267
)
6368
else:
6469
# If we're not dealing with equirects we can downscale in one step.
@@ -71,6 +76,7 @@ def main(self) -> None:
7176
verbose=self.verbose,
7277
image_prefix="frame_train_" if self.eval_data is not None else "frame_",
7378
keep_image_dir=False,
79+
random_seed=self.random_seed,
7480
)
7581
if self.eval_data is not None:
7682
summary_log_eval, num_extracted_frames_eval = process_data_utils.convert_video_to_images(
@@ -82,6 +88,7 @@ def main(self) -> None:
8288
verbose=self.verbose,
8389
image_prefix="frame_eval_",
8490
keep_image_dir=True,
91+
random_seed=self.eval_random_seed,
8592
)
8693
summary_log += summary_log_eval
8794
num_extracted_frames += num_extracted_frames_eval

tests/process_data/test_misc.py

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,21 @@
22
Test misc data utils
33
"""
44

5+
import os
6+
import re
7+
from pathlib import Path
8+
from unittest import mock
9+
10+
import cv2
511
import numpy as np
12+
from PIL import Image
613
from pyquaternion import Quaternion
714
from scipy.spatial.transform import Rotation
815

916
# TODO(1480) use pycolmap instead of colmap_parsing_utils
1017
# import pycolmap
1118
from nerfstudio.data.utils.colmap_parsing_utils import qvec2rotmat
19+
from nerfstudio.process_data.process_data_utils import convert_video_to_images
1220

1321

1422
def test_scalar_first_scalar_last_quaternions():
@@ -39,7 +47,7 @@ def test_scalar_first_scalar_last_quaternions():
3947

4048
# Expected Rotation matrix
4149
# fmt: off
42-
R_expected = np.array(
50+
R_expected = np.array(
4351
[
4452
[ 0.81379768, -0.44096961, 0.37852231],
4553
[ 0.46984631, 0.88256412, 0.01802831],
@@ -61,3 +69,107 @@ def test_scalar_first_scalar_last_quaternions():
6169
# R = pycolmap.qvec_to_rotmat(wxyz)
6270
R = qvec2rotmat(wxyz)
6371
assert np.allclose(R, R_expected)
72+
73+
74+
def test_process_video_conversion_with_seed(tmp_path: Path):
    """
    Test convert_video_to_images by creating a mock video and ensuring correct frame extraction with seed.

    ``run_command`` is mocked, so no real ffmpeg process runs. The test inspects the
    ffmpeg command string handed to the mock and checks that:

    * the same seed yields the same selected frame indices on repeated calls, and
    * a different seed yields a different selection.

    Args:
        tmp_path: pytest-provided temporary directory used for the mock video,
            the extracted-image directory and the fake ``colmap``/``ffmpeg`` binaries.
    """

    # Inner functions needed for the unit tests
    def create_mock_video(video_path: Path, frame_dir: Path, num_frames=10, frame_rate=1):
        """Creates a mock video from a series of frames using OpenCV."""

        first_frame = cv2.imread(str(frame_dir / "frame_0.png"))
        height, width, _ = first_frame.shape
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(str(video_path), fourcc, frame_rate, (width, height))

        for i in range(num_frames):
            frame = cv2.imread(str(frame_dir / f"frame_{i}.png"))
            out.write(frame)
        out.release()

    def extract_frame_numbers(ffmpeg_command: str):
        """Extracts the frame numbers from the ffmpeg command"""

        # Matches the literal `eq(n\,<idx>)` terms of the ffmpeg select expression.
        pattern = r"eq\(n\\,(\d+)\)"
        return [int(match) for match in re.findall(pattern, ffmpeg_command)]

    # Create a video directory with path video
    video_dir = tmp_path / "video"
    video_dir.mkdir(exist_ok=True)

    # Set parameters for mock video
    video_path = video_dir / "mock_video.mp4"
    num_frames = 10
    frame_height = 150
    frame_width = 100
    frame_rate = 1

    # Create the mock video
    for i in range(num_frames):
        img = Image.new("RGB", (frame_width, frame_height), (0, 0, 0))
        img.save(video_dir / f"frame_{i}.png")
    create_mock_video(video_path, video_dir, num_frames=num_frames, frame_rate=frame_rate)

    # Parameters shared by every convert_video_to_images call below
    image_output_dir = tmp_path / "extracted_images"
    num_frames_target = 5
    num_downscales = 1
    crop_factor = (0.0, 0.0, 0.0, 0.0)

    # Mock missing COLMAP and ffmpeg in the dev env
    mocked_bin = tmp_path / "mocked_bin"
    mocked_bin.mkdir()
    for tool_name in ("colmap", "ffmpeg"):
        (mocked_bin / tool_name).touch(mode=0o777)
    patched_path = os.pathsep.join([str(mocked_bin), os.environ.get("PATH", "")])

    def run_conversion(seed: int) -> None:
        """Runs convert_video_to_images on the mock video with the given seed."""
        convert_video_to_images(
            video_path=video_path,
            image_dir=image_output_dir,
            num_frames_target=num_frames_target,
            num_downscales=num_downscales,
            crop_factor=crop_factor,
            verbose=False,
            random_seed=seed,
        )

    # patch.dict restores PATH on exit so the fake binaries do not leak into other tests.
    # Return value of 10 for the get_num_frames_in_video run_command call
    with mock.patch.dict(os.environ, {"PATH": patched_path}), mock.patch(
        "nerfstudio.process_data.process_data_utils.run_command", return_value="10"
    ) as mock_run_func:
        # Each conversion is expected to invoke run_command twice; the most recent
        # call carries the ffmpeg command holding the frame selection.
        run_conversion(42)
        assert mock_run_func.call_count == 2, f"Expected 2 calls, but got {mock_run_func.call_count}"
        first_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
        assert len(first_frames) == 5, f"Expected 5 frames, but got {len(first_frames)}"

        # Same seed again: selection must be reproducible
        run_conversion(42)
        assert mock_run_func.call_count == 4, f"Expected 4 total calls, but got {mock_run_func.call_count}"
        second_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
        assert len(second_frames) == 5, f"Expected 5 frames, but got {len(second_frames)}"
        assert first_frames == second_frames

        # Different seed: selection must change
        run_conversion(52)
        assert mock_run_func.call_count == 6, f"Expected 6 total calls, but got {mock_run_func.call_count}"
        third_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
        assert len(third_frames) == 5, f"Expected 5 frames, but got {len(third_frames)}"
        assert first_frames != third_frames

0 commit comments

Comments
 (0)