-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrun.py
335 lines (266 loc) · 14.5 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
#!/usr/bin/env python3
# Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import argparse
import os
import commentjson as json
import numpy as np
import shutil
import time
from common import *
from scenes import *
from tqdm import tqdm
import pyngp as ngp # noqa
def parse_args():
    """Build the command-line parser for this script and parse ``sys.argv``.

    Returns:
        argparse.Namespace: One attribute per option declared below. Options
        with several spellings (e.g. ``--scene`` / ``--training_data``) are
        stored under the first long name.
    """
    parser = argparse.ArgumentParser(description="Run instant neural graphics primitives with additional configuration & output options")

    # Inputs: scene / network / snapshot selection.
    parser.add_argument("files", nargs="*", help="Files to be loaded. Can be a scene, network config, snapshot, camera path, or a combination of those.")

    parser.add_argument("--scene", "--training_data", default="", help="The scene to load. Can be the scene's name or a full path to the training data. Can be NeRF dataset, a *.obj/*.stl mesh for training a SDF, an image, or a *.nvdb volume.")
    parser.add_argument("--mode", default="", type=str, help=argparse.SUPPRESS)  # deprecated
    parser.add_argument("--network", default="", help="Path to the network config. Uses the scene's default if unspecified.")

    parser.add_argument("--load_snapshot", "--snapshot", default="", help="Load this snapshot before training. recommended extension: .ingp/.msgpack")
    parser.add_argument("--save_snapshot", default="", help="Save this snapshot after training. recommended extension: .ingp/.msgpack")

    # NeRF-specific training / evaluation options.
    parser.add_argument("--nerf_compatibility", action="store_true", help="Matches parameters with original NeRF. Can cause slowness and worse results on some scenes, but helps with high PSNR on synthetic scenes.")
    parser.add_argument("--test_transforms", default="", help="Path to a nerf style transforms json from which we will compute PSNR.")
    parser.add_argument("--near_distance", default=-1, type=float, help="Set the distance from the camera at which training rays start for nerf. <0 means use ngp default")
    parser.add_argument("--exposure", default=0.0, type=float, help="Controls the brightness of the image. Positive numbers increase brightness, negative numbers decrease it.")

    # Screenshot output.
    parser.add_argument("--screenshot_transforms", default="", help="Path to a nerf style transforms.json from which to save screenshots.")
    parser.add_argument("--screenshot_frames", nargs="*", help="Which frame(s) to take screenshots of.")
    parser.add_argument("--screenshot_dir", default="", help="Which directory to output screenshots to.")
    parser.add_argument("--screenshot_spp", type=int, default=16, help="Number of samples per pixel in screenshots.")

    # Video output.
    parser.add_argument("--video_camera_path", default="", help="The camera path to render, e.g., base_cam.json.")
    parser.add_argument("--video_camera_smoothing", action="store_true", help="Applies additional smoothing to the camera trajectory with the caveat that the endpoint of the camera path may not be reached.")
    parser.add_argument("--video_fps", type=int, default=60, help="Number of frames per second.")
    parser.add_argument("--video_n_seconds", type=int, default=1, help="Number of seconds the rendered video should be long.")
    parser.add_argument("--video_render_range", type=int, nargs=2, default=(-1, -1), metavar=("START_FRAME", "END_FRAME"), help="Limit output to frames between START_FRAME and END_FRAME (inclusive)")
    parser.add_argument("--video_spp", type=int, default=8, help="Number of samples per pixel. A larger number means less noise, but slower rendering.")
    parser.add_argument("--video_output", type=str, default="video.mp4", help="Filename of the output video (video.mp4) or video frames (video_%%04d.png).")

    # Mesh output.
    parser.add_argument("--save_mesh", default="", help="Output a marching-cubes based mesh from the NeRF or SDF model. Supports OBJ and PLY format.")
    parser.add_argument("--marching_cubes_res", default=256, type=int, help="Sets the resolution for the marching cubes grid.")

    # GUI / window options.
    parser.add_argument("--width", "--screenshot_w", type=int, default=0, help="Resolution width of GUI and screenshots.")
    parser.add_argument("--height", "--screenshot_h", type=int, default=0, help="Resolution height of GUI and screenshots.")

    parser.add_argument("--gui", action="store_true", help="Run the testbed GUI interactively.")
    parser.add_argument("--train", action="store_true", help="If the GUI is enabled, controls whether training starts immediately.")
    parser.add_argument("--n_steps", type=int, default=-1, help="Number of steps to train for before quitting.")
    parser.add_argument("--second_window", action="store_true", help="Open a second window containing a copy of the main output.")
    parser.add_argument("--vr", action="store_true", help="Render to a VR headset.")

    # type=float lets argparse validate the value and report a usage error,
    # instead of handing a raw string to the caller.
    parser.add_argument("--sharpen", default=0.0, type=float, help="Set amount of sharpening applied to NeRF training images. Range 0.0 to 1.0.")

    return parser.parse_args()
def get_scene(scene):
    """Return the entry registered for ``scene``, or ``None`` if it is unknown.

    The SDF, NeRF, image and volume scene tables are consulted in that order;
    the entry from the first table that contains ``scene`` wins.
    """
    scene_tables = (scenes_sdf, scenes_nerf, scenes_image, scenes_volume)
    matches = (table[scene] for table in scene_tables if scene in table)
    return next(matches, None)
if __name__ == "__main__":
    # Script entry point: load scene/snapshot/network, optionally train, then
    # run whichever outputs were requested on the command line (snapshot,
    # PSNR/SSIM evaluation, mesh, screenshots, video).

    # Standard-library helpers needed only by the video export step below.
    import subprocess
    import tempfile

    args = parse_args()
    if args.vr:  # VR implies having the GUI running at the moment
        args.gui = True

    if args.mode:
        print("Warning: the '--mode' argument is no longer in use. It has no effect. The mode is automatically chosen based on the scene.")

    testbed = ngp.Testbed()
    testbed.root_dir = ROOT_DIR

    # Positional files: a known scene name is expanded to its dataset path,
    # anything else is handed to the testbed unchanged.
    for file in args.files:
        scene_info = get_scene(file)
        if scene_info:
            file = os.path.join(scene_info["data_dir"], scene_info["dataset"])
        testbed.load_file(file)

    if args.scene:
        scene_info = get_scene(args.scene)
        if scene_info is not None:
            args.scene = os.path.join(scene_info["data_dir"], scene_info["dataset"])
            # A known scene may supply its own default network config.
            if not args.network and "network" in scene_info:
                args.network = scene_info["network"]

        testbed.load_training_data(args.scene)

    if args.gui:
        # Pick a sensible GUI resolution depending on arguments.
        sw = args.width or 1920
        sh = args.height or 1080
        # Halve both dimensions until the window has at most 4x the pixels of 1920x1080.
        while sw * sh > 1920 * 1080 * 4:
            sw = int(sw / 2)
            sh = int(sh / 2)
        testbed.init_window(sw, sh, second_window=args.second_window)
        if args.vr:
            testbed.init_vr()

    # A snapshot takes precedence over an explicit network config.
    if args.load_snapshot:
        scene_info = get_scene(args.load_snapshot)
        if scene_info is not None:
            args.load_snapshot = default_snapshot_filename(scene_info)
        testbed.load_snapshot(args.load_snapshot)
    elif args.network:
        testbed.reload_network_from_file(args.network)

    ref_transforms = {}
    if args.screenshot_transforms:  # try to load the given file straight away
        print("Screenshot transforms from ", args.screenshot_transforms)
        with open(args.screenshot_transforms) as f:
            ref_transforms = json.load(f)

    if testbed.mode == ngp.TestbedMode.Sdf:
        testbed.tonemap_curve = ngp.TonemapCurve.ACES

    testbed.nerf.sharpen = float(args.sharpen)
    testbed.exposure = args.exposure
    # Without a GUI there is nobody to press "train", so training is always on.
    testbed.shall_train = args.train if args.gui else True

    testbed.nerf.render_with_lens_distortion = True

    # Used to name the fallback screenshot further below.
    network_stem = os.path.splitext(os.path.basename(args.network))[0] if args.network else "base"
    if testbed.mode == ngp.TestbedMode.Sdf:
        setup_colored_sdf(testbed, args.scene)

    if args.near_distance >= 0.0:
        print("NeRF training ray near_distance ", args.near_distance)
        testbed.nerf.training.near_distance = args.near_distance

    if args.nerf_compatibility:
        print("NeRF compatibility mode enabled")

        # Prior nerf papers accumulate/blend in the sRGB
        # color space. This messes not only with background
        # alpha, but also with DOF effects and the likes.
        # We support this behavior, but we only enable it
        # for the case of synthetic nerf data where we need
        # to compare PSNR numbers to results of prior work.
        testbed.color_space = ngp.ColorSpace.SRGB

        # No exponential cone tracing. Slightly increases
        # quality at the cost of speed. This is done by
        # default on scenes with AABB 1 (like the synthetic
        # ones), but not on larger scenes. So force the
        # setting here.
        testbed.nerf.cone_angle_constant = 0

        # Match nerf paper behaviour and train on a fixed bg.
        testbed.nerf.training.random_bg_color = False

    old_training_step = 0
    n_steps = args.n_steps

    # If we loaded a snapshot, didn't specify a number of steps, _and_ didn't open a GUI,
    # don't train by default and instead assume that the goal is to render screenshots,
    # compute PSNR, or render a video.
    if n_steps < 0 and (not args.load_snapshot or args.gui):
        n_steps = 35000

    tqdm_last_update = 0
    if n_steps > 0:
        with tqdm(desc="Training", total=n_steps, unit="steps") as t:
            while testbed.frame():
                if testbed.want_repl():
                    repl(testbed)
                # What will happen when training is done?
                if testbed.training_step >= n_steps:
                    if args.gui:
                        # Keep the window open, just stop training.
                        testbed.shall_train = False
                    else:
                        break

                # Update progress bar
                # (restart it when the step counter went backwards or has not advanced yet)
                if testbed.training_step < old_training_step or old_training_step == 0:
                    old_training_step = 0
                    t.reset()

                # Refresh the bar at most every 0.1 s.
                now = time.monotonic()
                if now - tqdm_last_update > 0.1:
                    t.update(testbed.training_step - old_training_step)
                    t.set_postfix(loss=testbed.loss)
                    old_training_step = testbed.training_step
                    tqdm_last_update = now

    if args.save_snapshot:
        testbed.save_snapshot(args.save_snapshot, False)

    if args.test_transforms:
        print("Evaluating test transforms from ", args.test_transforms)
        # The parsed content is not used below; loading it here surfaces a
        # missing or malformed file before any rendering starts.
        with open(args.test_transforms) as f:
            test_transforms = json.load(f)

        totpsnr = 0
        totssim = 0
        totcount = 0
        minpsnr = 1000
        maxpsnr = 0

        # Evaluate metrics on black background
        testbed.background_color = [0.0, 0.0, 0.0, 1.0]

        # Prior nerf papers don't typically do multi-sample anti aliasing.
        # So snap all pixels to the pixel centers.
        testbed.snap_to_pixel_centers = True
        spp = 8

        testbed.nerf.render_min_transmittance = 1e-4

        testbed.shall_train = False
        testbed.load_training_data(args.test_transforms)

        with tqdm(range(testbed.nerf.training.dataset.n_images), unit="images", desc="Rendering test frame") as t:
            for i in t:
                resolution = testbed.nerf.training.dataset.metadata[i].resolution
                # Render the reference first, then the model from the same view.
                testbed.render_ground_truth = True
                testbed.set_camera_to_training_view(i)
                ref_image = testbed.render(resolution[0], resolution[1], 1, True)
                testbed.render_ground_truth = False
                image = testbed.render(resolution[0], resolution[1], spp, True)

                if i == 0:
                    # Dump the first frame (reference, output, absolute difference) for visual inspection.
                    write_image("ref.png", ref_image)
                    write_image("out.png", image)

                    diffimg = np.absolute(image - ref_image)
                    diffimg[..., 3:4] = 1.0
                    write_image("diff.png", diffimg)

                # Metrics are computed on the first three channels after sRGB conversion, clipped to [0, 1].
                A = np.clip(linear_to_srgb(image[..., :3]), 0.0, 1.0)
                R = np.clip(linear_to_srgb(ref_image[..., :3]), 0.0, 1.0)
                mse = float(compute_error("MSE", A, R))
                ssim = float(compute_error("SSIM", A, R))
                psnr = mse2psnr(mse)
                totssim += ssim
                totpsnr += psnr
                minpsnr = min(minpsnr, psnr)
                maxpsnr = max(maxpsnr, psnr)
                totcount += 1
                t.set_postfix(psnr=totpsnr / (totcount or 1))

        # `or 1` avoids a division by zero when the dataset contains no images.
        psnr = totpsnr / (totcount or 1)
        ssim = totssim / (totcount or 1)
        print(f"PSNR={psnr} [min={minpsnr} max={maxpsnr}] SSIM={ssim}")

    if args.save_mesh:
        res = args.marching_cubes_res or 256
        print(f"Generating mesh via marching cubes and saving to {args.save_mesh}. Resolution=[{res},{res},{res}]")
        testbed.compute_and_save_marching_cubes_mesh(args.save_mesh, [res, res, res])

    if ref_transforms:
        testbed.fov_axis = 0
        # "camera_angle_x" is converted from radians to degrees.
        testbed.fov = ref_transforms["camera_angle_x"] * 180 / np.pi
        if not args.screenshot_frames:
            # No explicit selection: render every frame listed in the transforms file.
            args.screenshot_frames = range(len(ref_transforms["frames"]))
        print(args.screenshot_frames)
        for idx in args.screenshot_frames:
            f = ref_transforms["frames"][int(idx)]
            cam_matrix = f["transform_matrix"]
            # Drop the last row of the transform before handing it to the testbed.
            testbed.set_nerf_camera_matrix(np.matrix(cam_matrix)[:-1, :])
            outname = os.path.join(args.screenshot_dir, os.path.basename(f["file_path"]))

            # Some NeRF datasets lack the .png suffix in the dataset metadata
            if not os.path.splitext(outname)[1]:
                outname = outname + ".png"

            print(f"rendering {outname}")
            image = testbed.render(args.width or int(ref_transforms["w"]), args.height or int(ref_transforms["h"]), args.screenshot_spp, True)
            # With the default empty --screenshot_dir the directory part is "",
            # and os.makedirs("") raises; only create a directory when there is one.
            out_dir = os.path.dirname(outname)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            write_image(outname, image)
    elif args.screenshot_dir:
        # NOTE(review): args.scene may have been replaced by a dataset path above,
        # in which case the screenshot name contains that path — confirm this is intended.
        outname = os.path.join(args.screenshot_dir, args.scene + "_" + network_stem)
        print(f"Rendering {outname}.png")
        image = testbed.render(args.width or 1920, args.height or 1080, args.screenshot_spp, True)
        out_dir = os.path.dirname(outname)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        write_image(outname + ".png", image)

    if args.video_camera_path:
        testbed.load_camera_path(args.video_camera_path)

        resolution = [args.width or 1920, args.height or 1080]
        n_frames = args.video_n_seconds * args.video_fps
        # A '%' in the output name selects per-frame image output instead of an encoded video.
        save_frames = "%" in args.video_output
        start_frame, end_frame = args.video_render_range

        # Intermediate JPEGs live in a fresh private directory (only needed when
        # encoding a video), so an unrelated ./tmp in the working directory is
        # never deleted.
        frame_dir = None if save_frames else tempfile.mkdtemp(prefix="ngp_video_")
        try:
            for i in tqdm(range(n_frames), unit="frames", desc="Rendering video"):
                testbed.camera_smoothing = args.video_camera_smoothing

                if start_frame >= 0 and i < start_frame:
                    # For camera smoothing and motion blur to work, we cannot just start rendering
                    # from middle of the sequence. Instead we render a very small image and discard it
                    # for these initial frames.
                    # TODO Replace this with a no-op render method once it's available
                    frame = testbed.render(32, 32, 1, True, float(i)/n_frames, float(i + 1)/n_frames, args.video_fps, shutter_fraction=0.5)
                    continue
                elif end_frame >= 0 and i > end_frame:
                    continue

                frame = testbed.render(resolution[0], resolution[1], args.video_spp, True, float(i)/n_frames, float(i + 1)/n_frames, args.video_fps, shutter_fraction=0.5)
                # Apply the exposure as a power-of-two gain and clip to [0, 1] before writing.
                exposed = np.clip(frame * 2**args.exposure, 0.0, 1.0)
                if save_frames:
                    write_image(args.video_output % i, exposed, quality=100)
                else:
                    write_image(os.path.join(frame_dir, f"{i:04d}.jpg"), exposed, quality=100)

            if not save_frames:
                # Argument list instead of a shell string: output paths containing
                # spaces or shell metacharacters are passed through verbatim.
                ffmpeg_cmd = [
                    "ffmpeg", "-y",
                    "-framerate", str(args.video_fps),
                    # The first written frame is START_FRAME when a render range is given.
                    "-start_number", str(max(start_frame, 0)),
                    "-i", os.path.join(frame_dir, "%04d.jpg"),
                    "-c:v", "libx264",
                    "-pix_fmt", "yuv420p",
                    args.video_output,
                ]
                try:
                    returncode = subprocess.run(ffmpeg_cmd).returncode
                except OSError as err:
                    # e.g. ffmpeg is not installed / not on PATH
                    print(f"Warning: could not run ffmpeg: {err}")
                else:
                    if returncode != 0:
                        print(f"Warning: ffmpeg exited with code {returncode}")
        finally:
            # Remove the intermediate frames even if rendering or encoding failed.
            if frame_dir is not None:
                shutil.rmtree(frame_dir, ignore_errors=True)