xizaoqu
committed on
Commit
·
0cb2a53
1
Parent(s):
3b61a0b
update
Browse files
- app.py +18 -7
- configurations/huggingface.yaml +1 -1
app.py
CHANGED
|
@@ -241,7 +241,7 @@ def set_memory_length(memory_length, sampling_memory_length_state):
|
|
| 241 |
|
| 242 |
def generate(keys):
|
| 243 |
# print("algo frame:", len(worldmem.frames))
|
| 244 |
-
|
| 245 |
global input_history
|
| 246 |
global memory_frames
|
| 247 |
global memory_curr_frame
|
|
@@ -251,8 +251,19 @@ def generate(keys):
|
|
| 251 |
global self_memory_c2w
|
| 252 |
global self_frame_idx
|
| 253 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
new_frame, self_frames, self_actions, self_poses, self_memory_c2w, self_frame_idx = run_interactive(memory_frames[0],
|
| 255 |
-
|
| 256 |
None,
|
| 257 |
device=device,
|
| 258 |
self_frames=self_frames,
|
|
@@ -422,12 +433,12 @@ with gr.Blocks(css=css) as demo:
|
|
| 422 |
- D: turn right
|
| 423 |
- Q: move forward
|
| 424 |
- E: move backward
|
| 425 |
-
- N: no-op (do nothing)
|
| 426 |
-
- 1: switch to hotbar 1
|
| 427 |
- U: use item
|
| 428 |
-
5. Higher denoising steps produce more detailed results but take longer.
|
| 429 |
-
6.
|
| 430 |
-
7.
|
|
|
|
| 431 |
"""
|
| 432 |
)
|
| 433 |
# input_box.submit(update_image_and_log, inputs=[input_box], outputs=[image_display, video_display, log_output])
|
|
|
|
| 241 |
|
| 242 |
def generate(keys):
|
| 243 |
# print("algo frame:", len(worldmem.frames))
|
| 244 |
+
input_actions = parse_input_to_tensor(keys)
|
| 245 |
global input_history
|
| 246 |
global memory_frames
|
| 247 |
global memory_curr_frame
|
|
|
|
| 251 |
global self_memory_c2w
|
| 252 |
global self_frame_idx
|
| 253 |
|
| 254 |
+
if self_frames is None:
|
| 255 |
+
new_frame, self_frames, self_actions, self_poses, self_memory_c2w, self_frame_idx = run_interactive(memory_frames[0],
|
| 256 |
+
actions[0],
|
| 257 |
+
poses[0],
|
| 258 |
+
device=device,
|
| 259 |
+
self_frames=self_frames,
|
| 260 |
+
self_actions=self_actions,
|
| 261 |
+
self_poses=self_poses,
|
| 262 |
+
self_memory_c2w=self_memory_c2w,
|
| 263 |
+
self_frame_idx=self_frame_idx)
|
| 264 |
+
|
| 265 |
new_frame, self_frames, self_actions, self_poses, self_memory_c2w, self_frame_idx = run_interactive(memory_frames[0],
|
| 266 |
+
input_actions,
|
| 267 |
None,
|
| 268 |
device=device,
|
| 269 |
self_frames=self_frames,
|
|
|
|
| 433 |
- D: turn right
|
| 434 |
- Q: move forward
|
| 435 |
- E: move backward
|
| 436 |
+
- N: no-op (do nothing)
|
|
|
|
| 437 |
- U: use item
|
| 438 |
+
5. Higher denoising steps produce more detailed results but take longer. 20 steps is a good balance between quality and speed. The same applies to context and memory length.
|
| 439 |
+
6. For faster performance, we recommend running the demo locally (~1s/frame on H100 vs ~5s on Spaces).
|
| 440 |
+
7. If you find this project interesting or useful, please consider giving it a ⭐️ on [GitHub]()!
|
| 441 |
+
8. For feedback or suggestions, feel free to open a GitHub issue or contact me directly at **zeqixiao1@gmail.com**.
|
| 442 |
"""
|
| 443 |
)
|
| 444 |
# input_box.submit(update_image_and_log, inputs=[input_box], outputs=[image_display, video_display, log_output])
|
configurations/huggingface.yaml
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
n_tokens:
|
| 2 |
pose_cond_dim: 5
|
| 3 |
use_plucker: true
|
| 4 |
focal_length: 0.35
|
|
|
|
| 1 |
+
n_tokens: 3
|
| 2 |
pose_cond_dim: 5
|
| 3 |
use_plucker: true
|
| 4 |
focal_length: 0.35
|