KevinNg99 committed
Commit 5bd8781 · 1 Parent(s): 9873fde

update readme

Files changed (2)
  1. README.md +5 -2
  2. README_CN.md +4 -2
README.md CHANGED
@@ -13,6 +13,7 @@ pipeline_tag: text-to-video
 extra_gated_eu_disallowed: true
 ---
 
+
 [中文文档](./README_CN.md)
 
 # HunyuanVideo-1.5
@@ -201,7 +202,7 @@ For models with a vLLM API, note that T2V (text-to-video) and I2V (image-to-vide
 - I2V: use [Qwen3-VL-235B-A22B-Instruct](https://huggingface.co/Qwen/Qwen3-VL-235B-A22B-Instruct), configure `I2V_REWRITE_BASE_URL` and `I2V_REWRITE_MODEL_NAME`
 
 > You may set the above model names to any other vLLM-compatible models you have deployed (including HuggingFace models).
-> Rewriting is enabled by default; to disable it explicitly, use the `--disable_rewrite` flag. If no vLLM endpoint is configured, the pipeline runs without remote rewriting.
+> Rewriting is enabled by default (`--rewrite` defaults to `true`); to disable it explicitly, use `--rewrite false` or `--rewrite 0`. If no vLLM endpoint is configured, the pipeline runs without remote rewriting.
 
 Example: Generate a video (works for both T2V and I2V; set `IMAGE_PATH=none` for T2V or provide an image path for I2V)
 
@@ -211,7 +212,7 @@ export T2V_REWRITE_MODEL_NAME="<your_model_name>"
 export I2V_REWRITE_BASE_URL="<your_vllm_server_base_url>"
 export I2V_REWRITE_MODEL_NAME="<your_model_name>"
 
-PROMPT='On a wet street corner in a cyberpunk city at night, a large neon sign reading "Hunyuan Video 1.5" lights up sequentially, illuminating the dark, rainy environment with a pinkish-purple glow. he scene is a dark, rain-slicked street corner in a futuristic, cinematic cyberpunk city. Mounted on the metallic, weathered facade of a building is a massive, unlit neon sign. The sign's glass tube framework clearly spells out the words "Hunyuan Video 1.5". Initially, the street is dimly lit, with ambient light from distant skyscrapers creating shimmering reflections on the wet asphalt below. Then, the camera zooms in slowly toward the sign. As it moves, a low electrical sizzling sound begins. In the background, the dense urban landscape of the cyberpunk metropolis is visible through a light atmospheric haze, with towering structures adorned with their own flickering advertisements. A complex web of cables and pipes crisscrosses between the buildings. The shot is at a low angle, looking up at the sign to emphasize its grand scale. The lighting is high-contrast and dramatic, dominated by the neon glow which creates sharp, specular reflections and deep shadows. The atmosphere is moody and tech-noir. The overall video presents a cinematic photography realistic style.,'
+PROMPT='A girl holding a paper with words "Hello, world!"'
 
 IMAGE_PATH=./data/reference_image.png # Optional, 'none' or <image path>
 SEED=1
@@ -225,6 +226,7 @@ CFG_DISTILLED=true # Inference with CFG distilled model, 2x speedup
 SPARSE_ATTN=true # Inference with sparse attention
 SAGE_ATTN=false # Inference with SageAttention
 MODEL_PATH=ckpts # Path to pretrained model
+REWRITE=true # Enable prompt rewriting
 
 torchrun --nproc_per_node=$N_INFERENCE_GPU generate.py \
     --prompt "$PROMPT" \
@@ -235,6 +237,7 @@ torchrun --nproc_per_node=$N_INFERENCE_GPU generate.py \
     --cfg_distilled $CFG_DISTILLED \
     --sparse_attn $SPARSE_ATTN \
     --use_sageattn $SAGE_ATTN \
+    --rewrite $REWRITE \
     --output_path $OUTPUT_PATH \
     --save_pre_sr_video \
     --model_path $MODEL_PATH
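
The README.md changes replace the old `--disable_rewrite` flag with a boolean `--rewrite` argument that defaults to `true`. As a hedged illustration of the new usage (not part of the commit), the sketch below disables rewriting explicitly for a run where no vLLM endpoint is configured; the single-GPU count, the `./results` output path, and the omission of the other arguments from the full README example are assumptions made for brevity.

```bash
# Sketch only: prompt rewriting explicitly turned off, per the new note
# ("--rewrite false" or "--rewrite 0"). Arguments not listed here are
# assumed to fall back to their defaults.
torchrun --nproc_per_node=1 generate.py \
    --prompt 'A girl holding a paper with words "Hello, world!"' \
    --rewrite false \
    --output_path ./results \
    --model_path ckpts
```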
README_CN.md CHANGED
@@ -190,7 +190,7 @@ pip install -i https://mirrors.tencent.com/pypi/simple/ --upgrade tencentcloud-s
 
 > 你也可以将上述模型名替换为任何你已部署、与 vLLM 兼容的模型(包括 HuggingFace 等模型)。
 >
-> 默认为开启提示词重写。若需显式关闭,可以使用 `--rewrite false` 或 `--rewrite 0`。如果未配置 vLLM 提示词重写相关服务,管道会在本地直接生成,无远程重写。
+> 默认为开启提示词重写(`--rewrite` 默认值为 `true`)。若需显式关闭,可以使用 `--rewrite false` 或 `--rewrite 0`。如果未配置 vLLM 提示词重写相关服务,管道会在本地直接生成,无远程重写。
 
 示例:生成视频(支持 T2V/I2V。T2V 模式下设置 `IMAGE_PATH=none`,I2V 模式下指定图像路径)
 
@@ -200,7 +200,7 @@ export T2V_REWRITE_MODEL_NAME="<your_model_name>"
 export I2V_REWRITE_BASE_URL="<your_vllm_server_base_url>"
 export I2V_REWRITE_MODEL_NAME="<your_model_name>"
 
-PROMPT='On a wet street corner in a cyberpunk city at night, a large neon sign reading "Hunyuan Video 1.5" lights up sequentially, illuminating the dark, rainy environment with a pinkish-purple glow. he scene is a dark, rain-slicked street corner in a futuristic, cinematic cyberpunk city. Mounted on the metallic, weathered facade of a building is a massive, unlit neon sign. The sign's glass tube framework clearly spells out the words "Hunyuan Video 1.5". Initially, the street is dimly lit, with ambient light from distant skyscrapers creating shimmering reflections on the wet asphalt below. Then, the camera zooms in slowly toward the sign. As it moves, a low electrical sizzling sound begins. In the background, the dense urban landscape of the cyberpunk metropolis is visible through a light atmospheric haze, with towering structures adorned with their own flickering advertisements. A complex web of cables and pipes crisscrosses between the buildings. The shot is at a low angle, looking up at the sign to emphasize its grand scale. The lighting is high-contrast and dramatic, dominated by the neon glow which creates sharp, specular reflections and deep shadows. The atmosphere is moody and tech-noir. The overall video presents a cinematic photography realistic style.,'
+PROMPT='A girl holding a paper with words "Hello, world!"'
 
 IMAGE_PATH=./data/reference_image.png # 可选,'none' 或 <图像路径>
 SEED=1
@@ -214,6 +214,7 @@ CFG_DISTILLED=true # 使用 CFG 蒸馏模型进行推理,2倍加速
 SPARSE_ATTN=true # 使用稀疏注意力进行推理
 SAGE_ATTN=false # 使用 SageAttention 进行推理
 MODEL_PATH=ckpts # 预训练模型路径
+REWRITE=true # 启用提示词重写
 
 torchrun --nproc_per_node=$N_INFERENCE_GPU generate.py \
     --prompt "$PROMPT" \
@@ -224,6 +225,7 @@ torchrun --nproc_per_node=$N_INFERENCE_GPU generate.py \
     --cfg_distilled $CFG_DISTILLED \
     --sparse_attn $SPARSE_ATTN \
     --use_sageattn $SAGE_ATTN \
+    --rewrite $REWRITE \
     --output_path $OUTPUT_PATH \
     --save_pre_sr_video \
     --model_path $MODEL_PATH
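
Both READMEs note that any vLLM-compatible model can back the rewrite endpoints referenced by `T2V_REWRITE_BASE_URL` / `I2V_REWRITE_BASE_URL`. As a hedged sketch (not from the commit), the snippet below shows one way to stand up a local vLLM server and point those variables at it; the stand-in model `Qwen/Qwen2.5-VL-7B-Instruct`, the port, and the `/v1` suffix are illustrative assumptions, so substitute whatever endpoint format the pipeline actually expects.

```bash
# Illustrative setup (assumptions: local server, port 8000, OpenAI-compatible /v1 path).
# Any vLLM-compatible model can replace the stand-in below.
vllm serve Qwen/Qwen2.5-VL-7B-Instruct --port 8000 &

# Point both rewrite endpoints at the local server.
export T2V_REWRITE_BASE_URL="http://127.0.0.1:8000/v1"
export T2V_REWRITE_MODEL_NAME="Qwen/Qwen2.5-VL-7B-Instruct"
export I2V_REWRITE_BASE_URL="http://127.0.0.1:8000/v1"
export I2V_REWRITE_MODEL_NAME="Qwen/Qwen2.5-VL-7B-Instruct"
```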