smthem committed on
Commit
d47fef9
·
verified ·
1 Parent(s): a4d6110

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +73 -3
README.md CHANGED
@@ -1,3 +1,73 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
4
+ Use the standard diffusers `LTX2Pipeline` to run this model.
5
+ Example:
6
+ ```
7
+
8
+ from diffusers import LTX2Pipeline
9
+ from diffusers.pipelines.ltx2.export_utils import encode_video
10
+ repo= 'smthem/ltx-2-19b-dev-diffusers-4bit'
11
+
12
+
13
+ ### text_encoder
14
+ from transformers import Gemma3ForConditionalGeneration
15
+ text_encoder = Gemma3ForConditionalGeneration.from_pretrained(
16
+ repo,
17
+ subfolder="text_encoder",
18
+ quantization_config=quant_config,
19
+ torch_dtype=torch.float16,
20
+ )
21
+
22
+ ### transformer
23
+ transformer_4bit = AutoModel.from_pretrained(
24
+ repo,
25
+ subfolder="transformer",
26
+ quantization_config=quant_config,
27
+ torch_dtype=torch.float16,
28
+ )
29
+ pipeline = LTX2Pipeline.from_pretrained("smthem/ltx-2-19b-dev-diffusers-test",transformer=transformer_4bit,text_encoder=text_encoder,torch_dtype=torch.float16,)
30
+ pipeline.enable_model_cpu_offload()
31
+
32
+ prompt='A video of a dog dancing to energetic electronic dance music'
33
+ negative_prompt="blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, "
34
+ "grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, "
35
+ "deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, "
36
+ "wrong hand count, artifacts around text, inconsistent perspective, camera shake, incorrect depth of "
37
+ "field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent "
38
+ "lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny "
39
+ "valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, wrong gaze direction, "
40
+ "mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, "
41
+ "off-sync audio,incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward "
42
+ "pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, flat lighting, "
43
+ "inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts."
44
+
45
+
46
+ video, audio = pipeline(
47
+ prompt=prompt,
48
+ negative_prompt=negative_prompt,
49
+ height=512,
50
+ width=768,
51
+ num_frames=121,
52
+ frame_rate=25,
53
+ num_inference_steps=20,
54
+ guidance_scale=guidance_scale,
55
+ generator=torch.Generator(device="cuda").manual_seed(42),
56
+ output_type="np",
57
+ return_dict=False,
58
+ )
59
+
60
+ # Convert video to uint8 (but keep as NumPy array)
61
+ video = (video * 255).round().astype("uint8")
62
+ video = torch.from_numpy(video)
63
+
64
+ encode_video(
65
+ video[0],
66
+ fps=args.frame_rate,
67
+ audio=audio[0].float().cpu(),
68
+ audio_sample_rate=pipeline.vocoder.config.output_sampling_rate, # should be 24000
69
+ output_path=os.path.join(args.output_dir, args.output_filename),
70
+ )
71
+
72
+
73
+ ```