programmersd committed on
Commit
895942e
·
verified ·
1 Parent(s): 022f3b6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gc
3
+ import time
4
+ import random
5
+ import torch
6
+ import gradio as gr
7
+
8
+ from diffusers import DiffusionPipeline
9
+
10
# =========================
# HARD CPU MODE
# =========================
# Hide any GPUs so torch/diffusers never take CUDA paths, and silence
# HF telemetry / tokenizer fork warnings.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Use every available core for both intra- and inter-op parallelism.
cpu_cores = os.cpu_count() or 1
torch.set_num_threads(cpu_cores)
try:
    # Raises RuntimeError if the interop pool has already started
    # (e.g. on module reload); the default pool is acceptable then.
    torch.set_num_interop_threads(cpu_cores)
except RuntimeError:
    pass

os.environ["OMP_NUM_THREADS"] = str(cpu_cores)
os.environ["MKL_NUM_THREADS"] = str(cpu_cores)

torch.backends.mkldnn.enabled = True

device = torch.device("cpu")

# BUG FIX: `torch.cpu.is_bf16_supported()` is not a public API in most
# torch releases, so the original line raised AttributeError at import.
# Probe for it defensively and fall back to float32 when unavailable.
_bf16_probe = getattr(torch.cpu, "is_bf16_supported", None)
try:
    _bf16_ok = bool(_bf16_probe()) if callable(_bf16_probe) else False
except Exception:
    _bf16_ok = False
dtype = torch.bfloat16 if _bf16_ok else torch.float32

MODEL_ID = "tensorart/stable-diffusion-3.5-medium-turbo"
CACHE_DIR = "models"
31
+
32
# =========================
# LOAD PIPELINE
# =========================
def load_pipeline():
    """Load the SD 3.5 Medium Turbo pipeline configured for CPU inference.

    Returns the pipeline moved to ``device`` with whatever memory-saving
    features the resolved pipeline class actually supports enabled.
    """
    pipe = DiffusionPipeline.from_pretrained(
        MODEL_ID,
        torch_dtype=dtype,
        cache_dir=CACHE_DIR,
        low_cpu_mem_usage=True,
    )

    # BUG FIX: enable_sequential_cpu_offload() requires an accelerator
    # device (and accelerate) — on a CPU-only host it fails, and diffusers
    # rejects .to(device) after offload is enabled, so it is dropped here.
    # SD3-family pipelines also do not implement attention/VAE slicing;
    # enable each optimization only when the loaded class supports it.
    for opt in ("enable_attention_slicing", "enable_vae_slicing"):
        method = getattr(pipe, opt, None)
        if callable(method):
            try:
                method()
            except Exception:
                # Best-effort memory saving; never block startup on it.
                pass

    return pipe.to(device)


pipe = load_pipeline()
52
+
53
# =========================
# GENERATION
# =========================
def generate(prompt, seed, progress=gr.Progress()):
    """Run one text-to-image generation, reporting per-step progress.

    Parameters
    ----------
    prompt : str
        Text prompt; an empty/falsy value raises ``gr.Error``.
    seed : int
        RNG seed; any negative value selects a random seed.

    Returns
    -------
    (image, int)
        The generated image and the seed actually used.
    """
    if not prompt:
        raise gr.Error("Prompt required")

    # gr.Number may deliver a float; Generator.manual_seed requires int.
    seed = int(seed)
    if seed < 0:
        seed = random.randint(0, 2**31 - 1)

    generator = torch.Generator(device=device).manual_seed(seed)

    steps = 6
    width = 512
    height = 512

    start = time.time()

    # BUG FIX: the legacy `callback=` / `callback_steps=` kwargs were
    # removed from modern diffusers pipelines (SD3 pipelines never
    # accepted them, raising TypeError). Use callback_on_step_end, which
    # must return the callback_kwargs dict.
    def _on_step_end(pipeline, step, timestep, callback_kwargs):
        done = step + 1
        elapsed = time.time() - start
        eta = (elapsed / done) * (steps - done)
        progress(done / steps, desc=f"Step {done}/{steps} | ETA {eta:.1f}s")
        return callback_kwargs

    with torch.inference_mode():
        gc.collect()  # trim Python garbage ahead of the big allocation
        image = pipe(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=steps,
            guidance_scale=0.0,  # turbo models are distilled for CFG-free sampling
            generator=generator,
            callback_on_step_end=_on_step_end,
        ).images[0]
        gc.collect()

    return image, seed
92
+
93
# =========================
# UI
# =========================
with gr.Blocks(title="SD 3.5 Medium Turbo CPU Ultra Lean") as demo:
    gr.Markdown("# Stable Diffusion 3.5 Medium Turbo — 16GB CPU Mode")

    prompt = gr.Textbox(label="Prompt", lines=3)
    seed = gr.Number(label="Seed (-1 random)", value=-1, precision=0)
    btn = gr.Button("Generate")

    image_out = gr.Image()
    seed_out = gr.Number(interactive=False)

    btn.click(generate, inputs=[prompt, seed], outputs=[image_out, seed_out])

# BUG FIX: `concurrency_count` was removed in Gradio 4, so the original
# call raised TypeError. The modern equivalent for "one job at a time"
# is default_concurrency_limit.
demo.queue(max_size=5, default_concurrency_limit=1)

if __name__ == "__main__":
    # Bind to all interfaces on the standard HF Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)