programmersd committed on
Commit
667e250
·
verified ·
1 Parent(s): bdb5cb4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -79
app.py CHANGED
@@ -1,111 +1,172 @@
1
- import os
2
  import gc
3
  import time
4
  import random
5
- import torch
 
 
 
 
6
  import gradio as gr
 
7
 
8
- from diffusers import DiffusionPipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- # =========================
11
- # HARD CPU MODE
12
- # =========================
13
- os.environ["CUDA_VISIBLE_DEVICES"] = ""
14
- os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
15
- os.environ["TOKENIZERS_PARALLELISM"] = "false"
16
 
17
- cpu_cores = os.cpu_count() or 1
18
- torch.set_num_threads(cpu_cores)
19
- torch.set_num_interop_threads(cpu_cores)
20
 
21
- os.environ["OMP_NUM_THREADS"] = str(cpu_cores)
22
- os.environ["MKL_NUM_THREADS"] = str(cpu_cores)
 
23
 
24
- torch.backends.mkldnn.enabled = True
 
 
 
 
 
25
 
26
- device = torch.device("cpu")
27
- dtype = torch.bfloat16 if torch.cpu.is_bf16_supported() else torch.float32
28
 
29
- MODEL_ID = "tensorart/stable-diffusion-3.5-medium-turbo"
30
- CACHE_DIR = "models"
31
-
32
- # =========================
33
- # LOAD PIPELINE
34
- # =========================
35
- def load_pipeline():
36
- pipe = DiffusionPipeline.from_pretrained(
37
- MODEL_ID,
38
- torch_dtype=dtype,
39
- cache_dir=CACHE_DIR,
40
- low_cpu_mem_usage=True
41
- )
42
 
43
- pipe.enable_attention_slicing()
44
- pipe.enable_vae_slicing()
45
- pipe.enable_sequential_cpu_offload()
46
 
47
- pipe = pipe.to(device)
 
48
 
49
- return pipe
 
50
 
51
- pipe = load_pipeline()
 
 
 
52
 
53
- # =========================
54
- # GENERATION
55
- # =========================
56
- def generate(prompt, seed, progress=gr.Progress()):
57
- if not prompt:
58
- raise gr.Error("Prompt required")
59
 
60
- if seed < 0:
61
- seed = random.randint(0, 2**31 - 1)
 
 
 
 
 
 
62
 
63
  generator = torch.Generator(device=device).manual_seed(seed)
64
 
65
- steps = 6
66
- width = 512
67
- height = 512
 
 
 
 
 
 
 
 
68
 
69
- start = time.time()
70
 
71
- def callback(step, timestep, latents):
72
- done = step + 1
73
- elapsed = time.time() - start
74
- eta = (elapsed / done) * (steps - done)
75
- progress(done / steps, desc=f"Step {done}/{steps} | ETA {eta:.1f}s")
 
 
 
 
 
 
 
 
76
 
77
- with torch.inference_mode():
78
  gc.collect()
79
- image = pipe(
80
- prompt=prompt,
81
- width=width,
82
- height=height,
83
- num_inference_steps=steps,
84
- guidance_scale=0.0,
85
- generator=generator,
86
- callback=callback,
87
- callback_steps=1
88
- ).images[0]
89
  gc.collect()
 
 
 
90
 
91
- return image, seed
92
 
93
- # =========================
94
- # UI
95
- # =========================
96
- with gr.Blocks(title="SD 3.5 Medium Turbo CPU Ultra Lean") as demo:
97
- gr.Markdown("# Stable Diffusion 3.5 Medium Turbo — 16GB CPU Mode")
98
 
99
- prompt = gr.Textbox(label="Prompt", lines=3)
100
- seed = gr.Number(label="Seed (-1 random)", value=-1, precision=0)
101
- btn = gr.Button("Generate")
102
 
103
- image_out = gr.Image()
104
- seed_out = gr.Number(interactive=False)
 
105
 
106
- btn.click(generate, inputs=[prompt, seed], outputs=[image_out, seed_out])
107
 
108
- demo.queue(max_size=5, concurrency_count=1)
 
 
 
 
 
 
 
 
 
109
 
110
- if __name__ == "__main__":
111
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
+ import torch
2
  import gc
3
  import time
4
  import random
5
+ import os
6
+ import hashlib
7
+ import shutil
8
+ import psutil
9
+ from diffusers import DiffusionPipeline
10
  import gradio as gr
11
+ from PIL import Image, PngImagePlugin
12
 
13
# ---- Configuration -------------------------------------------------------
MODEL_ID = "tensorart/stable-diffusion-3.5-medium-turbo"
CACHE_DIR = "./hf_cache"
OUTPUT_DIR = "./outputs"
MAX_CACHE_SIZE_GB = 2

# Make sure the model cache and image output folders exist before first use.
for _dir in (CACHE_DIR, OUTPUT_DIR):
    os.makedirs(_dir, exist_ok=True)

# CPU-only inference, full precision.
device = "cpu"
dtype = torch.float32

# ---- Pipeline ------------------------------------------------------------
pipe = DiffusionPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=dtype,
    safety_checker=None,
    cache_dir=CACHE_DIR,
    low_cpu_mem_usage=True,
)
pipe.to(device)

# Memory-friendly settings for a small CPU host; silence the console bar.
pipe.enable_attention_slicing()
pipe.enable_vae_slicing()
pipe.set_progress_bar_config(disable=True)
36
+
37
def warmup():
    """Run one throwaway 256x256 single-step generation so lazy model
    initialization is paid before the first real user request."""
    warmup_kwargs = dict(
        prompt="warmup",
        num_inference_steps=1,
        guidance_scale=0.0,
        width=256,
        height=256,
    )
    with torch.inference_mode():
        pipe(**warmup_kwargs)
    gc.collect()


warmup()
 
 
 
 
 
49
 
50
def get_ram_usage():
    """Return currently used system RAM in GiB, rounded to two decimals."""
    used_bytes = psutil.virtual_memory().used
    gib = 1024 ** 3
    return round(used_bytes / gib, 2)
 
52
 
53
def prune_cache():
    """Delete the oldest cached output PNGs until the outputs folder is
    back under MAX_CACHE_SIZE_GB.

    Best-effort: files that vanish between listing, stat, and removal
    (e.g. a concurrent prune or manual cleanup) are skipped instead of
    crashing the request that triggered the prune.
    """
    max_bytes = MAX_CACHE_SIZE_GB * 1024 * 1024 * 1024

    total_size = 0
    files = []  # (path, size, mtime)
    for entry in os.scandir(OUTPUT_DIR):
        try:
            if entry.is_file():
                st = entry.stat()
                total_size += st.st_size
                files.append((entry.path, st.st_size, st.st_mtime))
        except OSError:
            # Entry disappeared between listing and stat — ignore it.
            continue

    if total_size <= max_bytes:
        return

    # Evict least-recently-modified files first.
    files.sort(key=lambda item: item[2])

    for path, size, _ in files:
        try:
            os.remove(path)
        except OSError:
            # Already gone; nothing was reclaimed, keep scanning.
            continue
        total_size -= size
        if total_size <= max_bytes:
            break
 
 
 
76
 
77
def build_cache_key(prompt, negative_prompt, steps, guidance, width, height, seed):
    """Deterministic, filename-safe key identifying one generation request."""
    fields = (prompt, negative_prompt, steps, guidance, width, height, seed)
    raw = "|".join(str(field) for field in fields)
    return hashlib.sha256(raw.encode()).hexdigest()
80
 
81
def generate(prompt, negative_prompt, steps, guidance, width, height, seed):
    """Generate one image, or fetch it from the on-disk PNG cache.

    Parameters mirror the Gradio inputs. Returns a (PIL image or None,
    status string) pair; errors are reported through the status string
    rather than raised, so the UI stays responsive.
    """
    start_time = time.time()

    # Guard against both None (cleared component) and whitespace-only input.
    if not prompt or not prompt.strip():
        return None, "Prompt cannot be empty."

    # Clamp user input into ranges this CPU pipeline can realistically serve.
    width = max(256, min(int(width), 768))
    height = max(256, min(int(height), 768))
    steps = max(1, min(int(steps), 8))
    guidance = max(0.0, min(float(guidance), 7.5))

    # gr.Number delivers a float; torch.Generator.manual_seed requires int.
    seed = int(seed)
    if seed < 0:  # any negative seed means "pick one for me"
        seed = random.randint(0, 2**32 - 1)

    cache_key = build_cache_key(prompt, negative_prompt, steps, guidance, width, height, seed)
    cache_path = os.path.join(OUTPUT_DIR, f"{cache_key}.png")

    if os.path.exists(cache_path):
        image = Image.open(cache_path)
        image.load()  # read pixels now so the file handle is released
        duration = round(time.time() - start_time, 2)
        ram = get_ram_usage()
        return image, f"Loaded from cache | Seed: {seed} | Time: {duration}s | RAM: {ram}GB"

    generator = torch.Generator(device=device).manual_seed(seed)

    try:
        with torch.inference_mode():
            result = pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                num_inference_steps=steps,
                guidance_scale=guidance,
                width=width,
                height=height,
                generator=generator
            )

        image = result.images[0]

        # Embed the full recipe in the PNG so any result is reproducible.
        metadata = PngImagePlugin.PngInfo()
        metadata.add_text("prompt", prompt)
        metadata.add_text("negative_prompt", negative_prompt)
        metadata.add_text("steps", str(steps))
        metadata.add_text("guidance", str(guidance))
        metadata.add_text("seed", str(seed))

        image.save(cache_path, pnginfo=metadata)

        prune_cache()

        duration = round(time.time() - start_time, 2)
        ram = get_ram_usage()

        gc.collect()

        return image, f"Generated | Seed: {seed} | Time: {duration}s | RAM: {ram}GB"

    except Exception as e:
        gc.collect()
        return None, f"Error: {str(e)}"
141
+
142
# ---- UI ------------------------------------------------------------------
with gr.Blocks(title="SD 3.5 Turbo - Ultimate CPU Mode") as demo:

    gr.Markdown("## Stable Diffusion 3.5 Medium Turbo - Ultimate CPU Edition")

    with gr.Row():
        prompt = gr.Textbox(label="Prompt")
        negative_prompt = gr.Textbox(label="Negative Prompt")

    with gr.Row():
        steps = gr.Slider(1, 8, value=4, step=1, label="Steps")
        guidance = gr.Slider(0.0, 7.5, value=0.0, step=0.5, label="Guidance")

    with gr.Row():
        width = gr.Slider(256, 768, value=512, step=64, label="Width")
        height = gr.Slider(256, 768, value=512, step=64, label="Height")

    # precision=0 makes the component deliver an int, as manual_seed expects.
    seed = gr.Number(value=-1, label="Seed (-1 random)", precision=0)

    generate_btn = gr.Button("Generate")

    output_image = gr.Image(type="pil")
    status = gr.Textbox(label="Status")

    generate_btn.click(
        generate,
        inputs=[prompt, negative_prompt, steps, guidance, width, height, seed],
        outputs=[output_image, status]
    )

# Gradio 4 renamed `concurrency_count` to `default_concurrency_limit` and
# dropped `status_update_rate`; passing the old kwargs raises TypeError at
# startup on modern Gradio. Try the current API first, fall back to 3.x.
try:
    demo.queue(max_size=10, default_concurrency_limit=1)
except TypeError:
    demo.queue(max_size=10, concurrency_count=1, status_update_rate=1)
demo.launch()