Spaces:
Paused
Paused
Yaron Koresh
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -50,6 +50,9 @@ from refiners.foundationals.latent_diffusion.stable_diffusion_1.multi_upscaler i
|
|
| 50 |
)
|
| 51 |
from datetime import datetime
|
| 52 |
|
|
|
|
|
|
|
|
|
|
| 53 |
working = False
|
| 54 |
|
| 55 |
model = T5ForConditionalGeneration.from_pretrained("t5-base")
|
|
@@ -670,13 +673,13 @@ function custom(){
|
|
| 670 |
# torch pipes
|
| 671 |
|
| 672 |
taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
|
| 673 |
-
good_vae = AutoencoderKL.from_pretrained("ostris/Flex.1-alpha", subfolder="vae", torch_dtype=dtype).to(device)
|
| 674 |
image_pipe = DiffusionPipeline.from_pretrained("ostris/Flex.1-alpha", torch_dtype=dtype, vae=taef1).to(device)
|
| 675 |
-
image_pipe.enable_model_cpu_offload()
|
| 676 |
|
| 677 |
torch.cuda.empty_cache()
|
| 678 |
|
| 679 |
-
image_pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(image_pipe)
|
| 680 |
|
| 681 |
# functionality
|
| 682 |
|
|
@@ -698,30 +701,39 @@ def upscaler(
|
|
| 698 |
|
| 699 |
log(f'CALL upscaler')
|
| 700 |
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
solver_type: type[Solver] = getattr(solvers, solver)
|
| 704 |
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 721 |
|
| 722 |
-
|
|
|
|
|
|
|
|
|
|
| 723 |
|
| 724 |
-
|
|
|
|
|
|
|
| 725 |
|
| 726 |
def get_tensor_length(tensor):
|
| 727 |
nums = list(tensor.size())
|
|
@@ -737,7 +749,7 @@ def _summarize(text):
|
|
| 737 |
gen = model.generate(
|
| 738 |
toks,
|
| 739 |
length_penalty=0.5,
|
| 740 |
-
num_beams=
|
| 741 |
early_stopping=True,
|
| 742 |
max_length=512
|
| 743 |
)
|
|
@@ -775,7 +787,7 @@ def generate_random_string(length):
|
|
| 775 |
characters = str(ascii_letters + digits)
|
| 776 |
return ''.join(random.choice(characters) for _ in range(length))
|
| 777 |
|
| 778 |
-
def add_song_cover_text(img,
|
| 779 |
|
| 780 |
draw = ImageDraw.Draw(img,mode="RGBA")
|
| 781 |
|
|
@@ -1392,8 +1404,11 @@ class GoogleTranslator(BaseTranslator):
|
|
| 1392 |
|
| 1393 |
def translate(txt,to_lang="en",from_lang="auto"):
|
| 1394 |
log(f'CALL translate')
|
| 1395 |
-
if len(txt) == 0
|
| 1396 |
-
print("Skipping translation...")
|
|
|
|
|
|
|
|
|
|
| 1397 |
return txt.strip().lower()
|
| 1398 |
translator = GoogleTranslator(from_lang=from_lang,to_lang=to_lang)
|
| 1399 |
translation = ""
|
|
@@ -1420,32 +1435,36 @@ def handle_generation(h,w,d):
|
|
| 1420 |
|
| 1421 |
log(f'CALL handle_generate')
|
| 1422 |
|
| 1423 |
-
|
| 1424 |
-
d_lines = re.split(r"([\n]){1,}", d)
|
| 1425 |
-
|
| 1426 |
-
for line_index in range(len(d_lines)):
|
| 1427 |
-
d_lines[line_index] = d_lines[line_index].strip()
|
| 1428 |
-
if re.sub(r'[\.]$', '', d_lines[line_index]) == d_lines[line_index]:
|
| 1429 |
-
d_lines[line_index] = d_lines[line_index].strip() + "."
|
| 1430 |
-
d = " ".join(d_lines)
|
| 1431 |
-
|
| 1432 |
-
pos_d = re.sub(r"([ \t]){1,}", " ", d).lower().strip()
|
| 1433 |
-
pos_d = pos_d if pos_d == "" else summarize(translate(pos_d))
|
| 1434 |
-
pos_d = re.sub(r"([ \t]){1,}", " ", pos_d).lower().strip()
|
| 1435 |
-
|
| 1436 |
-
neg = f"Textual, Text, Distorted, Fake, Discontinuous, Blurry, Doll-Like, Overly Plastic, Low Quality, Paint, Smoothed, Artificial, Phony, Gaudy, Digital Effects."
|
| 1437 |
-
q = "\""
|
| 1438 |
-
pos = f'HQ Hyper-realistic professional photograph{ pos_d if pos_d == "" else ": " + pos_d }.'
|
| 1439 |
|
| 1440 |
-
|
| 1441 |
-
|
| 1442 |
-
|
| 1443 |
-
|
| 1444 |
-
|
| 1445 |
-
|
| 1446 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1447 |
prompt=pos,
|
| 1448 |
-
|
| 1449 |
height=h,
|
| 1450 |
width=w,
|
| 1451 |
output_type="pil",
|
|
@@ -1454,9 +1473,15 @@ def handle_generation(h,w,d):
|
|
| 1454 |
num_inference_steps=image_steps,
|
| 1455 |
max_sequence_length=seq,
|
| 1456 |
generator=torch.Generator(device).manual_seed(random.randint(0, MAX_SEED))
|
| 1457 |
-
|
| 1458 |
-
|
| 1459 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1460 |
# entry
|
| 1461 |
|
| 1462 |
if __name__ == "__main__":
|
|
|
|
| 50 |
)
|
| 51 |
from datetime import datetime
|
| 52 |
|
| 53 |
+
_HEIGHT_ = None
|
| 54 |
+
_WIDTH_ = None
|
| 55 |
+
|
| 56 |
working = False
|
| 57 |
|
| 58 |
model = T5ForConditionalGeneration.from_pretrained("t5-base")
|
|
|
|
| 673 |
# torch pipes
|
| 674 |
|
| 675 |
taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
|
| 676 |
+
#good_vae = AutoencoderKL.from_pretrained("ostris/Flex.1-alpha", subfolder="vae", torch_dtype=dtype).to(device)
|
| 677 |
image_pipe = DiffusionPipeline.from_pretrained("ostris/Flex.1-alpha", torch_dtype=dtype, vae=taef1).to(device)
|
| 678 |
+
#image_pipe.enable_model_cpu_offload()
|
| 679 |
|
| 680 |
torch.cuda.empty_cache()
|
| 681 |
|
| 682 |
+
#image_pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(image_pipe)
|
| 683 |
|
| 684 |
# functionality
|
| 685 |
|
|
|
|
| 701 |
|
| 702 |
log(f'CALL upscaler')
|
| 703 |
|
| 704 |
+
if not working:
|
|
|
|
|
|
|
| 705 |
|
| 706 |
+
working = True
|
| 707 |
+
|
| 708 |
+
manual_seed(seed)
|
| 709 |
+
|
| 710 |
+
solver_type: type[Solver] = getattr(solvers, solver)
|
| 711 |
+
|
| 712 |
+
log(f'DBG upscaler 1')
|
| 713 |
+
|
| 714 |
+
enhanced_image = enhancer.upscale(
|
| 715 |
+
image=input_image,
|
| 716 |
+
prompt=prompt,
|
| 717 |
+
negative_prompt=negative_prompt,
|
| 718 |
+
upscale_factor=upscale_factor,
|
| 719 |
+
controlnet_scale=controlnet_scale,
|
| 720 |
+
controlnet_scale_decay=controlnet_decay,
|
| 721 |
+
condition_scale=condition_scale,
|
| 722 |
+
tile_size=(tile_height, tile_width),
|
| 723 |
+
denoise_strength=denoise_strength,
|
| 724 |
+
num_inference_steps=num_inference_steps,
|
| 725 |
+
loras_scale={"more_details": 0.5, "sdxl_render": 1.0},
|
| 726 |
+
solver_type=solver_type,
|
| 727 |
+
)
|
| 728 |
|
| 729 |
+
_HEIGHT_ = _HEIGHT_ * upscale_factor
|
| 730 |
+
_WIDTH_ = _WIDTH_ * upscale_factor
|
| 731 |
+
|
| 732 |
+
log(f'RET upscaler')
|
| 733 |
|
| 734 |
+
working = False
|
| 735 |
+
|
| 736 |
+
return enhanced_image
|
| 737 |
|
| 738 |
def get_tensor_length(tensor):
|
| 739 |
nums = list(tensor.size())
|
|
|
|
| 749 |
gen = model.generate(
|
| 750 |
toks,
|
| 751 |
length_penalty=0.5,
|
| 752 |
+
num_beams=8,
|
| 753 |
early_stopping=True,
|
| 754 |
max_length=512
|
| 755 |
)
|
|
|
|
| 787 |
characters = str(ascii_letters + digits)
|
| 788 |
return ''.join(random.choice(characters) for _ in range(length))
|
| 789 |
|
| 790 |
+
def add_song_cover_text(img,title,h,w):
|
| 791 |
|
| 792 |
draw = ImageDraw.Draw(img,mode="RGBA")
|
| 793 |
|
|
|
|
| 1404 |
|
| 1405 |
def translate(txt,to_lang="en",from_lang="auto"):
|
| 1406 |
log(f'CALL translate')
|
| 1407 |
+
if len(txt) == 0:
|
| 1408 |
+
print("Translated text is empty. Skipping translation...")
|
| 1409 |
+
return txt.strip().lower()
|
| 1410 |
+
if from_lang == to_lang or get_language(txt) == to_lang:
|
| 1411 |
+
print("Same languages. Skipping translation...")
|
| 1412 |
return txt.strip().lower()
|
| 1413 |
translator = GoogleTranslator(from_lang=from_lang,to_lang=to_lang)
|
| 1414 |
translation = ""
|
|
|
|
| 1435 |
|
| 1436 |
log(f'CALL handle_generate')
|
| 1437 |
|
| 1438 |
+
if not working:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1439 |
|
| 1440 |
+
working = True
|
| 1441 |
+
|
| 1442 |
+
d = re.sub(r",( ){1,}",". ",d)
|
| 1443 |
+
d_lines = re.split(r"([\n]){1,}", d)
|
| 1444 |
+
|
| 1445 |
+
for line_index in range(len(d_lines)):
|
| 1446 |
+
d_lines[line_index] = d_lines[line_index].strip()
|
| 1447 |
+
if re.sub(r'[\.]$', '', d_lines[line_index]) == d_lines[line_index]:
|
| 1448 |
+
d_lines[line_index] = d_lines[line_index].strip() + "."
|
| 1449 |
+
d = " ".join(d_lines)
|
| 1450 |
+
|
| 1451 |
+
pos_d = re.sub(r"([ \t]){1,}", " ", d).lower().strip()
|
| 1452 |
+
pos_d = pos_d if pos_d == "" else summarize(translate(pos_d))
|
| 1453 |
+
pos_d = re.sub(r"([ \t]){1,}", " ", pos_d).lower().strip()
|
| 1454 |
+
|
| 1455 |
+
neg = f"Textual, Text, Distorted, Fake, Discontinuous, Blurry, Doll-Like, Overly Plastic, Low Quality, Paint, Smoothed, Artificial, Phony, Gaudy, Digital Effects."
|
| 1456 |
+
q = "\""
|
| 1457 |
+
pos = f'HQ Hyper-realistic professional photograph{ pos_d if pos_d == "" else ": " + pos_d }.'
|
| 1458 |
+
|
| 1459 |
+
print(f"""
|
| 1460 |
+
Positive: {pos}
|
| 1461 |
+
|
| 1462 |
+
Negative: {neg}
|
| 1463 |
+
""")
|
| 1464 |
+
|
| 1465 |
+
img = image_pipe(
|
| 1466 |
prompt=pos,
|
| 1467 |
+
negative_prompt=neg,
|
| 1468 |
height=h,
|
| 1469 |
width=w,
|
| 1470 |
output_type="pil",
|
|
|
|
| 1473 |
num_inference_steps=image_steps,
|
| 1474 |
max_sequence_length=seq,
|
| 1475 |
generator=torch.Generator(device).manual_seed(random.randint(0, MAX_SEED))
|
| 1476 |
+
)
|
| 1477 |
+
|
| 1478 |
+
working = False
|
| 1479 |
+
|
| 1480 |
+
_HEIGHT_ = h
|
| 1481 |
+
_WIDTH_ = w
|
| 1482 |
+
|
| 1483 |
+
return img
|
| 1484 |
+
|
| 1485 |
# entry
|
| 1486 |
|
| 1487 |
if __name__ == "__main__":
|