Spaces:
Paused
Paused
Yaron Koresh
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -50,6 +50,9 @@ from refiners.foundationals.latent_diffusion.stable_diffusion_1.multi_upscaler i
|
|
| 50 |
)
|
| 51 |
from datetime import datetime
|
| 52 |
|
|
|
|
|
|
|
|
|
|
| 53 |
working = False
|
| 54 |
|
| 55 |
model = T5ForConditionalGeneration.from_pretrained("t5-base")
|
|
@@ -670,13 +673,13 @@ function custom(){
|
|
| 670 |
# torch pipes
|
| 671 |
|
| 672 |
taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
|
| 673 |
-
good_vae = AutoencoderKL.from_pretrained("ostris/Flex.1-alpha", subfolder="vae", torch_dtype=dtype).to(device)
|
| 674 |
image_pipe = DiffusionPipeline.from_pretrained("ostris/Flex.1-alpha", torch_dtype=dtype, vae=taef1).to(device)
|
| 675 |
-
image_pipe.enable_model_cpu_offload()
|
| 676 |
|
| 677 |
torch.cuda.empty_cache()
|
| 678 |
|
| 679 |
-
image_pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(image_pipe)
|
| 680 |
|
| 681 |
# functionality
|
| 682 |
|
|
@@ -698,30 +701,39 @@ def upscaler(
|
|
| 698 |
|
| 699 |
log(f'CALL upscaler')
|
| 700 |
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
solver_type: type[Solver] = getattr(solvers, solver)
|
| 704 |
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 721 |
|
| 722 |
-
|
|
|
|
|
|
|
|
|
|
| 723 |
|
| 724 |
-
|
|
|
|
|
|
|
| 725 |
|
| 726 |
def get_tensor_length(tensor):
|
| 727 |
nums = list(tensor.size())
|
|
@@ -737,7 +749,7 @@ def _summarize(text):
|
|
| 737 |
gen = model.generate(
|
| 738 |
toks,
|
| 739 |
length_penalty=0.5,
|
| 740 |
-
num_beams=
|
| 741 |
early_stopping=True,
|
| 742 |
max_length=512
|
| 743 |
)
|
|
@@ -775,7 +787,7 @@ def generate_random_string(length):
|
|
| 775 |
characters = str(ascii_letters + digits)
|
| 776 |
return ''.join(random.choice(characters) for _ in range(length))
|
| 777 |
|
| 778 |
-
def add_song_cover_text(img,
|
| 779 |
|
| 780 |
draw = ImageDraw.Draw(img,mode="RGBA")
|
| 781 |
|
|
@@ -1392,8 +1404,11 @@ class GoogleTranslator(BaseTranslator):
|
|
| 1392 |
|
| 1393 |
def translate(txt,to_lang="en",from_lang="auto"):
|
| 1394 |
log(f'CALL translate')
|
| 1395 |
-
if len(txt) == 0
|
| 1396 |
-
print("Skipping translation...")
|
|
|
|
|
|
|
|
|
|
| 1397 |
return txt.strip().lower()
|
| 1398 |
translator = GoogleTranslator(from_lang=from_lang,to_lang=to_lang)
|
| 1399 |
translation = ""
|
|
@@ -1420,32 +1435,36 @@ def handle_generation(h,w,d):
|
|
| 1420 |
|
| 1421 |
log(f'CALL handle_generate')
|
| 1422 |
|
| 1423 |
-
|
| 1424 |
-
d_lines = re.split(r"([\n]){1,}", d)
|
| 1425 |
-
|
| 1426 |
-
for line_index in range(len(d_lines)):
|
| 1427 |
-
d_lines[line_index] = d_lines[line_index].strip()
|
| 1428 |
-
if re.sub(r'[\.]$', '', d_lines[line_index]) == d_lines[line_index]:
|
| 1429 |
-
d_lines[line_index] = d_lines[line_index].strip() + "."
|
| 1430 |
-
d = " ".join(d_lines)
|
| 1431 |
-
|
| 1432 |
-
pos_d = re.sub(r"([ \t]){1,}", " ", d).lower().strip()
|
| 1433 |
-
pos_d = pos_d if pos_d == "" else summarize(translate(pos_d))
|
| 1434 |
-
pos_d = re.sub(r"([ \t]){1,}", " ", pos_d).lower().strip()
|
| 1435 |
-
|
| 1436 |
-
neg = f"Textual, Text, Distorted, Fake, Discontinuous, Blurry, Doll-Like, Overly Plastic, Low Quality, Paint, Smoothed, Artificial, Phony, Gaudy, Digital Effects."
|
| 1437 |
-
q = "\""
|
| 1438 |
-
pos = f'HQ Hyper-realistic professional photograph{ pos_d if pos_d == "" else ": " + pos_d }.'
|
| 1439 |
|
| 1440 |
-
|
| 1441 |
-
|
| 1442 |
-
|
| 1443 |
-
|
| 1444 |
-
|
| 1445 |
-
|
| 1446 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1447 |
prompt=pos,
|
| 1448 |
-
|
| 1449 |
height=h,
|
| 1450 |
width=w,
|
| 1451 |
output_type="pil",
|
|
@@ -1454,9 +1473,15 @@ def handle_generation(h,w,d):
|
|
| 1454 |
num_inference_steps=image_steps,
|
| 1455 |
max_sequence_length=seq,
|
| 1456 |
generator=torch.Generator(device).manual_seed(random.randint(0, MAX_SEED))
|
| 1457 |
-
|
| 1458 |
-
|
| 1459 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1460 |
# entry
|
| 1461 |
|
| 1462 |
if __name__ == "__main__":
|
|
|
|
| 50 |
)
|
| 51 |
from datetime import datetime
|
| 52 |
|
| 53 |
+
_HEIGHT_ = None
|
| 54 |
+
_WIDTH_ = None
|
| 55 |
+
|
| 56 |
working = False
|
| 57 |
|
| 58 |
model = T5ForConditionalGeneration.from_pretrained("t5-base")
|
|
|
|
| 673 |
# torch pipes
|
| 674 |
|
| 675 |
taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
|
| 676 |
+
#good_vae = AutoencoderKL.from_pretrained("ostris/Flex.1-alpha", subfolder="vae", torch_dtype=dtype).to(device)
|
| 677 |
image_pipe = DiffusionPipeline.from_pretrained("ostris/Flex.1-alpha", torch_dtype=dtype, vae=taef1).to(device)
|
| 678 |
+
#image_pipe.enable_model_cpu_offload()
|
| 679 |
|
| 680 |
torch.cuda.empty_cache()
|
| 681 |
|
| 682 |
+
#image_pipe.flux_pipe_call_that_returns_an_iterable_of_images = flux_pipe_call_that_returns_an_iterable_of_images.__get__(image_pipe)
|
| 683 |
|
| 684 |
# functionality
|
| 685 |
|
|
|
|
| 701 |
|
| 702 |
log(f'CALL upscaler')
|
| 703 |
|
| 704 |
+
if not working:
|
|
|
|
|
|
|
| 705 |
|
| 706 |
+
working = True
|
| 707 |
+
|
| 708 |
+
manual_seed(seed)
|
| 709 |
+
|
| 710 |
+
solver_type: type[Solver] = getattr(solvers, solver)
|
| 711 |
+
|
| 712 |
+
log(f'DBG upscaler 1')
|
| 713 |
+
|
| 714 |
+
enhanced_image = enhancer.upscale(
|
| 715 |
+
image=input_image,
|
| 716 |
+
prompt=prompt,
|
| 717 |
+
negative_prompt=negative_prompt,
|
| 718 |
+
upscale_factor=upscale_factor,
|
| 719 |
+
controlnet_scale=controlnet_scale,
|
| 720 |
+
controlnet_scale_decay=controlnet_decay,
|
| 721 |
+
condition_scale=condition_scale,
|
| 722 |
+
tile_size=(tile_height, tile_width),
|
| 723 |
+
denoise_strength=denoise_strength,
|
| 724 |
+
num_inference_steps=num_inference_steps,
|
| 725 |
+
loras_scale={"more_details": 0.5, "sdxl_render": 1.0},
|
| 726 |
+
solver_type=solver_type,
|
| 727 |
+
)
|
| 728 |
|
| 729 |
+
_HEIGHT_ = _HEIGHT_ * upscale_factor
|
| 730 |
+
_WIDTH_ = _WIDTH_ * upscale_factor
|
| 731 |
+
|
| 732 |
+
log(f'RET upscaler')
|
| 733 |
|
| 734 |
+
working = False
|
| 735 |
+
|
| 736 |
+
return enhanced_image
|
| 737 |
|
| 738 |
def get_tensor_length(tensor):
|
| 739 |
nums = list(tensor.size())
|
|
|
|
| 749 |
gen = model.generate(
|
| 750 |
toks,
|
| 751 |
length_penalty=0.5,
|
| 752 |
+
num_beams=8,
|
| 753 |
early_stopping=True,
|
| 754 |
max_length=512
|
| 755 |
)
|
|
|
|
| 787 |
characters = str(ascii_letters + digits)
|
| 788 |
return ''.join(random.choice(characters) for _ in range(length))
|
| 789 |
|
| 790 |
+
def add_song_cover_text(img,title,h,w):
|
| 791 |
|
| 792 |
draw = ImageDraw.Draw(img,mode="RGBA")
|
| 793 |
|
|
|
|
| 1404 |
|
| 1405 |
def translate(txt,to_lang="en",from_lang="auto"):
|
| 1406 |
log(f'CALL translate')
|
| 1407 |
+
if len(txt) == 0:
|
| 1408 |
+
print("Translated text is empty. Skipping translation...")
|
| 1409 |
+
return txt.strip().lower()
|
| 1410 |
+
if from_lang == to_lang or get_language(txt) == to_lang:
|
| 1411 |
+
print("Same languages. Skipping translation...")
|
| 1412 |
return txt.strip().lower()
|
| 1413 |
translator = GoogleTranslator(from_lang=from_lang,to_lang=to_lang)
|
| 1414 |
translation = ""
|
|
|
|
| 1435 |
|
| 1436 |
log(f'CALL handle_generate')
|
| 1437 |
|
| 1438 |
+
if not working:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1439 |
|
| 1440 |
+
working = True
|
| 1441 |
+
|
| 1442 |
+
d = re.sub(r",( ){1,}",". ",d)
|
| 1443 |
+
d_lines = re.split(r"([\n]){1,}", d)
|
| 1444 |
+
|
| 1445 |
+
for line_index in range(len(d_lines)):
|
| 1446 |
+
d_lines[line_index] = d_lines[line_index].strip()
|
| 1447 |
+
if re.sub(r'[\.]$', '', d_lines[line_index]) == d_lines[line_index]:
|
| 1448 |
+
d_lines[line_index] = d_lines[line_index].strip() + "."
|
| 1449 |
+
d = " ".join(d_lines)
|
| 1450 |
+
|
| 1451 |
+
pos_d = re.sub(r"([ \t]){1,}", " ", d).lower().strip()
|
| 1452 |
+
pos_d = pos_d if pos_d == "" else summarize(translate(pos_d))
|
| 1453 |
+
pos_d = re.sub(r"([ \t]){1,}", " ", pos_d).lower().strip()
|
| 1454 |
+
|
| 1455 |
+
neg = f"Textual, Text, Distorted, Fake, Discontinuous, Blurry, Doll-Like, Overly Plastic, Low Quality, Paint, Smoothed, Artificial, Phony, Gaudy, Digital Effects."
|
| 1456 |
+
q = "\""
|
| 1457 |
+
pos = f'HQ Hyper-realistic professional photograph{ pos_d if pos_d == "" else ": " + pos_d }.'
|
| 1458 |
+
|
| 1459 |
+
print(f"""
|
| 1460 |
+
Positive: {pos}
|
| 1461 |
+
|
| 1462 |
+
Negative: {neg}
|
| 1463 |
+
""")
|
| 1464 |
+
|
| 1465 |
+
img = image_pipe(
|
| 1466 |
prompt=pos,
|
| 1467 |
+
negative_prompt=neg,
|
| 1468 |
height=h,
|
| 1469 |
width=w,
|
| 1470 |
output_type="pil",
|
|
|
|
| 1473 |
num_inference_steps=image_steps,
|
| 1474 |
max_sequence_length=seq,
|
| 1475 |
generator=torch.Generator(device).manual_seed(random.randint(0, MAX_SEED))
|
| 1476 |
+
)
|
| 1477 |
+
|
| 1478 |
+
working = False
|
| 1479 |
+
|
| 1480 |
+
_HEIGHT_ = h
|
| 1481 |
+
_WIDTH_ = w
|
| 1482 |
+
|
| 1483 |
+
return img
|
| 1484 |
+
|
| 1485 |
# entry
|
| 1486 |
|
| 1487 |
if __name__ == "__main__":
|