style-transfer-test-space

Running

App Files Files Community

daviondk committed on Jun 12

Commit

61da5bb

1 Parent(s): 80c68d0

add models

Browse files

Files changed (1) hide show

app.py +35 -70

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ from datetime import datetime
 import gradio as gr
 import torch
 from transformers import pipeline, TextIteratorStreamer
 import spaces  # Import spaces early to enable ZeroGPU support
 from transformers import (
@@ -24,87 +25,45 @@ cancel_event = threading.Event()
 # Style-Transfer Model Definitions
 # ------------------------------
 MODELS = {
-    "T-lite": {"repo_id": "daviondk7131/bodrunov-t-lite-lora-16", "description": "T-lite wite LoRA style adapter"}
 }
-# Global cache for pipelines to avoid re-loading.
-PIPELINES = {}
-def load_pipeline(model_name):
-    """
-    Load and cache a transformers pipeline for text generation.
-    Tries bfloat16, falls back to float16 or float32 if unsupported.
-    """
-    load_kwargs = {
-        "pretrained_model_name_or_path": "daviondk7131/bodrunov-t-lite-lora-16",
-        "device_map": "auto",
-        "torch_dtype": torch.float16,
-        "trust_remote_code": True
-    }
-    tokenizer = AutoTokenizer.from_pretrained("t-tech/T-lite-it-1.0")
-    model = AutoModelForCausalLM.from_pretrained(**load_kwargs).to("cuda")
-    global PIPELINES
-    if model_name in PIPELINES:
-        return PIPELINES[model_name]
-    repo = MODELS[model_name]["repo_id"]
-    for dtype in (torch.bfloat16, torch.float16, torch.float32):
-        try:
-            pipe = pipeline(
-                task="text-generation",
-                model=repo,
-                tokenizer=repo,
-                trust_remote_code=True,
-                torch_dtype=dtype,
-                device_map="auto"
-            )
-            PIPELINES[model_name] = pipe
-            return pipe
-        except Exception:
-            continue
-    # Final fallback
-    pipe = pipeline(
-        task="text-generation",
-        model=repo,
-        tokenizer=repo,
-        trust_remote_code=True,
-        device_map="auto"
-    )
-    PIPELINES[model_name] = pipe
-    return pipe
-def format_conversation(history, system_prompt):
-    """
-    Flatten chat history and system prompt into a single string.
-    """
-    prompt = system_prompt.strip() + "\n"
-    for user_msg, assistant_msg in history:
-        prompt += "User: " + user_msg.strip() + "\n"
-        if assistant_msg:  # might be None or empty
-            prompt += "Assistant: " + assistant_msg.strip() + "\n"
-    prompt += "Assistant: "
-    return prompt
 # Function to get just the model name from the dropdown selection
 def get_model_name(full_selection):
     """
     Return the model name: the text before the first " - " in the selection.

     If the separator is absent, the whole selection string is returned.
     """
     model_key, _separator, _remainder = full_selection.partition(" - ")
     return model_key
 # User input handling function
 def user_input(user_message, history):
     """
     Append the user's message to the chat history as an unanswered turn.

     Returns a pair: an empty string and a new history list whose last
     element is (user_message, None). The list passed in is not modified.
     """
     pending_turn = (user_message, None)
     extended_history = history + [pending_turn]
     return "", extended_history
 STYLE_TEMPLATE_PROMPT = """Below is an instruction describing the task, combined with input data that provides further context. Write a response that completes the request accordingly.
 ### Instruction:
-Write down the text from the input data in the style of the author bodrunov.
 ### Input data:
 {}
@@ -160,7 +119,7 @@ def bot_response(history, model_selection, max_tokens, temperature, top_k, top_p
     #history_without_last = history[:-1]
     # Get model name from selection
-    #model_name = get_model_name(model_selection)
     # Format the conversation
     #conversation = format_conversation(history_without_last, system_prompt)
@@ -173,17 +132,23 @@ def bot_response(history, model_selection, max_tokens, temperature, top_k, top_p
         """
         load_kwargs = {
-            "pretrained_model_name_or_path": "daviondk7131/bodrunov-t-lite-lora-16",
             "device_map": "auto",
             "torch_dtype": torch.float16,
             "trust_remote_code": True
         }
-        tokenizer = AutoTokenizer.from_pretrained("t-tech/T-lite-it-1.0")
         model = AutoModelForCausalLM.from_pretrained(**load_kwargs).to("cuda")
         #pipe = load_pipeline(model_name)
         user_message = history[-1][0]
-        response = generate(model, tokenizer, user_message, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty)
         # Update the last message pair with the response
         history[-1] = (user_message, response)

 import gradio as gr
 import torch
 from transformers import pipeline, TextIteratorStreamer
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
 import spaces  # Import spaces early to enable ZeroGPU support
 from transformers import (
 # Style-Transfer Model Definitions
 # ------------------------------
 # Registry of selectable models, keyed by model name. Each entry holds:
 #   repo_id        - passed to AutoModelForCausalLM.from_pretrained in bot_response
 #   description    - human-readable label (presumably shown in the UI; usage not visible here)
 #   reward_repo_id - repository handed to RewardModel in bot_response
 #   author         - author identifier (presumably fills the author slot of
 #                    STYLE_TEMPLATE_PROMPT; confirm against generate())
 #   base_model     - repository the tokenizer is loaded from in bot_response
 MODELS = {
+    "bodrunov-t-lite-lora-16": {"repo_id": "daviondk7131/bodrunov-t-lite-lora-16", "description": "С. Д. Бодрунов (T-lite)", "reward_repo_id": "daviondk7131/bodrunov-reward-model", "author": "bodrunov", "base_model": "t-tech/T-lite-it-1.0"},
+    "shakespeare-deepseek-lora-16": {"repo_id": "daviondk7131/shakespeare-deepseek-lora-16", "description": "У. Шекспир (Deepseek)", "reward_repo_id": "daviondk7131/shakespeare-reward-model", "author": "Shakespeare", "base_model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"},
+    "chekhov-t-lite-lora-16": {"repo_id": "daviondk7131/chekhov-t-lite-lora-16", "description": "А. П. Чехов (T-lite)", "reward_repo_id": "daviondk7131/chekhov-reward-model", "author": "chekhov_ru", "base_model": "t-tech/T-lite-it-1.0"},
+    "tolstoy-t-lite-lora-16": {"repo_id": "daviondk7131/tolstoy-t-lite-lora-16", "description": "Л. Н. Толстой (T-lite)", "reward_repo_id": "daviondk7131/tolstoy-reward-model", "author": "tolstoy_ru", "base_model": "t-tech/T-lite-it-1.0"},
+    "dostoevsky-t-lite-lora-16": {"repo_id": "daviondk7131/dostoevsky-t-lite-lora-16", "description": "Ф. М. Достоевский (T-lite)", "reward_repo_id": "daviondk7131/dostoevsky-reward-model", "author": "dostoevsky_ru", "base_model": "t-tech/T-lite-it-1.0"},
+    "dostoevsky-yagpt-lora-16": {"repo_id": "daviondk7131/dostoevsky-yagpt-lora-16", "description": "Ф. М. Достоевский (YaGPT)", "reward_repo_id": "daviondk7131/dostoevsky-reward-model", "author": "dostoevsky_ru", "base_model": "yandex/YandexGPT-5-Lite-8B-instruct"},
+    "tolstoy-yagpt-lora-16": {"repo_id": "daviondk7131/tolstoy-yagpt-lora-16", "description": "Л. Н. Толстой (YaGPT)", "reward_repo_id": "daviondk7131/tolstoy-reward-model", "author": "tolstoy_ru", "base_model": "yandex/YandexGPT-5-Lite-8B-instruct"},
 }
 # Function to get just the model name from the dropdown selection
 def get_model_name(full_selection):
     """
     Return the model name: the text before the first " - " in the selection.

     If the separator is absent, the whole selection string is returned.
     """
     model_key, _separator, _remainder = full_selection.partition(" - ")
     return model_key
 # User input handling function
 def user_input(user_message, history):
     """
     Append the user's message to the chat history as an unanswered turn.

     Returns a pair: an empty string and a new history list whose last
     element is (user_message, None). The list passed in is not modified.
     """
     pending_turn = (user_message, None)
     extended_history = history + [pending_turn]
     return "", extended_history
+class RewardModel(object):
+    def __init__(self, model_name):
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.reward_model = AutoModelForSequenceClassification.from_pretrained(model_name, device_map=self.device).to('cuda')
+        self.reward_tokenizer = AutoTokenizer.from_pretrained(model_name)
+    def score(self, text):
+        inputs = self.reward_tokenizer(text, truncation=True, return_tensors='pt').to(self.device)
+        with torch.no_grad():
+            value = self.reward_model(**inputs).logits[0, 0].item()
+        return value
 STYLE_TEMPLATE_PROMPT = """Below is an instruction describing the task, combined with input data that provides further context. Write a response that completes the request accordingly.
 ### Instruction:
+Write down the text from the input data in the style of the author {}.
 ### Input data:
 {}
     #history_without_last = history[:-1]
     # Get model name from selection
+    model_name = get_model_name(model_selection)
     # Format the conversation
     #conversation = format_conversation(history_without_last, system_prompt)
         """
         load_kwargs = {
+            "pretrained_model_name_or_path": MODELS[model_name]["repo_id"],
             "device_map": "auto",
             "torch_dtype": torch.float16,
             "trust_remote_code": True
         }
+        tokenizer = AutoTokenizer.from_pretrained(MODELS[model_name]["base_model"])
         model = AutoModelForCausalLM.from_pretrained(**load_kwargs).to("cuda")
+        reward_model = RewardModel(model_name=MODELS[model_name][["reward_repo_id"]])
         #pipe = load_pipeline(model_name)
         user_message = history[-1][0]
+        results = []
+        for i in range(3):
+            results.append(generate(model, tokenizer, user_message, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty))
+        response = max(results, key=reward_model.score)
         # Update the last message pair with the response
         history[-1] = (user_message, response)