daviondk committed
Commit 61da5bb
1 Parent(s): 80c68d0

add models

Files changed (1):
  1. app.py +35 -70

app.py CHANGED
@@ -6,6 +6,7 @@ from datetime import datetime
import gradio as gr
import torch
from transformers import pipeline, TextIteratorStreamer
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
import spaces  # Import spaces early to enable ZeroGPU support

from transformers import (
@@ -24,87 +25,45 @@ cancel_event = threading.Event()
# Qwen3 Model Definitions
# ------------------------------
MODELS = {
-     "T-lite": {"repo_id": "daviondk7131/bodrunov-t-lite-lora-16", "description": "T-lite with LoRA style adapter"}
+     "bodrunov-t-lite-lora-16": {"repo_id": "daviondk7131/bodrunov-t-lite-lora-16", "description": "С. Д. Бодрунов (T-lite)", "reward_repo_id": "daviondk7131/bodrunov-reward-model", "author": "bodrunov", "base_model": "t-tech/T-lite-it-1.0"},
+     "shakespeare-deepseek-lora-16": {"repo_id": "daviondk7131/shakespeare-deepseek-lora-16", "description": "У. Шекспир (Deepseek)", "reward_repo_id": "daviondk7131/shakespeare-reward-model", "author": "Shakespeare", "base_model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"},
+     "chekhov-t-lite-lora-16": {"repo_id": "daviondk7131/chekhov-t-lite-lora-16", "description": "А. П. Чехов (T-lite)", "reward_repo_id": "daviondk7131/chekhov-reward-model", "author": "chekhov_ru", "base_model": "t-tech/T-lite-it-1.0"},
+     "tolstoy-t-lite-lora-16": {"repo_id": "daviondk7131/tolstoy-t-lite-lora-16", "description": "Л. Н. Толстой (T-lite)", "reward_repo_id": "daviondk7131/tolstoy-reward-model", "author": "tolstoy_ru", "base_model": "t-tech/T-lite-it-1.0"},
+     "dostoevsky-t-lite-lora-16": {"repo_id": "daviondk7131/dostoevsky-t-lite-lora-16", "description": "Ф. М. Достоевский (T-lite)", "reward_repo_id": "daviondk7131/dostoevsky-reward-model", "author": "dostoevsky_ru", "base_model": "t-tech/T-lite-it-1.0"},
+     "dostoevsky-yagpt-lora-16": {"repo_id": "daviondk7131/dostoevsky-yagpt-lora-16", "description": "Ф. М. Достоевский (YaGPT)", "reward_repo_id": "daviondk7131/dostoevsky-reward-model", "author": "dostoevsky_ru", "base_model": "yandex/YandexGPT-5-Lite-8B-instruct"},
+     "tolstoy-yagpt-lora-16": {"repo_id": "daviondk7131/tolstoy-yagpt-lora-16", "description": "Л. Н. Толстой (YaGPT)", "reward_repo_id": "daviondk7131/tolstoy-reward-model", "author": "tolstoy_ru", "base_model": "yandex/YandexGPT-5-Lite-8B-instruct"},
}
-
- # Global cache for pipelines to avoid re-loading.
- PIPELINES = {}
-
- def load_pipeline(model_name):
-     """
-     Load and cache a transformers pipeline for text generation.
-     Tries bfloat16, falls back to float16 or float32 if unsupported.
-     """
-
-     load_kwargs = {
-         "pretrained_model_name_or_path": "daviondk7131/bodrunov-t-lite-lora-16",
-         "device_map": "auto",
-         "torch_dtype": torch.float16,
-         "trust_remote_code": True
-     }
-
-     tokenizer = AutoTokenizer.from_pretrained("t-tech/T-lite-it-1.0")
-     model = AutoModelForCausalLM.from_pretrained(**load_kwargs).to("cuda")
-
-
-     global PIPELINES
-     if model_name in PIPELINES:
-         return PIPELINES[model_name]
-     repo = MODELS[model_name]["repo_id"]
-     for dtype in (torch.bfloat16, torch.float16, torch.float32):
-         try:
-             pipe = pipeline(
-                 task="text-generation",
-                 model=repo,
-                 tokenizer=repo,
-                 trust_remote_code=True,
-                 torch_dtype=dtype,
-                 device_map="auto"
-             )
-             PIPELINES[model_name] = pipe
-             return pipe
-         except Exception:
-             continue
-     # Final fallback
-     pipe = pipeline(
-         task="text-generation",
-         model=repo,
-         tokenizer=repo,
-         trust_remote_code=True,
-         device_map="auto"
-     )
-     PIPELINES[model_name] = pipe
-     return pipe
-
- def format_conversation(history, system_prompt):
-     """
-     Flatten chat history and system prompt into a single string.
-     """
-     prompt = system_prompt.strip() + "\n"
-
-     for user_msg, assistant_msg in history:
-         prompt += "User: " + user_msg.strip() + "\n"
-         if assistant_msg:  # might be None or empty
-             prompt += "Assistant: " + assistant_msg.strip() + "\n"
-
-     prompt += "Assistant: "
-     return prompt
-
# Function to get just the model name from the dropdown selection
def get_model_name(full_selection):
    return full_selection.split(" - ")[0]

+
# User input handling function
def user_input(user_message, history):
    return "", history + [(user_message, None)]


+ class RewardModel(object):
+     def __init__(self, model_name):
+         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+         self.reward_model = AutoModelForSequenceClassification.from_pretrained(model_name, device_map=self.device).to('cuda')
+         self.reward_tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+
+     def score(self, text):
+         inputs = self.reward_tokenizer(text, truncation=True, return_tensors='pt').to(self.device)
+         with torch.no_grad():
+             value = self.reward_model(**inputs).logits[0, 0].item()
+
+         return value
+


STYLE_TEMPLATE_PROMPT = """Below is an instruction describing the task, combined with input data that provides further context. Write a response that completes the request accordingly.

### Instruction:
- Write down the text from the input data in the style of the author bodrunov.
+ Write down the text from the input data in the style of the author {}.

### Input data:
{}
@@ -160,7 +119,7 @@ def bot_response(history, model_selection, max_tokens, temperature, top_k, top_p
    #history_without_last = history[:-1]

    # Get model name from selection
-     #model_name = get_model_name(model_selection)
+     model_name = get_model_name(model_selection)

    # Format the conversation
    #conversation = format_conversation(history_without_last, system_prompt)
@@ -173,17 +132,23 @@ def bot_response(history, model_selection, max_tokens, temperature, top_k, top_p
    """

    load_kwargs = {
-         "pretrained_model_name_or_path": "daviondk7131/bodrunov-t-lite-lora-16",
+         "pretrained_model_name_or_path": MODELS[model_name]["repo_id"],
        "device_map": "auto",
        "torch_dtype": torch.float16,
        "trust_remote_code": True
    }

-     tokenizer = AutoTokenizer.from_pretrained("t-tech/T-lite-it-1.0")
+     tokenizer = AutoTokenizer.from_pretrained(MODELS[model_name]["base_model"])
    model = AutoModelForCausalLM.from_pretrained(**load_kwargs).to("cuda")
+     reward_model = RewardModel(model_name=MODELS[model_name]["reward_repo_id"])
    #pipe = load_pipeline(model_name)
    user_message = history[-1][0]
-     response = generate(model, tokenizer, user_message, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty)
+
+     results = []
+     for i in range(3):
+         results.append(generate(model, tokenizer, user_message, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty))
+
+     response = max(results, key=reward_model.score)

    # Update the last message pair with the response
    history[-1] = (user_message, response)
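
The new bot_response flow is a small best-of-N loop: sample three candidate completions from the selected LoRA model, score each with the matching reward model listed under "reward_repo_id" in MODELS, and keep the highest-scoring draft. Below is a minimal, self-contained sketch of that selection pattern; sample_candidate and score are hypothetical stand-ins for the Space's generate() helper and RewardModel.score(), neither of which appears in full in this diff.

from typing import Callable, List

def best_of_n(prompt: str,
              sample_candidate: Callable[[str], str],
              score: Callable[[str], float],
              n: int = 3) -> str:
    """Sample n candidates for the prompt and return the one the scorer ranks highest."""
    candidates: List[str] = [sample_candidate(prompt) for _ in range(n)]
    return max(candidates, key=score)

# Hypothetical usage mirroring the new bot_response(): three samples, best-scored one kept.
demo_scores = {"draft a": 0.1, "draft b": 0.7, "draft c": 0.4}
drafts = iter(demo_scores)  # stand-in for generate(model, tokenizer, user_message, ...)
best = best_of_n(
    "rewrite this in the selected author's style",
    sample_candidate=lambda _: next(drafts),
    score=demo_scores.__getitem__,  # stand-in for RewardModel.score
)
print(best)  # -> "draft b"

Sampling with a non-zero temperature (as bot_response already does) is what makes the three drafts differ, so the reward model has genuinely distinct candidates to rank.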