Controlled_Chat_CPU

Sleeping

App Files Files Community

abrakjamson committed on Oct 6, 2024

Commit

f7a1bd4

1 Parent(s): 5f7f1fd

Implementing streaming chat, moving models, generalizing preset buttons

Browse files

Files changed (14) hide show

anger.gguf +0 -0
app.py +147 -84
control_models/Angry.gguf +0 -0
control_models/Confident.gguf +0 -0
control_models/Conspiracies.gguf +0 -0
creative.gguf → control_models/Creative.gguf +0 -0
control_models/Empathatic.gguf +0 -0
control_models/Joking.gguf +0 -0
lazy.gguf → control_models/Lazy.gguf +0 -0
control_models/Optimistic.gguf +0 -0
right-leaning.gguf → control_models/Right-leaning.gguf +0 -0
control_models/Tripping.gguf +0 -0
tripping.gguf +0 -0
truthful.gguf +0 -0

anger.gguf DELETED Viewed

Binary file (509 kB)

app.py CHANGED Viewed

@@ -25,6 +25,9 @@ model = AutoModelForCausalLM.from_pretrained(
     use_safetensors=True
 )
 model = model.to("cuda:0" if torch.cuda.is_available() else "cpu")
 model = ControlModel(model, list(range(-5, -18, -1)))
 # Generation settings
@@ -39,17 +42,50 @@ default_generation_settings = {
 user_tag, asst_tag = "[INST]", "[/INST]"
 # List available control vectors
-control_vector_files = [f for f in os.listdir('.') if f.endswith('.gguf')]
 if not control_vector_files:
-    raise FileNotFoundError("No .gguf control vector files found in the current directory.")
 # Function to toggle slider visibility based on checkbox state
 def toggle_slider(checked):
     return gr.update(visible=checked)
-# Function to generate the model's response
 def generate_response(system_prompt, user_message, history, max_new_tokens, repitition_penalty, do_sample, *args):
     # Separate checkboxes and sliders based on type
     # The first x in args are the checkbox names (the file names)
@@ -69,7 +105,7 @@ def generate_response(system_prompt, user_message, history, max_new_tokens, repi
             weight = sliders[i]
             try:
                 # Set the control vector's weight (and sign) by multiplying by its slider value
-                control_vectors.append(ControlVector.import_gguf(cv_file) * weight)
                 assistant_message_title += f"{cv_file.split('.')[0]}: {weight};"
             except Exception as e:
                 print(f"Failed to set control vector {cv_file}: {e}")
@@ -83,32 +119,14 @@ def generate_response(system_prompt, user_message, history, max_new_tokens, repi
         else:
             combined_vector += control_vectors[i]
-    if combined_vector is not None:
-        model.set_control(combined_vector)
-    formatted_prompt = ""
-    # <s>[INST] user message[/INST] assistant message</s>[INST] new user message[/INST]
-    # Mistral expects the history to be wrapped in <s>history</s>
-    if len(history) > 0:
-        formatted_prompt += "<s>"
-    # Append the system prompt if provided
-    if system_prompt.strip():
-        formatted_prompt += f"{user_tag} {system_prompt}{asst_tag} "
-    # Construct the formatted prompt based on history
-    #TODO move back to ChatMessage type instead of Tuple, because the message title gets into the history
-    if len(history) > 0:
-        for turn in history:
-            user_msg, asst_msg = turn
-            formatted_prompt += f"{user_tag} {user_msg} {asst_tag} {asst_msg}"
-    if len(history) > 0:
-        formatted_prompt += "</s>"
-    # Append the new user message
-    formatted_prompt += f"{user_tag} {user_message} {asst_tag}"
     # Tokenize the input
     input_ids = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
@@ -120,40 +138,81 @@ def generate_response(system_prompt, user_message, history, max_new_tokens, repi
         "repetition_penalty": repetition_penalty.value,
     }
-    # Generate the response
-    output_ids = model.generate(**input_ids, **generation_settings)
-    response = tokenizer.decode(output_ids.squeeze(), skip_special_tokens=False)
     def get_assistant_response(input_string):
-        # Use regex to find the text between the final [/INST] tag and </s>
-        pattern = r'\[/INST\](?!.*\[/INST\])\s*(.*?)(?:</s>|$)'
-        match = re.search(pattern, input_string, re.DOTALL)
-        if match:
-            return match.group(1).strip()
-        return None
-    assistant_response = get_assistant_response(response)
     # Update conversation history
-    assistant_response = get_assistant_response(response)
     assistant_response_display = f"*{assistant_message_title}*\n\n{assistant_response}"
     # Update conversation history
     history.append((user_message, assistant_response_display))
-    return history
 def generate_response_with_retry(system_prompt, user_message, history, max_new_tokens, repitition_penalty, do_sample, *args):
     # Remove last user input and assistant response from history, then call generate_response()
     if history:
         history = history[0:-1]
-    return generate_response(system_prompt, user_message, history, max_new_tokens, repitition_penalty, do_sample, *args)
 # Function to reset the conversation history
 def reset_chat():
     # returns a blank state
-    return [], []
-# I'm not a good enough coder with Python and Gradio to figure out how to generalize this. PRs accepted!
 def set_preset_helpful(*args):
     # gets the list of all checkboxes and sliders
     # sets checkboxes and sliders accordingly to this persona
@@ -162,18 +221,20 @@ def set_preset_helpful(*args):
     count_checkboxes = int(len(args)/2)
     new_checkbox_values = []
     new_slider_values = []
-    for i in range(count_checkboxes):
-        if i == 4:
             new_checkbox_values.append(True)
-            # set slider value (sliders are after the checkboxes)
             new_slider_values.append(1.0)
-        elif i == 7:
             new_checkbox_values.append(True)
-            # set slider value (sliders are after the checkboxes)
             new_slider_values.append(1.0)
         else:
             new_checkbox_values.append(False)
             new_slider_values.append(0.0)
     return new_checkbox_values + new_slider_values
 def set_preset_conspiracist(*args):
@@ -181,29 +242,29 @@ def set_preset_conspiracist(*args):
     # sets checkboxes and sliders accordingly to this persona
     # args is a list of checkboxes and then slider values
     # must return the updated list of checkboxes and sliders
-    count_checkboxes = int(len(args)/2)
     new_checkbox_values = []
     new_slider_values = []
-    for i in range(count_checkboxes):
-        if i == 2:
             new_checkbox_values.append(True)
-            # set slider value (sliders are after the checkboxes)
             new_slider_values.append(1.5)
-        elif i == 3:
             new_checkbox_values.append(True)
-            # set slider value (sliders are after the checkboxes)
             new_slider_values.append(1.0)
-        elif i == 6:
             new_checkbox_values.append(True)
-            # set slider value (sliders are after the checkboxes)
             new_slider_values.append(-0.5)
-        elif i == 10:
             new_checkbox_values.append(True)
-            # set slider value (sliders are after the checkboxes)
             new_slider_values.append(-1.0)
         else:
             new_checkbox_values.append(False)
             new_slider_values.append(0.0)
     return new_checkbox_values + new_slider_values
 def set_preset_stoner(*args):
@@ -211,25 +272,25 @@ def set_preset_stoner(*args):
     # sets checkboxes and sliders accordingly to this persona
     # args is a list of checkboxes and then slider values
     # must return the updated list of checkboxes and sliders
-    count_checkboxes = int(len(args)/2)
     new_checkbox_values = []
     new_slider_values = []
-    for i in range(count_checkboxes):
-        if i == 0:
             new_checkbox_values.append(True)
-            # set slider value (sliders are after the checkboxes)
             new_slider_values.append(0.5)
-        elif i == 8:
             new_checkbox_values.append(True)
-            # set slider value (sliders are after the checkboxes)
             new_slider_values.append(-0.5)
-        elif i == 9:
             new_checkbox_values.append(True)
-            # set slider value (sliders are after the checkboxes)
             new_slider_values.append(0.6)
         else:
             new_checkbox_values.append(False)
             new_slider_values.append(0.0)
     return new_checkbox_values + new_slider_values
 def set_preset_facts(*args):
@@ -237,29 +298,28 @@ def set_preset_facts(*args):
     # sets checkboxes and sliders accordingly to this persona
     # args is a list of checkboxes and then slider values
     # must return the updated list of checkboxes and sliders
-    count_checkboxes = int(len(args)/2)
     new_checkbox_values = []
     new_slider_values = []
-    for i in range(count_checkboxes):
-        if i == 1:
             new_checkbox_values.append(True)
-            # set slider value (sliders are after the checkboxes)
             new_slider_values.append(0.5)
-        elif i == 5:
             new_checkbox_values.append(True)
-            # set slider value (sliders are after the checkboxes)
             new_slider_values.append(-0.5)
-        elif i == 6:
             new_checkbox_values.append(True)
-            # set slider value (sliders are after the checkboxes)
             new_slider_values.append(-0.5)
-        elif i == 10:
             new_checkbox_values.append(True)
-            # set slider value (sliders are after the checkboxes)
             new_slider_values.append(0.5)
         else:
             new_checkbox_values.append(False)
             new_slider_values.append(0.0)
     return new_checkbox_values + new_slider_values
 tooltip_css = """
@@ -315,6 +375,7 @@ with gr.Blocks(
     theme=dark_theme,
     css=tooltip_css,
     ) as app:
     # Header
     gr.Markdown("# 🧠 LLM Brain Control")
@@ -326,7 +387,7 @@ with gr.Blocks(
             gr.Markdown("### ⚡ Control Vectors")
             control_vector_label = gr.HTML("""
                 <div class="tooltip">
-                    <span>Select how you want to control the LLM - towards (+) or away (-). Or start with a preset:</span>
                     <span class="tooltiptext">+/- 1.0 is a good start. Check the examples for each vector.</span>
                 </div>
             """)
@@ -334,7 +395,7 @@ with gr.Blocks(
             with gr.Row():
                 button_helpful = gr.Button(
-                    value="Kind and helpful"
                 )
                 button_facts = gr.Button(
                     value="Just the facts"
@@ -353,7 +414,7 @@ with gr.Blocks(
             for cv_file in control_vector_files:
                 with gr.Row():
                     # Checkbox to select the control vector
-                    checkbox = gr.Checkbox(label=cv_file, value=False)
                     control_checks.append(checkbox)
                     # Slider to adjust the control vector's weight
@@ -362,7 +423,7 @@ with gr.Blocks(
                         maximum=2.5,
                         value=0.0,
                         step=0.1,
-                        label=f"{cv_file} Voltage",
                         visible=False
                     )
                     control_sliders.append(slider)
@@ -390,7 +451,7 @@ with gr.Blocks(
                         max_tokens_label = gr.HTML("""
                             <div class="tooltip">
                                 <span>Max Response Length (in tokens)</span>
-                                <span class="tooltiptext">192 allows for short answers and is faster.</span>
                             </div>
                         """)
                         max_new_tokens = gr.Number(
@@ -433,7 +494,9 @@ with gr.Blocks(
             gr.Markdown("### 🗨️ Conversation")
             # Chatbot to display conversation
-            chatbot = gr.Chatbot()
             # User Message Input with tooltip
             #with gr.Row():

     use_safetensors=True
 )
 model = model.to("cuda:0" if torch.cuda.is_available() else "cpu")
+print(f"Is CUDA available: {torch.cuda.is_available()}")
+print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 model = ControlModel(model, list(range(-5, -18, -1)))
 # Generation settings
 user_tag, asst_tag = "[INST]", "[/INST]"
 # List available control vectors
+control_vector_files = [f for f in os.listdir('control_models') if f.endswith('.gguf')]
 if not control_vector_files:
+    raise FileNotFoundError("No .gguf control vector files found in the control_models directory.")
 # Function to toggle slider visibility based on checkbox state
 def toggle_slider(checked):
     return gr.update(visible=checked)
+def construct_prompt(history, system_prompt, user_message):
+    """
+    Converts the history (list of tuples) back into the string format Mistral expects
+    """
+    formatted_prompt = ""
+    # <s>[INST] user message[/INST] assistant message</s>[INST] new user message[/INST]
+    # Mistral expects the history to be wrapped in <s>history</s>, so it's added here
+    if len(history) > 0:
+        formatted_prompt += "<s>"
+    # Append the system prompt if provided
+    if system_prompt.strip():
+        formatted_prompt += f"{user_tag} {system_prompt}{asst_tag} "
+    # Construct the formatted prompt based on history
+    if len(history) > 0:
+        for turn in history:
+            user_msg, asst_msg = turn
+            asst_msg = asst_msg.split("\n")[1:]
+            formatted_prompt += f"{user_tag} {user_msg} {asst_tag} {asst_msg}"
+    if len(history) > 0:
+        formatted_prompt += "</s>"
+    # Append the new user message
+    formatted_prompt += f"{user_tag} {user_message} {asst_tag}"
+    return formatted_prompt
 def generate_response(system_prompt, user_message, history, max_new_tokens, repitition_penalty, do_sample, *args):
+    """
+    Applies the control vectors and calls the language model.
+    Returns a list of tuples, the user message and the assistant response,
+        which Gradio uses to update the chatbot history
+    """
     # Separate checkboxes and sliders based on type
     # The first x in args are the checkbox names (the file names)
             weight = sliders[i]
             try:
                 # Set the control vector's weight (and sign) by multiplying by its slider value
+                control_vectors.append(ControlVector.import_gguf(f"control_models/{cv_file}") * weight)
                 assistant_message_title += f"{cv_file.split('.')[0]}: {weight};"
             except Exception as e:
                 print(f"Failed to set control vector {cv_file}: {e}")
         else:
             combined_vector += control_vectors[i]
+    # Set the combined set of vectors as the control for the model
+    try:
+        if combined_vector is not None:
+            model.set_control(combined_vector)
+    except Exception as e:
+        print(f"Failed to set Control: {e}")
+    formatted_prompt = construct_prompt(history, system_prompt, user_message)
     # Tokenize the input
     input_ids = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
         "repetition_penalty": repetition_penalty.value,
     }
+    _streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=False,)
+    generate_kwargs = dict(
+        input_ids,
+        streamer=_streamer,
+        pad_token_id= tokenizer.eos_token_id,
+        do_sample= do_sample,
+        max_new_tokens= int(max_new_tokens),
+        repetition_penalty= repetition_penalty.value,
+    )
+    t = threading.Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()
+    # Display the response as it streams in, prepending the control vector info
+    partial_message = ""
+    for new_token in _streamer:
+        if new_token != '<' and new_token != '</s>': # seems to hit EOS correctly without this needed
+            partial_message += new_token
+            partial_with_title = "*" + assistant_message_title + "*" + "\n\n" + partial_message
+            temp_history = history + [(user_message, partial_with_title)]
+            yield temp_history
+        else:
+            _streamer.end()
+    # remove the trailing </s> if present
+    # it won't be present if the model ran out from max_tokens
     def get_assistant_response(input_string):
+        if len(input_string) >= 4:
+            if input_string[-4:] == "</s>":
+                return input_string[:-4]
+            else:
+                return input_string
+        else:
+            return input_string
     # Update conversation history
+    assistant_response = get_assistant_response(partial_message)
     assistant_response_display = f"*{assistant_message_title}*\n\n{assistant_response}"
     # Update conversation history
     history.append((user_message, assistant_response_display))
+    yield history
 def generate_response_with_retry(system_prompt, user_message, history, max_new_tokens, repitition_penalty, do_sample, *args):
     # Remove last user input and assistant response from history, then call generate_response()
     if history:
         history = history[0:-1]
+    for output in generate_response(system_prompt, user_message, history, max_new_tokens, repetition_penalty, do_sample, *args):
+        yield output
 # Function to reset the conversation history
 def reset_chat():
     # returns a blank state
+    return [], ""
+def get_checkboxes():
+    # rebuilding the list of checkboxes, so that these presets don't have to change
+    # when adding a new control model
+    checkbox_column = app.children[2].children[0].children
+    model_names_and_indexes = {}
+    checkbox_index = 0
+    for i in range(len(checkbox_column)):
+        if isinstance(checkbox_column[i], gr.Row):
+            try:
+                model_name = checkbox_column[i].children[0].children[0].label
+                model_names_and_indexes[model_name] = checkbox_index
+                checkbox_index += 1
+            except IndexError:
+                # allow for other rows to be in the interface
+                pass
+            except AttributeError:
+                pass
+    return model_names_and_indexes
 def set_preset_helpful(*args):
     # gets the list of all checkboxes and sliders
     # sets checkboxes and sliders accordingly to this persona
     count_checkboxes = int(len(args)/2)
     new_checkbox_values = []
     new_slider_values = []
+    model_names_and_indexes = get_checkboxes()
+    for check in model_names_and_indexes:
+        if check == "Empathatic":
             new_checkbox_values.append(True)
             new_slider_values.append(1.0)
+        elif check == "Optimistic":
             new_checkbox_values.append(True)
             new_slider_values.append(1.0)
         else:
             new_checkbox_values.append(False)
             new_slider_values.append(0.0)
     return new_checkbox_values + new_slider_values
 def set_preset_conspiracist(*args):
     # sets checkboxes and sliders accordingly to this persona
     # args is a list of checkboxes and then slider values
     # must return the updated list of checkboxes and sliders
     new_checkbox_values = []
     new_slider_values = []
+    model_names_and_indexes = get_checkboxes()
+    for check in model_names_and_indexes:
+        if check == "Conspiracies":
             new_checkbox_values.append(True)
             new_slider_values.append(1.5)
+        elif check == "Creative":
             new_checkbox_values.append(True)
             new_slider_values.append(1.0)
+        elif check == "Lazy":
             new_checkbox_values.append(True)
             new_slider_values.append(-0.5)
+        elif check == "Truthful":
             new_checkbox_values.append(True)
             new_slider_values.append(-1.0)
         else:
             new_checkbox_values.append(False)
             new_slider_values.append(0.0)
     return new_checkbox_values + new_slider_values
 def set_preset_stoner(*args):
     # sets checkboxes and sliders accordingly to this persona
     # args is a list of checkboxes and then slider values
     # must return the updated list of checkboxes and sliders
     new_checkbox_values = []
     new_slider_values = []
+    model_names_and_indexes = get_checkboxes()
+    for check in model_names_and_indexes:
+        if check == "Angry":
             new_checkbox_values.append(True)
             new_slider_values.append(0.5)
+        elif check == "Right-leaning":
             new_checkbox_values.append(True)
             new_slider_values.append(-0.5)
+        elif check == "Tripping":
             new_checkbox_values.append(True)
             new_slider_values.append(0.6)
         else:
             new_checkbox_values.append(False)
             new_slider_values.append(0.0)
     return new_checkbox_values + new_slider_values
 def set_preset_facts(*args):
     # sets checkboxes and sliders accordingly to this persona
     # args is a list of checkboxes and then slider values
     # must return the updated list of checkboxes and sliders
     new_checkbox_values = []
     new_slider_values = []
+    model_names_and_indexes = get_checkboxes()
+    for check in model_names_and_indexes:
+        if check == "Confident":
             new_checkbox_values.append(True)
             new_slider_values.append(0.5)
+        elif check == "Joking":
             new_checkbox_values.append(True)
             new_slider_values.append(-0.5)
+        elif check == "Lazy":
             new_checkbox_values.append(True)
             new_slider_values.append(-0.5)
+        elif check == "Truthful":
             new_checkbox_values.append(True)
             new_slider_values.append(0.5)
         else:
             new_checkbox_values.append(False)
             new_slider_values.append(0.0)
     return new_checkbox_values + new_slider_values
 tooltip_css = """
     theme=dark_theme,
     css=tooltip_css,
     ) as app:
     # Header
     gr.Markdown("# 🧠 LLM Brain Control")
             gr.Markdown("### ⚡ Control Vectors")
             control_vector_label = gr.HTML("""
                 <div class="tooltip">
+                    <span>Select how you want to control the LLM per turn - towards (+) or away (-). Or start with a preset:</span>
                     <span class="tooltiptext">+/- 1.0 is a good start. Check the examples for each vector.</span>
                 </div>
             """)
             with gr.Row():
                 button_helpful = gr.Button(
+                    value="Kind and helpful",
                 )
                 button_facts = gr.Button(
                     value="Just the facts"
             for cv_file in control_vector_files:
                 with gr.Row():
                     # Checkbox to select the control vector
+                    checkbox = gr.Checkbox(label=cv_file.split('.')[0], value=False)
                     control_checks.append(checkbox)
                     # Slider to adjust the control vector's weight
                         maximum=2.5,
                         value=0.0,
                         step=0.1,
+                        label=f"Voltage",
                         visible=False
                     )
                     control_sliders.append(slider)
                         max_tokens_label = gr.HTML("""
                             <div class="tooltip">
                                 <span>Max Response Length (in tokens)</span>
+                                <span class="tooltiptext">Lower for faster output, higher to allow longer answers</span>
                             </div>
                         """)
                         max_new_tokens = gr.Number(
             gr.Markdown("### 🗨️ Conversation")
             # Chatbot to display conversation
+            chatbot = gr.Chatbot(
+                type="tuples"
+            )
             # User Message Input with tooltip
             #with gr.Row():

control_models/Angry.gguf ADDED Viewed

Binary file (509 kB). View file

control_models/Confident.gguf ADDED Viewed

Binary file (509 kB). View file

control_models/Conspiracies.gguf ADDED Viewed

Binary file (509 kB). View file

creative.gguf → control_models/Creative.gguf RENAMED Viewed

File without changes

control_models/Empathatic.gguf ADDED Viewed

Binary file (509 kB). View file

control_models/Joking.gguf ADDED Viewed

Binary file (509 kB). View file

lazy.gguf → control_models/Lazy.gguf RENAMED Viewed

File without changes

control_models/Optimistic.gguf ADDED Viewed

Binary file (509 kB). View file

right-leaning.gguf → control_models/Right-leaning.gguf RENAMED Viewed

File without changes

control_models/Tripping.gguf ADDED Viewed

Binary file (509 kB). View file

tripping.gguf DELETED Viewed

Binary file (509 kB)

truthful.gguf DELETED Viewed

Binary file (509 kB)