File size: 15,896 Bytes
875b439
b931367
 
 
 
1b21038
b931367
 
875b439
b931367
c383152
b931367
1b21038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c383152
 
1b21038
 
c383152
1b21038
c383152
 
1b21038
 
 
 
c383152
1b21038
c383152
 
 
b931367
 
c383152
b931367
 
 
c383152
439ab17
 
 
1b21038
b931367
 
c383152
b931367
c383152
1b21038
 
 
 
b931367
 
1b21038
b931367
c383152
 
1b21038
 
b931367
1b21038
 
 
 
 
 
 
 
 
 
 
 
c383152
1b21038
b931367
 
 
c383152
b931367
1b21038
c383152
b931367
 
 
1b21038
b931367
 
 
c383152
b931367
c383152
 
b931367
c383152
b931367
 
875b439
c383152
 
b931367
 
 
 
 
1b21038
 
 
b931367
 
c383152
 
b931367
 
c383152
b931367
875b439
 
 
b931367
c383152
b931367
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b21038
b931367
 
 
c383152
1b21038
 
 
 
 
 
 
 
 
 
 
b931367
 
 
 
 
1b21038
c383152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b21038
b931367
 
 
 
 
 
 
1b21038
b931367
 
 
 
 
 
 
 
1b21038
b931367
 
 
c383152
1b21038
c383152
 
 
 
 
 
 
 
 
 
 
 
 
 
875b439
 
 
 
 
c383152
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
from mimetypes import init
import gradio as gr
import uuid
from datetime import datetime
import pandas as pd
import re
from model_handler import ModelHandler
from config import CHAT_MODEL_SPECS, LING_1T
from recommand_config import get_recommended_inputs
from ui_components.model_selector import create_model_selector
from i18n import get_text

def on_app_load(request: gr.Request, history, conv_id, current_lang_state):
    """
    Initialize application state when the page loads.

    Resolves the UI language from the ``lang`` URL query parameter (falling
    back to the current language state) and restores the last open
    conversation, creating a fresh one when no history exists.

    Returns a tuple of (conversation id, history list, history-table update,
    chatbot messages, resolved language).
    """
    # --- Language detection: honor ?lang=en|zh, else keep current state ---
    url_lang = dict(request.query_params).get("lang")
    updated_lang = url_lang if url_lang in ("en", "zh") else current_lang_state

    # --- No stored history: bootstrap a brand-new conversation ---
    if not history:
        conv_id = str(uuid.uuid4())
        history = [{
            "id": conv_id,
            "title": get_text("chat_new_conversation_title", updated_lang),
            "messages": [],
            "timestamp": datetime.now().isoformat(),
            "system_prompt": "",
            "model": CHAT_MODEL_SPECS[LING_1T]["display_name"],
            "temperature": 0.7,
        }]
        return (
            conv_id,
            history,
            gr.update(value=get_history_df(history, updated_lang)),
            [],
            updated_lang,
        )

    # --- Restore the previously active conversation if it still exists ---
    convo = next((c for c in history if c["id"] == conv_id), None) if conv_id else None
    if convo is not None:
        return (
            conv_id,
            history,
            gr.update(value=get_history_df(history, updated_lang)),
            convo["messages"],
            updated_lang,
        )

    # --- Otherwise fall back to the most recent conversation ---
    most_recent = history[0]
    return (
        most_recent["id"],
        history,
        gr.update(value=get_history_df(history, updated_lang)),
        most_recent["messages"],
        updated_lang,
    )


def generate_conversation_title(messages, system_prompt):
    """
    Derive a short conversation title using a fallback heuristic.

    1. Prefer the leading segment(s) of the first user query (capped at 50
       characters).
    2. Otherwise use the system prompt (capped at 32 characters).
    3. Otherwise use the current wall-clock time as ``HH:MM``.

    Messages may arrive as ``[[user, assistant], ...]`` pairs or as a list
    of role/content dicts; both shapes are handled defensively.
    """
    query = None

    # Step 1: pull the first user utterance out of whatever format we got.
    if messages:
        head = messages[0]
        if isinstance(head, (list, tuple)) and head:
            # Pair format: the first element is the user's turn.
            query = head[0]
        elif isinstance(head, dict):
            if head.get("role") == "user":
                query = head.get("content")
            elif "text" in head:
                # Fallback for other observed dict shapes.
                query = head["text"]

    if isinstance(query, str) and query:
        # Break on common CJK/ASCII punctuation and whitespace, then glue
        # segments together until the title exceeds 3 characters.
        pieces = re.split(r"[,。?!,?!.\s]+", query)
        title = ""
        for piece in pieces:
            if not piece:
                continue
            title += piece
            if len(title) > 3:
                return title[:50]  # Limit title length
        if title:
            return title[:50]

    # Step 2: fall back to the system prompt.
    if system_prompt:
        return system_prompt[:32]

    # Step 3: last resort — the current time.
    return datetime.now().strftime("%H:%M")


def get_history_df(history, lang: str):
    """
    Build a two-column (ID / localized title) DataFrame for the history list.

    Returns an empty, correctly-typed frame when there is no history or the
    records lack the expected ``id``/``title`` fields.
    """
    header_text = get_text('chat_history_dataframe_header', lang)
    empty = pd.DataFrame({
        'ID': pd.Series(dtype='str'),
        header_text: pd.Series(dtype='str'),
    })

    if not history:
        return empty

    df = pd.DataFrame(history)
    if 'id' not in df.columns or 'title' not in df.columns:
        # Malformed records — surface an empty frame rather than raising.
        return empty

    # Titles may be non-string (e.g. legacy records); normalize first.
    df['title'] = df['title'].astype(str)
    return df[['id', 'title']].rename(columns={'id': 'ID', 'title': header_text})


def create_chat_tab(initial_lang: str, current_lang_state: gr.State):
    """
    Build the chat tab UI and wire all of its event handlers.

    Layout: history sidebar (left), chatbot + input + recommended prompts
    (center), and model / system-prompt / temperature controls (right).
    Conversation history and the active conversation id are persisted in
    browser-side storage (gr.BrowserState).

    Returns a dict of the created components so the caller can apply i18n
    updates (see update_language) and any cross-tab wiring.
    """
    model_handler = ModelHandler()

    # Browser-side storage for conversation history and current ID
    conversation_store = gr.BrowserState(default_value=[], storage_key="ling_conversation_history")
    current_conversation_id = gr.BrowserState(default_value=None, storage_key="ling_current_conversation_id")

    def handle_new_chat(history, current_conv_id, lang):
        """Start a new conversation; reuse the current one if it is still empty."""
        current_convo = next((c for c in history if c["id"] == current_conv_id), None) if history else None

        # If the active conversation has no messages yet, keep it instead of
        # accumulating empty conversations in the sidebar.
        if current_convo and not current_convo.get("messages", []):
            return current_conv_id, history, [], gr.update(value=get_history_df(history, lang))

        conv_id = str(uuid.uuid4())
        new_convo_title = get_text('chat_new_conversation_title', lang)
        new_convo = {
            "id": conv_id, "title": new_convo_title,
            "messages": [], "timestamp": datetime.now().isoformat(),
            "system_prompt": "",
            "model": CHAT_MODEL_SPECS[LING_1T]["display_name"],
            "temperature": 0.7
        }
        # Newest conversation goes to the front of the sidebar list.
        updated_history = [new_convo] + (history or [])
        return conv_id, updated_history, [], gr.update(value=get_history_df(updated_history, lang))

    def load_conversation_from_df(df: pd.DataFrame, evt: gr.SelectData, history, lang):
        """Load the conversation the user clicked in the history DataFrame."""
        if evt.index is None or len(df) == 0:
            return None, [], "", CHAT_MODEL_SPECS[LING_1T]["display_name"], 0.7, ""

        # evt.index is (row, col); the hidden 'ID' column holds the conv id.
        selected_id = df.iloc[evt.index[0]]['ID']
        convo = next((c for c in history if c["id"] == selected_id), None)

        if convo:
            # Use .get() to provide defaults for old conversations
            system_prompt = convo.get("system_prompt", "")
            model = convo.get("model", CHAT_MODEL_SPECS[LING_1T]["display_name"])
            temperature = convo.get("temperature", 0.7)
            
            # Return updates for all components
            return selected_id, convo["messages"], system_prompt, model, temperature, ""
        
        # Fallback to creating a new chat if something goes wrong
        new_id, _, new_msgs, _ = handle_new_chat(history, None, lang)
        return new_id, new_msgs, "", CHAT_MODEL_SPECS[LING_1T]["display_name"], 0.7, ""

    with gr.Row(equal_height=False, elem_id="indicator-chat-tab"):
        # --- Left column: new-chat button + conversation history table ---
        with gr.Column(scale=1):
            new_chat_btn = gr.Button(get_text('chat_new_chat_button', initial_lang))
            history_df = gr.DataFrame(
                value=get_history_df(conversation_store.value, initial_lang),
                headers=["ID", get_text('chat_history_dataframe_header', initial_lang)],
                datatype=["str", "str"],
                interactive=False,
                visible=True,
                # Collapse the ID column; it exists only for row selection.
                column_widths=["0%", "100%"]
            )

        # --- Center column: chatbot, input row, recommended prompts ---
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(height=500, placeholder=get_text('chat_chatbot_placeholder', initial_lang))
            with gr.Row():
                textbox = gr.Textbox(placeholder=get_text('chat_textbox_placeholder', initial_lang), container=False, scale=7)
                submit_btn = gr.Button(get_text('chat_submit_button', initial_lang), scale=1)
            
            recommended_title = gr.Markdown(get_text('chat_recommended_dialogues_title', initial_lang))
            recommended_dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                samples=[[item["task"]] for item in get_recommended_inputs(initial_lang)],
                label=get_text('chat_recommended_dataset_label', initial_lang),
                headers=[get_text('chat_recommended_dataset_header', initial_lang)],
            )

        # --- Right column: model selector + generation settings ---
        with gr.Column(scale=1):
            model_dropdown, model_description_markdown = create_model_selector(
                model_specs=CHAT_MODEL_SPECS,
                default_model_constant=LING_1T,
                lang_state=current_lang_state,
                initial_lang=initial_lang
            )

            system_prompt_textbox = gr.Textbox(label=get_text('chat_system_prompt_label', initial_lang), lines=5, placeholder=get_text('chat_system_prompt_placeholder', initial_lang))
            temperature_slider = gr.Slider(minimum=0, maximum=1.0, value=0.7, step=0.1, label=get_text('chat_temperature_slider_label', initial_lang))

        # --- Event Handlers --- #
        def on_select_recommendation(evt: gr.SelectData, history, current_conv_id, lang):
            """Apply a recommended prompt: new chat + preset model/prompt/temperature."""
            selected_task = evt.value[0]
            item = next((i for i in get_recommended_inputs(lang) if i["task"] == selected_task), None)
            if not item:
                # Unknown sample (e.g. stale after a language switch): no-op.
                return gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update()

            new_id, new_history, new_messages, history_df_update = handle_new_chat(history, current_conv_id, lang)

            return (
                new_id, new_history,
                gr.update(value=item["model"]),
                gr.update(value=item["system_prompt"]),
                gr.update(value=item["temperature"]),
                gr.update(value=item["user_message"]),
                history_df_update,
                new_messages
            )

        def chat_stream(conv_id, history, model_display_name, message, chat_history, system_prompt, temperature):
            """Stream the model response into the chatbot, yielding partial histories."""
            if not message:
                yield chat_history
                return
            # Map the localized display name back to its model constant.
            model_constant = next((k for k, v in CHAT_MODEL_SPECS.items() if v["display_name"] == model_display_name), LING_1T)
            response_generator = model_handler.get_response(model_constant, message, chat_history, system_prompt, temperature)
            for history_update in response_generator:
                yield history_update

        def on_chat_stream_complete(conv_id, history, final_chat_history, system_prompt, model_display_name, temperature, lang):
            """Persist the finished exchange into history and refresh the sidebar."""
            current_convo = next((c for c in history if c["id"] == conv_id), None)
            if not current_convo:
                return history, gr.update()
            
            # Check if this is the first turn of a new conversation
            new_convo_title_default = get_text('chat_new_conversation_title', lang)
            is_new_conversation = current_convo["title"] == new_convo_title_default
            
            # If it's a new conversation and we have messages, generate a title and save metadata
            if is_new_conversation and len(final_chat_history) > len(current_convo.get("messages", [])):
                current_convo["system_prompt"] = system_prompt
                current_convo["model"] = model_display_name
                current_convo["temperature"] = temperature
                new_title = generate_conversation_title(final_chat_history, system_prompt)
                current_convo["title"] = new_title

            current_convo["messages"] = final_chat_history
            current_convo["timestamp"] = datetime.now().isoformat()

            # Re-sort so the most recently active conversation is first.
            history = sorted([c for c in history if c["id"] != conv_id] + [current_convo], key=lambda x: x["timestamp"], reverse=True)
            return history, gr.update(value=get_history_df(history, lang))
        
        # Store all components that need i18n updates
        components = {
            "new_chat_btn": new_chat_btn,
            "history_df": history_df,
            "chatbot": chatbot,
            "textbox": textbox,
            "submit_btn": submit_btn,
            "recommended_title": recommended_title,
            "recommended_dataset": recommended_dataset,
            "system_prompt_textbox": system_prompt_textbox,
            "temperature_slider": temperature_slider,
            "model_dropdown": model_dropdown,
            "model_description_markdown": model_description_markdown,
            # Non-updatable components needed for event handlers and app.py
            "conversation_store": conversation_store,
            "current_conversation_id": current_conversation_id,
        }

        # Wire event handlers
        recommended_dataset.select(on_select_recommendation, inputs=[conversation_store, current_conversation_id, current_lang_state], outputs=[current_conversation_id, conversation_store, model_dropdown, system_prompt_textbox, temperature_slider, textbox, history_df, chatbot], show_progress="hidden")

        # Submit (button + Enter key): stream the response, then persist it.
        submit_btn.click(
            chat_stream,
            [current_conversation_id, conversation_store, model_dropdown, textbox, chatbot, system_prompt_textbox, temperature_slider],
            [chatbot]
        ).then(
            on_chat_stream_complete,
            [current_conversation_id, conversation_store, chatbot, system_prompt_textbox, model_dropdown, temperature_slider, current_lang_state],
            [conversation_store, history_df]
        )
        textbox.submit(
            chat_stream,
            [current_conversation_id, conversation_store, model_dropdown, textbox, chatbot, system_prompt_textbox, temperature_slider],
            [chatbot]
        ).then(
            on_chat_stream_complete,
            [current_conversation_id, conversation_store, chatbot, system_prompt_textbox, model_dropdown, temperature_slider, current_lang_state],
            [conversation_store, history_df]
        )

        new_chat_btn.click(handle_new_chat, inputs=[conversation_store, current_conversation_id, current_lang_state], outputs=[current_conversation_id, conversation_store, chatbot, history_df])
        history_df.select(load_conversation_from_df, inputs=[history_df, conversation_store, current_lang_state], outputs=[current_conversation_id, chatbot, system_prompt_textbox, model_dropdown, temperature_slider, textbox])

    return components

def update_language(lang: str, components: dict):
    """
    Build the component -> gr.update mapping that relocalizes the chat tab.

    Only components with visible text are included; the browser-storage
    components carry no text and are left untouched.
    """
    def _t(key):
        # Shorthand for a localized string lookup in the target language.
        return get_text(key, lang)

    updates = {}
    updates[components["new_chat_btn"]] = gr.update(value=_t('chat_new_chat_button'))
    updates[components["history_df"]] = gr.update(headers=["ID", _t('chat_history_dataframe_header')])
    updates[components["chatbot"]] = gr.update(placeholder=_t('chat_chatbot_placeholder'))
    updates[components["textbox"]] = gr.update(placeholder=_t('chat_textbox_placeholder'))
    updates[components["submit_btn"]] = gr.update(value=_t('chat_submit_button'))
    updates[components["recommended_title"]] = gr.update(value=_t('chat_recommended_dialogues_title'))
    updates[components["recommended_dataset"]] = gr.update(
        samples=[[item["task"]] for item in get_recommended_inputs(lang)],
        label=_t('chat_recommended_dataset_label'),
        headers=[_t('chat_recommended_dataset_header')],
    )
    updates[components["system_prompt_textbox"]] = gr.update(
        label=_t('chat_system_prompt_label'),
        placeholder=_t('chat_system_prompt_placeholder'),
    )
    updates[components["temperature_slider"]] = gr.update(label=_t('chat_temperature_slider_label'))
    return updates