Spaces:

yogies
/

cpschat

Running

File size: 12,322 Bytes

import hmac
import json
import os
from datetime import datetime, date

import gradio as gr
from llama_cpp import Llama
from openai import OpenAI

# Local GGUF model used by the "llama.cpp" provider entry in MODELS.
# NOTE: this runs at import time, so the model is loaded as soon as the app
# starts, whether or not the local model is ever selected in the UI.
apriel_q2 = Llama.from_pretrained(
    repo_id="unsloth/Apriel-1.5-15b-Thinker-GGUF",
    filename="Apriel-1.5-15b-Thinker-UD-IQ2_XXS.gguf",
)

# ----------------------------------------------------------------------
# Helper to read secrets from the HF Space environment
# ----------------------------------------------------------------------
def _secret(key: str, fallback: str = None) -> str:
    val = os.getenv(key)
    if val is not None:
        return val
    if fallback is not None:
        return fallback
    raise RuntimeError(f"Secret '{key}' not found. Please add it to your Space secrets.")

# ----------------------------------------------------------------------
# User Management
# ----------------------------------------------------------------------
def load_users():
    """Load the username -> password mapping from the ``CHAT_USERS`` secret.

    ``CHAT_USERS`` is read through ``_secret`` with a default of ``"{}"``,
    so a missing secret yields an empty mapping. The value must be a JSON
    object whose keys are usernames and whose values are passwords.

    Returns:
        dict: A new dictionary of username -> password. It is empty when the
        secret is missing, is not valid JSON, or is not a JSON object. In
        the last two cases the reason is printed so that a misconfigured
        secret does not silently lock every user out.
    """
    users_json = _secret("CHAT_USERS", "{}")

    try:
        users_data = json.loads(users_json)
    except ValueError as err:  # json.JSONDecodeError is a ValueError
        # The decoder message only carries a position, not the secret itself.
        print(f"CHAT_USERS is not valid JSON; no users loaded: {err}")
        return {}

    if not isinstance(users_data, dict):
        print(
            "CHAT_USERS must be a JSON object mapping usernames to passwords; "
            f"got {type(users_data).__name__}. No users loaded."
        )
        return {}

    # Copy so callers never share state with the decoded payload.
    return dict(users_data)

# Load users once at import time; authenticate_user() checks against this mapping.
VALID_USERS = load_users()


def authenticate_user(username, password):
    """Check a login attempt against ``VALID_USERS``.

    Args:
        username: Username submitted on the login form.
        password: Password submitted on the login form.

    Returns:
        bool: ``True`` only when *username* is a known user and *password*
        matches the stored password exactly; ``False`` otherwise, including
        when either argument or the stored password is not a string.
    """
    if not isinstance(username, str) or not isinstance(password, str):
        return False

    expected = VALID_USERS.get(username)
    if not isinstance(expected, str):
        # Unknown user, or a non-string value in the users mapping.
        return False

    # Constant-time comparison so response timing does not reveal how much of
    # the password matched. Encode first: compare_digest rejects non-ASCII str.
    return hmac.compare_digest(expected.encode("utf-8"), password.encode("utf-8"))

# ----------------------------------------------------------------------
# Configuration
# ----------------------------------------------------------------------
# Registry of selectable chat models, keyed by the label shown in the UI.
# Each entry holds:
#   provider   - which backend serves the model
#   model_name - provider-side model identifier
#   api_url    - API base URL ("local" for the in-process model)
#   translate  - "yes" when the answer is produced in English and then
#                translated to Indonesian, "no" otherwise
# Insertion order matters: MODEL_NAMES[0] is the default dropdown selection.
# Commented-out entries are disabled alternatives kept for quick re-enabling.
_OPENROUTER_API_URL = "https://openrouter.ai/api/v1"

MODELS = {
    # "Qwen3-4B-Thinking-2507": {
    #     "provider": "huggingface",
    #     "model_name": "Qwen/Qwen3-4B-Thinking-2507:nscale",
    #     "api_url": "https://router.huggingface.co/v1",
    # },
    "Free - NVIDIA Nemotron-nano-9b [EN] + Gemma 3n4b [ID]": {
        "provider": "openrouter",
        "model_name": "nvidia/nemotron-nano-9b-v2:free",
        "api_url": _OPENROUTER_API_URL,
        "translate": "yes",
    },
    # "Free - Gpt-oss-20b [EN] + Gemma 3n4b [ID]": {
    #     "provider": "openrouter",
    #     "model_name": "openai/gpt-oss-20b:free",
    #     "api_url": _OPENROUTER_API_URL,
    #     "translate": "yes",
    # },
    "Free - Glm-4.5-air [EN] + Gemma 3n4b [ID]": {
        "provider": "openrouter",
        "model_name": "z-ai/glm-4.5-air:free",
        "api_url": _OPENROUTER_API_URL,
        "translate": "yes",
    },
    "Free - Deepseek-chat-v3.1": {
        "provider": "openrouter",
        "model_name": "deepseek/deepseek-chat-v3.1:free",
        "api_url": _OPENROUTER_API_URL,
        "translate": "no",
    },
    # "Ringan - Gemma-3n4b": {
    #     "provider": "openrouter",
    #     "model_name": "google/gemma-3n-e4b-it:floor",
    #     "api_url": _OPENROUTER_API_URL,
    # },
    # "Gpt-oss-20b": {
    #     "provider": "openrouter",
    #     "model_name": "openai/gpt-oss-20b:floor",
    #     "api_url": _OPENROUTER_API_URL,
    #     "translate": "no",
    # },
    # "Tongyi-deepresearch-30b-a3b": {
    #     "provider": "openrouter",
    #     "model_name": "alibaba/tongyi-deepresearch-30b-a3b:floor",
    #     "api_url": _OPENROUTER_API_URL,
    #     "translate": "no",
    # },
    "Gpt-oss-120b": {
        "provider": "openrouter",
        "model_name": "openai/gpt-oss-120b:floor",
        "api_url": _OPENROUTER_API_URL,
        "translate": "no",
    },
    "Apriel-1.5-15b-thinker-Q2_quantized": {
        "provider": "llama.cpp",
        "model_name": "apriel-1.5-15b",
        "api_url": "local",
        "translate": "no",
    },
}

# Display names for the model dropdown, in registry order.
MODEL_NAMES = [display_name for display_name in MODELS]

# ----------------------------------------------------------------------
# Core Chat Logic
# ----------------------------------------------------------------------
# Secret name holding the API key for each remote (OpenAI-compatible) provider.
# Any provider not listed here is served by the local llama.cpp model.
_REMOTE_PROVIDER_KEYS = {
    "huggingface": "HF_TOKEN",
    "openrouter": "OPENROUTER_KEY",
}


def _build_chat_messages(system_message, history, message, force_english):
    """Assemble the OpenAI-style message list for one chat turn."""
    system_message = system_message or ""
    if force_english:
        system_message = (
            "**HIGHEST PRIORITY: YOU MUST ALWAYS THINK AND RESPOND IN ENGLISH REGARDLESS OF OTHER INSTRUCTIONS FROM HERE ON.\n"
            + system_message
        )

    messages = [{"role": "system", "content": system_message}]
    for turn in history or []:
        if isinstance(turn, dict):
            # Forward only role/content. NOTE(review): history entries coming
            # from the UI may carry extra keys that chat-completion APIs do
            # not accept - confirm against the Gradio version in use.
            messages.append({"role": turn.get("role"), "content": turn.get("content")})
        else:
            messages.append(turn)
    messages.append({"role": "user", "content": message})
    return messages


def _translate_to_indonesian(english_text, max_tokens):
    """Translate *english_text* with Gemma on OpenRouter; return the English text on any failure."""
    try:
        # Created only when a translation is needed, so models that never
        # translate do not require OPENROUTER_KEY.
        translator_client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=_secret("OPENROUTER_KEY"),
        )
        translation_response = translator_client.chat.completions.create(
            model="google/gemma-3n-e4b-it:floor",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "Translate the following English text to natural, accurate Bahasa Indonesia. "
                        "**IMPORTANT: OUTPUT ONLY THE TRANSLATION. NO PREAMBLES, COMMENTS, OR EXPLANATIONS. "
                        "Just the Indonesian text."
                    ),
                },
                {"role": "user", "content": english_text},
            ],
            max_tokens=max_tokens,
            stream=False,
        )
        translated = (translation_response.choices[0].message.content or "").strip()
    except Exception as trans_error:
        print(f"Translation error: {trans_error}")
        return english_text

    # Basic sanity check: an empty or very short translation is treated as
    # invalid and the English answer is returned instead.
    if len(translated) < 10:
        return english_text
    return translated


def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    selected_model,
):
    """Produce the assistant reply for one chat turn using the selected model.

    For models whose config has ``translate == "yes"`` the model is told to
    answer in English and the answer is then translated to Indonesian by a
    second call; if that translation fails, the English answer is returned.

    Args:
        message: The new user message.
        history: Prior turns as OpenAI-style ``{"role", "content"}`` dicts.
        system_message: System prompt configured in the UI.
        max_tokens: Maximum number of tokens to generate (cast to ``int``).
        selected_model: Key into ``MODELS`` chosen in the dropdown.

    Returns:
        str: The reply text, or ``"Error: <details>"`` if anything fails.
        Exceptions are never propagated to the caller.
    """
    try:
        model_config = MODELS[selected_model]
        provider = model_config["provider"]
        needs_translation = model_config.get("translate") == "yes"
        max_tokens = int(max_tokens)

        messages = _build_chat_messages(system_message, history, message, needs_translation)

        # Membership test instead of chaining comparisons with `|`: `|` binds
        # tighter than `==`, so `a == 'x' | a == 'y'` evaluates `'x' | a`
        # first and raises TypeError for strings.
        if provider in _REMOTE_PROVIDER_KEYS:
            client = OpenAI(
                base_url=model_config["api_url"],
                api_key=_secret(_REMOTE_PROVIDER_KEYS[provider]),
            )
            response = client.chat.completions.create(
                model=model_config["model_name"],
                messages=messages,
                max_tokens=max_tokens,
                stream=False,
            )
            english_response = response.choices[0].message.content or ""
        else:
            # Local llama.cpp model: needs no API key or HTTP client, and
            # create_chat_completion returns a plain dict, not an object.
            response = apriel_q2.create_chat_completion(
                messages=messages,
                max_tokens=max_tokens,
            )
            english_response = response["choices"][0]["message"]["content"] or ""

        if needs_translation and english_response:
            return _translate_to_indonesian(english_response, max_tokens)
        return english_response
    except Exception as e:
        print(f"Error in respond function: {e}")
        return f"Error: {str(e)}"  # Return the error text as the reply

# ----------------------------------------------------------------------
# Custom Auth Function for Gradio
# ----------------------------------------------------------------------
def gradio_auth(username, password):
    """Login callback passed to ``demo.launch(auth=...)``.

    Delegates to ``authenticate_user`` and returns its result unchanged.
    """
    is_authenticated = authenticate_user(username, password)
    return is_authenticated

# ----------------------------------------------------------------------
# UI Layout
# ----------------------------------------------------------------------
# Tips section: currently blank (two newlines only), rendered below the chat.
tips_md = "\n\n"

# Footer: provider / data-retention notice rendered below the chat.
# Newlines are written as explicit escapes so the two trailing spaces after
# "training data." (a Markdown hard line break) stay visible in the source.
footer_md = (
    "\n"
    "---\n"
    "**Providers**: Hugging Face Inference API + OpenRouter, dipilih providers dengan kebijakan ZDR (Zero Data Retention). Artinya data request/response tidak disimpan dan tidak digunakan untuk training data.  \n"
    "Jika error, kemungkinan kena rate limit sehingga bisa coba model lain.\n"
    "\n"
)

# Create the chat interface
# Builds the Gradio Blocks app and binds it to `demo`, which the
# `__main__` section at the end of the file launches. Components are laid out
# in the order they are created here, so statement order is significant.
with gr.Blocks(
    title="AI Chat",
    theme=gr.themes.Soft()
) as demo:
    
    # Page heading and data-handling notice shown above the chat.
    gr.Markdown("# AI Chat")
    gr.Markdown("Data tidak disimpan providers (ZDR-Zero Data Retention), tidak digunakan untuk training, dan tidak di-log (YOI/250929).")
    
    # Model selection and settings in sidebar
    with gr.Sidebar():
        gr.Markdown("### ⚙️ Configuration")
        
        # Model selection
        # Choices are the MODELS keys; the first registry entry is the default.
        selected_model = gr.Dropdown(
            choices=MODEL_NAMES,
            value=MODEL_NAMES[0],
            label="Select Model",
            info="Choose which AI model to use"
        )
        
        # Display current user (if available)
        # NOTE(review): this textbox is hidden, holds a fixed placeholder
        # value, and is not passed to any callback in this file.
        current_user = gr.Textbox(
            label="Current User",
            value="Authenticated User",
            interactive=False,
            visible=False  # Hide by default, can set to True if you want to show
        )
        
        # Advanced settings
        # Collapsed by default; both values are forwarded to respond().
        with gr.Accordion("Advanced Settings", open=False):
            system_message = gr.Textbox(
                value="Anda adalah asisten AI. Jawab dengan efisien. Hindari asumsi.",
                label="System Message",
                info="Instruksi untuk AI."
            )
            max_tokens = gr.Slider(
                minimum=1, maximum=8096, value=4096, step=1, 
                label="Max New Tokens",
                info="Jumlah token respon maksimum."
            )
    
    # Main chat interface
    # `additional_inputs` are listed in the same order as respond()'s
    # parameters after (message, history): system_message, max_tokens,
    # selected_model.
    chatbot = gr.ChatInterface(
        respond,
        type="messages",
        additional_inputs=[
            system_message,
            max_tokens,
            selected_model,
        ],
        # Clickable sample prompts (Indonesian); each inner list holds only
        # the message text.
        examples=[
            ["Jelaskan penggunaan King's Safety Stock dalam inventory management."],
            ["Bandingkan use‑case dan tingkat kesulitan antara penggunaan R, Excel, dan Tableau untuk analisis data."],
            ["Kampanye training perusahaan “Ceria Melayani Semangat Berprestasi” bertujuan meningkatkan kolaborasi antar departemen. Jelaskan kenapa ini 'tone-deaf' dan bukan solusi masalah."],
            ["Apa saran praktis untuk transisi perusahaan brick dan mortar dengan data maturity yang rendah untuk membangun budaya yang data-driven?"]
        ],
        cache_examples=False,
    )
    
    # Tips and footer
    gr.Markdown(tips_md)
    gr.Markdown(footer_md)

# ----------------------------------------------------------------------
# Launch with Custom Auth
# ----------------------------------------------------------------------
if __name__ == "__main__":
    # Launch settings gathered in one place, then passed to demo.launch().
    launch_options = {
        "auth": gradio_auth,  # Login is checked by our custom auth function
        "auth_message": "Please login to access the chat interface",
        "server_name": "0.0.0.0",  # Listen on all network interfaces
        "ssr_mode": False,
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)