import gradio as gr from huggingface_hub import InferenceClient import json from datetime import datetime, timedelta import os # List of 20 well-known large language models MODELS = [ "meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.1-405B-Instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3", "google/gemma-2-27b-it", "google/gemma-2-9b-it", "Qwen/Qwen2.5-72B-Instruct", "Qwen/Qwen2.5-Coder-32B-Instruct", "microsoft/Phi-3.5-mini-instruct", "tiiuae/falcon-180B-chat", "HuggingFaceH4/zephyr-7b-beta", "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", "01-ai/Yi-34B-Chat", "databricks/dbrx-instruct", "openchat/openchat-3.5-0106", "teknium/OpenHermes-2.5-Mistral-7B", "cognitivecomputations/dolphin-2.6-mixtral-8x7b", "Nexusflow/Starling-LM-7B-beta", "EleutherAI/llemma_34b", "upstage/SOLAR-10.7B-Instruct-v1.0" ] def get_usage_data(request: gr.Request): """Get usage data from browser storage""" try: # This will be handled by JavaScript return {"chats_used": 0, "reset_time": None} except: return {"chats_used": 0, "reset_time": None} def check_usage_limit(chats_used, reset_time): """Check if user has exceeded usage limit""" if reset_time: reset_dt = datetime.fromisoformat(reset_time) if datetime.now() > reset_dt: return 0, None # Reset the counter return chats_used, reset_time def chat_with_model(message, history, model_name, hf_token, chats_used, reset_time): """Chat with the selected model""" # Check usage limit current_chats, current_reset = check_usage_limit(chats_used, reset_time) if current_chats >= 2: if current_reset: reset_dt = datetime.fromisoformat(current_reset) return history, current_chats, current_reset, f"⚠️ You've used your 2 free chats this month. Next reset: {reset_dt.strftime('%Y-%m-%d %H:%M')}" return history, current_chats, current_reset, "⚠️ You've used your 2 free chats this month." if not hf_token: return history, current_chats, current_reset, "⚠️ Please log in with your Hugging Face token first." if not message.strip(): return history, current_chats, current_reset, "" try: # Initialize client with user's token client = InferenceClient(token=hf_token) # Prepare messages for API messages = [] for user_msg, assistant_msg in history: messages.append({"role": "user", "content": user_msg}) if assistant_msg: messages.append({"role": "assistant", "content": assistant_msg}) messages.append({"role": "user", "content": message}) # Stream response response_text = "" history.append([message, ""]) for chunk in client.chat_completion( model=model_name, messages=messages, max_tokens=2000, stream=True, ): if chunk.choices[0].delta.content: response_text += chunk.choices[0].delta.content history[-1][1] = response_text yield history, current_chats, current_reset, "" # Increment usage counter new_chats_used = current_chats + 1 new_reset_time = current_reset if new_reset_time is None: # Set reset time to 1 month from now new_reset_time = (datetime.now() + timedelta(days=30)).isoformat() status_msg = f"✅ Chat successful! Chats used: {new_chats_used}/2" if new_chats_used >= 2: reset_dt = datetime.fromisoformat(new_reset_time) status_msg += f" | Next reset: {reset_dt.strftime('%Y-%m-%d %H:%M')}" yield history, new_chats_used, new_reset_time, status_msg except Exception as e: error_msg = f"❌ Error: {str(e)}" if "429" in str(e): error_msg = "❌ Rate limit exceeded. Please try again later." elif "401" in str(e) or "403" in str(e): error_msg = "❌ Invalid Hugging Face token. Please check your token." yield history, current_chats, current_reset, error_msg # Custom CSS css = """ #header { text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px; } #header a { color: #FFD700; text-decoration: none; font-weight: bold; font-size: 0.9em; } #header a:hover { text-decoration: underline; } #chatbot { height: 500px; } .usage-info { padding: 10px; border-radius: 5px; margin: 10px 0; } """ # JavaScript for localStorage management js_code = """ function() { // Load usage data from localStorage const usageData = localStorage.getItem('hf_chat_usage'); let chatsUsed = 0; let resetTime = null; if (usageData) { const data = JSON.parse(usageData); chatsUsed = data.chats_used || 0; resetTime = data.reset_time || null; // Check if reset time has passed if (resetTime && new Date() > new Date(resetTime)) { chatsUsed = 0; resetTime = null; localStorage.setItem('hf_chat_usage', JSON.stringify({chats_used: 0, reset_time: null})); } } return [chatsUsed, resetTime]; } """ # Build the Gradio interface with gr.Blocks(css=css, theme=gr.themes.Soft(), title="HF Model Chat - 2 Free Chats/Month") as demo: # Header with attribution gr.HTML("""
Chat with 20+ Large Language Models | 2 Free Chats per Month