File size: 8,307 Bytes
24b390f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import os, time, sys, asyncio
from typing import List, Dict
import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI

# ---- Windows event loop fix ----
# Only applied on Windows; every other platform keeps asyncio's default policy.
if sys.platform.startswith("win"):
    try:
        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
    except Exception as exc:
        # Best-effort: the app can still start on the default policy, but report
        # the failure instead of hiding it.
        print(
            f"[startup] could not set the Windows Proactor event loop policy: {exc!r}",
            file=sys.stderr,
        )

# ---- Env ----
# Load a local .env file (if present) before reading the settings below.
load_dotenv()

# Credentials / endpoints; each stays None when the variable is not set.
API_KEY = os.environ.get("API_KEY")
HOST = os.environ.get("HOST")  # passed as HOST= to the embedder's remote retrieval call
Embed_Model_Name = os.environ.get("EMBEDDING_MODEL_NAME")
Reranker_Model_Name = os.environ.get("RERANKER_MODEL_NAME")

# Retrieval sizes: K candidates are requested, TOP_N are kept after reranking.
K = int(os.environ.get("K", "8"))
TOP_N = int(os.environ.get("TOP_N", "5"))

# Client-side throttling. MIN_SECONDS_BETWEEN is what the code enforces;
# RPM_LIMIT is only shown in the UI banner.
RPM_LIMIT = 2
MIN_SECONDS_BETWEEN = 30
N_DIM = 384  # presumably the embedding dimension; not referenced in this file - TODO confirm

# ---- OpenAI client ----
# Stays None when API_KEY is unset or empty; respond() checks for that case
# before any request is attempted.
client = (
    OpenAI(api_key=API_KEY, base_url="https://genai.ghaymah.systems")
    if API_KEY
    else None
)

# ---- Your RAG bits ----
from embedder import EmbeddingModel
from Reranker import Reranker

def safe_chat_complete(model: str, messages: List[Dict], max_tokens: int = 9000) -> str:
    """Request a chat completion, backing off and retrying on rate-limit errors.

    Args:
        model: Model identifier forwarded to the chat completions endpoint.
        messages: Chat messages (role/content dicts) to send.
        max_tokens: Upper bound on generated tokens.

    Returns:
        The content of the first choice in the response.

    Raises:
        Exception: Any error that is not recognised as a rate limit is re-raised
            immediately; a rate-limit error is re-raised once all backoff
            delays (5s, 10s, 20s) have been used.
    """
    backoff_seconds = (5, 10, 20)
    retries_used = 0
    while True:
        try:
            completion = client.chat.completions.create(
                model=model,
                messages=messages,
                max_tokens=max_tokens,
                temperature=0.3,
                timeout=60,
            )
            return completion.choices[0].message.content
        except Exception as exc:
            # Rate limits are detected from the error text only.
            text = str(exc)
            rate_limited = "429" in text or "Rate Limit" in text
            if not rate_limited or retries_used >= len(backoff_seconds):
                raise
            time.sleep(backoff_seconds[retries_used])
            retries_used += 1

def build_single_system_context(query: str, max_total_chars: int = 9000, k: int = 10) -> str:
    """Retrieve, rerank and pack context snippets into one system prompt.

    The prompt is: instruction header, then as many reranked snippets as fit,
    then a "Sources:" list. Only sources of snippets that were actually
    included are listed, and the footer is counted against the budget, so the
    returned prompt never exceeds ``max_total_chars`` (as long as the header
    itself fits).

    Args:
        query: User question used for retrieval and reranking.
        max_total_chars: Maximum length of the returned prompt.
        k: Number of candidates requested from the retriever.

    Returns:
        The system prompt. When nothing usable was retrieved, a fixed prompt
        stating that no context is available.
    """
    # NOTE(review): both models are constructed on every call; if construction
    # is expensive, caching the instances is worth considering - confirm.
    Embedder = EmbeddingModel(model_name=Embed_Model_Name)
    RankerModel = Reranker(model_name=Reranker_Model_Name)
    results = Embedder.retrieve_top_k_remote_texts(query, k=k, HOST=HOST)
    Top_sort_results = RankerModel.rerank_results(query, results, top_n=TOP_N)

    # Keep every snippet paired with its source so the footer can be limited
    # to the snippets that end up in the prompt.
    passages = []
    for p in Top_sort_results or []:
        txt = (p.get("text") or "").strip()
        if not txt:
            continue
        src = p.get("source")
        passages.append((txt, src if isinstance(src, str) and src else None))

    if not passages:
        return ("You are a strict RAG assistant. No context was retrieved from the vector store for this query. "
                "If the answer is not present, say you don’t know.")

    header = ("You are a strict RAG assistant. Answer ONLY using the provided context snippets. "
              "If the answer is not present, say you don’t know. ")
    budget = max_total_chars - len(header)

    body_parts, uniq_sources, seen = [], [], set()
    used = 0
    for txt, src in passages:
        piece = txt + "\n\n"
        cost = len(piece)
        cites_new_source = src is not None and src not in seen
        if cites_new_source:
            # A new source adds one "- <source>\n" line to the footer, and the
            # first one also adds the "Sources:\n" heading.
            cost += len(f"- {src}\n")
            if not uniq_sources:
                cost += len("Sources:\n")
        if used + cost > budget:
            # Snippets arrive best-first; stop at the first one that does not fit.
            break
        body_parts.append(piece)
        used += cost
        if cites_new_source:
            uniq_sources.append(src)
            seen.add(src)

    footer = "Sources:\n" + "\n".join(f"- {s}" for s in uniq_sources) + "\n" if uniq_sources else ""
    return (header + "".join(body_parts) + footer).strip()

SYSTEM_SEED = "You are a strict RAG assistant. Answer ONLY using the provided context."


def init_state():
    """Return a fresh conversation state.

    The state holds the message history, seeded with the system message, and
    the timestamp of the last completed model call (None until one happens).
    A new dict and list are built on every call.
    """
    seed_message = {"role": "system", "content": SYSTEM_SEED}
    return {"messages": [seed_message], "last_call_ts": None}

def can_call_now(state: dict) -> bool:
    """Tell whether enough time has passed since the last recorded model call.

    Returns True when no call has been recorded yet, otherwise True only if at
    least MIN_SECONDS_BETWEEN seconds have elapsed since ``last_call_ts``.
    """
    previous = state.get("last_call_ts")
    if previous is None:
        return True
    elapsed = time.time() - previous
    return elapsed >= MIN_SECONDS_BETWEEN

def record_call_time(state: dict):
    """Stamp ``state`` in place with the current wall-clock time of a model call."""
    state.update(last_call_ts=time.time())

def respond(user_message: str, state: dict):
    """Answer one user message and update the conversation state in place.

    Steps: verify configuration, append the user turn to the history, enforce
    the minimum spacing between model calls, build the RAG system prompt, and
    ask the model using the last 10 non-system turns.

    Args:
        user_message: Text the user just sent.
        state: Conversation state with "messages" and "last_call_ts".

    Returns:
        A ``(reply_text, state)`` tuple. The reply can also be a configuration
        notice, a rate-limit notice, or a "Request failed" message.
    """
    # Basic env checks – we still show a bot response so the UI proves it’s working
    required = (
        ("API_KEY", API_KEY),
        ("HOST", HOST),
        ("EMBEDDING_MODEL_NAME", Embed_Model_Name),
        ("RERANKER_MODEL_NAME", Reranker_Model_Name),
    )
    absent = [name for name, value in required if not value]
    if absent:
        return f"Config missing: {', '.join(absent)}. Set them in your .env and restart.", state

    history = state["messages"]
    history.append({"role": "user", "content": user_message})

    if not can_call_now(state):
        # Too soon after the previous call: answer locally with the wait time.
        elapsed = time.time() - (state.get("last_call_ts") or 0)
        wait_s = max(1, int(MIN_SECONDS_BETWEEN - elapsed))
        notice = f"Rate limit in effect. Please wait ~{wait_s} seconds."
        history.append({"role": "assistant", "content": notice})
        return notice, state

    rag_ctx = build_single_system_context(query=user_message, max_total_chars=5000, k=K)
    # The stored system seed is replaced by the freshly built RAG prompt.
    turns = [m for m in history if m["role"] != "system"]
    msgs = [{"role": "system", "content": rag_ctx}, *turns[-10:]]

    try:
        reply = safe_chat_complete("DeepSeek-V3-0324", msgs, max_tokens=1000)
        # Only successful calls move the rate-limit timestamp.
        record_call_time(state)
    except Exception as e:
        reply = f"Request failed: {e}"

    history.append({"role": "assistant", "content": reply})
    return reply, state

# ------------------- Gradio UI: messages API (Gradio >= 5) -------------------
with gr.Blocks(title="Ghaymah Chatbot") as demo:
    # Page title.
    gr.Markdown("# 🤖 Ghaymah Chatbot ")
    # Status banner. NOTE: "Connected" is literal text, not the result of a
    # connectivity check; the other values come from module-level settings.
    gr.Markdown(
        "Vector store: **Connected**  \n"
        f"Embedder: `{Embed_Model_Name or 'unset'}`  \n"
        f"RPM limit: **{RPM_LIMIT}** (min {MIN_SECONDS_BETWEEN}s between calls)  \n"
    )

    # Conversation state passed to and returned from the callbacks.
    state = gr.State(init_state())  # {"messages": [...], "last_call_ts": ...}

    # Start with an explicit empty list so it's never None
    chatbot = gr.Chatbot(label="Chat", height=520, type="messages", value=[])

    # Input row: the message box.
    with gr.Row():
        txt = gr.Textbox(
            placeholder="Ask anything about the Ghaymah documentation…",
            label="Your message",
            lines=2,
            autofocus=True,
        )
    # Action row: send the message or reset the conversation.
    with gr.Row():
        send_btn = gr.Button("Send", variant="primary")
        clear_btn = gr.Button("Clear")

    # Step 1: add a user message immediately
    def _on_user_submit(user_input, chat_messages):
        """Echo the user's text into the chat and clear the textbox.

        Args:
            user_input: Current textbox content.
            chat_messages: Current chat history (list of role/content dicts),
                possibly None.

        Returns:
            ``(textbox_value, chat_messages)``. Empty input leaves the chat
            unchanged; otherwise a new list with the user message appended is
            returned and the textbox is cleared. The input list is not mutated.
        """
        try:
            if not user_input:
                return "", (chat_messages or [])
            chat_messages = chat_messages or []  # guard for None
            updated = chat_messages + [{"role": "user", "content": user_input}]
            print("[on_submit] user:", user_input)
            return "", updated
        except Exception as e:
            print("[on_submit][ERROR]", repr(e))
            # keep textbox text so you can retry; don't mutate chat on error
            return user_input, (chat_messages or [])

    # This handler is attached to txt.submit / send_btn.click once, further
    # down, chained with the bot step. It used to be registered here as well,
    # which made every Enter/Send run it twice.

    # Step 2: call backend and append assistant message
    def _bot_step(chat_messages, state):
        """Answer the pending user message, if there is one.

        A reply is produced only when the newest chat message is a user
        message with text content. Searching further back would answer an
        already-answered question again whenever step 1 added nothing (for
        example, Send pressed with an empty textbox).

        Args:
            chat_messages: Current chat history (list of role/content dicts),
                possibly None.
            state: Conversation state forwarded to ``respond``.

        Returns:
            ``(chat_messages, state)``, with the assistant reply (or an error
            notice) appended when a reply was attempted.
        """
        try:
            chat_messages = chat_messages or []
            newest = chat_messages[-1] if chat_messages else None
            pending = (
                newest is not None
                and newest.get("role") == "user"
                and isinstance(newest.get("content"), str)
            )
            if not pending:
                print("[bot_step] no user message found")
                return chat_messages, state

            last_user = newest["content"]
            print("[bot_step] responding to:", last_user)
            bot_reply, new_state = respond(last_user, state)  # <-- your 2-arg respond

            updated = chat_messages + [{"role": "assistant", "content": bot_reply}]
            return updated, new_state

        except Exception as e:
            print("[bot_step][ERROR]", repr(e))
            # show the error in the chat so you see *something* in the UI
            updated = (chat_messages or []) + [
                {"role": "assistant", "content": f"⚠️ Internal error: {e}"}
            ]
            return updated, state

    # Submit (Enter) and Click (Send) share one pipeline: echo the user message
    # first, then run the bot step on the updated chat.
    for register in (txt.submit, send_btn.click):
        register(_on_user_submit, [txt, chatbot], [txt, chatbot]).then(
            _bot_step, [chatbot, state], [chatbot, state]
        )

    def _clear():
        """Reset the UI: an empty chat and a freshly initialised state."""
        print("[clear] resetting state and chat")
        empty_chat = []
        return empty_chat, init_state()

    # The Clear button takes no inputs; it overwrites both the chat and the state.
    clear_btn.click(fn=_clear, outputs=[chatbot, state])

if __name__ == "__main__":
    # Enable the request queue, then start the server in debug mode.
    demo.queue().launch(debug=True)