MohamedFahim committed on
Commit
102c719
·
verified ·
1 Parent(s): b43c9ef

Update interface.py

Browse files
Files changed (1) hide show
  1. interface.py +380 -435
interface.py CHANGED
@@ -2,297 +2,107 @@ import gradio as gr
2
  import requests
3
  import time
4
  import os
5
- import json
6
- from typing import Optional, Tuple, List
7
 
 
 
8
 
9
- # ==================== API CONFIGURATION ====================
10
-
11
- # For Hugging Face Spaces, both Gradio and FastAPI run in same container
12
- API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000")
13
- MAX_RETRIES = 3
14
- RETRY_DELAY = 5
15
-
16
- # ==================== UTILITY FUNCTIONS ====================
17
-
18
- def check_api_health(max_attempts=3) -> Tuple[bool, dict]:
19
- """Check API health with retry logic"""
20
- for attempt in range(max_attempts):
21
- try:
22
- response = requests.get(f"{API_BASE_URL}/health_check", timeout=10)
23
- if response.status_code == 200:
24
- return True, response.json()
25
- except Exception as e:
26
- if attempt < max_attempts - 1:
27
- time.sleep(2)
28
- return False, {}
29
-
30
- def make_request_with_retry(method: str, endpoint: str, **kwargs) -> requests.Response:
31
- """Make API request with retry logic"""
32
- url = f"{API_BASE_URL}{endpoint}"
33
-
34
- for attempt in range(MAX_RETRIES):
35
- try:
36
- if method.upper() == "GET":
37
- response = requests.get(url, **kwargs)
38
- elif method.upper() == "POST":
39
- response = requests.post(url, **kwargs)
40
- elif method.upper() == "DELETE":
41
- response = requests.delete(url, **kwargs)
42
- else:
43
- raise ValueError(f"Unsupported method: {method}")
44
-
45
- if response.status_code in [200, 201]:
46
- return response
47
- elif response.status_code == 500 and attempt < MAX_RETRIES - 1:
48
- time.sleep(RETRY_DELAY)
49
- continue
50
- else:
51
- response.raise_for_status()
52
-
53
- except requests.exceptions.ConnectionError:
54
- if attempt < MAX_RETRIES - 1:
55
- time.sleep(RETRY_DELAY)
56
- else:
57
- raise gr.Error("❌ Cannot connect to API. Please refresh and try again.", duration=10)
58
- except requests.exceptions.Timeout:
59
- if attempt < MAX_RETRIES - 1:
60
- time.sleep(RETRY_DELAY)
61
- else:
62
- raise gr.Error("⏱️ Request timeout. Please try again.", duration=10)
63
- except Exception as e:
64
- if attempt == MAX_RETRIES - 1:
65
- raise gr.Error(f"❌ Error: {str(e)}", duration=10)
66
-
67
- raise gr.Error("❌ Maximum retries exceeded. Please try again later.", duration=10)
68
-
69
- # ==================== DOCUMENT UPLOAD FUNCTIONS ====================
70
-
71
- def upload_single_document(file, collection_name: str, progress=gr.Progress()) -> Tuple[str, dict]:
72
- """Upload single document with progress tracking"""
73
- if not file:
74
- raise gr.Error("❌ Please select a file to upload", duration=5)
75
-
76
- if not collection_name:
77
- collection_name = "default"
78
-
79
  try:
80
- progress(0.1, desc="πŸ“€ Uploading document...")
81
-
82
- # Handle file path (Gradio returns file path as string or file object)
83
- file_path = file.name if hasattr(file, 'name') else file
84
-
85
- # Ensure file exists
86
- if not os.path.exists(file_path):
87
- raise gr.Error(f"❌ File not found: {file_path}", duration=5)
88
-
89
- with open(file_path, 'rb') as f:
90
- files = {
91
- 'file': (os.path.basename(file_path), f, 'application/octet-stream')
92
- }
93
- params = {'collection_name': collection_name}
94
-
95
- progress(0.4, desc="πŸ”„ Processing document...")
96
- response = make_request_with_retry(
97
- "POST",
98
- "/upload_document",
99
- files=files,
100
- params=params,
101
- timeout=180
102
- )
103
-
104
- progress(1, desc="βœ… Upload complete!")
105
-
106
- result = response.json()
107
- status_msg = f"""βœ… Successfully uploaded '{result['filename']}'
108
- πŸ“Š Created {result['chunks_created']} chunks
109
- πŸ—‚οΈ Collection: {collection_name}
110
- πŸ“ File Type: {result['file_type']}"""
111
-
112
- return status_msg, result
113
-
114
- except gr.Error:
115
- raise
116
- except Exception as e:
117
- raise gr.Error(f"❌ Upload failed: {str(e)}", duration=10)
118
 
119
- def upload_multiple_documents(files, collection_name: str, progress=gr.Progress()) -> Tuple[str, dict]:
120
- """Upload multiple documents with progress tracking"""
121
- if not files or len(files) == 0:
122
- raise gr.Error("❌ Please select files to upload", duration=5)
123
-
124
- if not collection_name:
125
- collection_name = "default"
126
-
127
  try:
128
- progress(0.1, desc=f"πŸ“€ Uploading {len(files)} documents...")
129
-
130
- files_to_upload = []
131
- for idx, file in enumerate(files):
132
- file_path = file.name if hasattr(file, 'name') else file
133
-
134
- # Check if file exists
135
- if not os.path.exists(file_path):
136
- continue
137
-
138
- with open(file_path, 'rb') as f:
139
- file_content = f.read()
140
- files_to_upload.append(
141
- ('files', (os.path.basename(file_path), file_content, 'application/octet-stream'))
142
- )
143
- progress((idx + 1) / len(files) * 0.5, desc=f"Reading file {idx + 1}/{len(files)}...")
144
-
145
- if not files_to_upload:
146
- raise gr.Error("❌ No valid files found to upload", duration=5)
147
-
148
- progress(0.5, desc="πŸ”„ Processing all documents...")
149
- params = {'collection_name': collection_name}
150
- response = make_request_with_retry(
151
- "POST",
152
- "/upload_multiple_documents",
153
- files=files_to_upload,
154
- params=params,
155
- timeout=300
156
- )
157
-
158
- progress(1, desc="βœ… All uploads complete!")
159
-
160
- result = response.json()
161
- status_msg = f"""βœ… Successfully uploaded {result['successful_uploads']} files
162
- ❌ Failed: {result['failed_uploads']}
163
- πŸ—‚οΈ Collection: {collection_name}"""
164
-
165
- return status_msg, result
166
-
167
- except gr.Error:
168
- raise
169
- except Exception as e:
170
- raise gr.Error(f"❌ Upload failed: {str(e)}", duration=10)
171
 
172
- def query_rag_documents(query: str, collection_name: str, top_k: int, progress=gr.Progress()) -> Tuple[str, str, dict]:
173
- """Query documents with progress tracking"""
174
- if not query:
175
- raise gr.Error("❌ Please enter a query", duration=5)
176
-
177
- if not collection_name:
178
- raise gr.Error("❌ Please select a collection", duration=5)
179
-
180
  try:
181
- progress(0.3, desc="πŸ” Searching documents...")
182
-
183
- response = make_request_with_retry(
184
- "POST",
185
- "/query_documents",
186
- json={
187
- "query": query,
188
- "collection_name": collection_name,
189
- "top_k": top_k
190
- },
191
- timeout=60
192
- )
193
-
194
- progress(0.8, desc="πŸ€– Generating answer...")
195
- result = response.json()
196
-
197
- progress(1, desc="βœ… Complete!")
198
-
199
- if "I couldn't find this information" in result['answer']:
200
- status_msg = "⚠️ No relevant information found in documents"
201
  else:
202
- status_msg = f"βœ… Found relevant information from {len(result['sources'])} sources"
203
-
204
- answer_text = f"**Query:** {result['query']}\n\n**Answer:** {result['answer']}"
205
-
206
- return status_msg, answer_text, result['sources']
207
-
208
- except gr.Error:
209
- raise
210
- except Exception as e:
211
- raise gr.Error(f"❌ Query failed: {str(e)}", duration=10)
212
 
213
- def list_all_collections() -> Tuple[str, dict, gr.Dropdown]:
214
- """List all collections with error handling"""
215
  try:
216
- response = make_request_with_retry("GET", "/list_collections", timeout=10)
217
- result = response.json()
218
- collections = result['collections']
219
-
220
- if not collections:
221
- return "πŸ“‚ No collections found. Upload documents to create a collection.", None, gr.Dropdown(choices=["default"], value="default")
222
-
223
- summary = f"πŸ“Š **Total Collections:** {len(collections)}\n\n"
224
- for col in collections:
225
- summary += f"πŸ—‚οΈ **{col['collection_name']}**\n"
226
- summary += f" - Chunks: {col['total_chunks']}\n"
227
- summary += f" - Dimension: {col['dimension']}\n\n"
228
-
229
- collection_names = [col['collection_name'] for col in collections]
230
-
231
- return summary, result, gr.Dropdown(choices=collection_names, value=collection_names[0] if collection_names else "default")
232
-
233
- except Exception as e:
234
- raise gr.Error(f"❌ Failed to list collections: {str(e)}", duration=10)
235
 
236
- def delete_collection(collection_name: str) -> Tuple[str, str, dict, gr.Dropdown]:
237
- """Delete collection with confirmation"""
238
- if not collection_name:
239
- raise gr.Error("❌ Please select a collection to delete", duration=5)
 
 
 
240
 
241
  try:
242
- response = make_request_with_retry(
243
- "DELETE",
244
- f"/delete_collection/{collection_name}",
245
- timeout=10
246
- )
247
-
248
- status = f"βœ… Successfully deleted collection '{collection_name}'"
249
 
250
- # Refresh collections list
251
- summary, result, dropdown = list_all_collections()
 
252
 
253
- return status, summary, result, dropdown
254
 
255
  except Exception as e:
256
- raise gr.Error(f"❌ Failed to delete collection: {str(e)}", duration=10)
257
 
258
- def get_system_health() -> Tuple[str, dict]:
259
- """Get system health information"""
 
 
 
 
 
 
260
  try:
261
- is_healthy, health_data = check_api_health()
 
262
 
263
- if not is_healthy:
264
- raise gr.Error("❌ System is offline. Please refresh the page.", duration=None)
 
265
 
266
- health_summary = f"""🟒 **System Status: Healthy**
267
-
268
- πŸ“Š **Configuration:**
269
- - Supabase: {'βœ… Configured' if health_data.get('supabase_configured') else '❌ Not Configured'}
270
- - Groq API: {'βœ… Configured' if health_data.get('groq_configured') else '❌ Not Configured'}
271
- - Embedding Model: {health_data.get('embedding_model', 'N/A')}
272
-
273
- πŸ“ **Vector Stores:**
274
- - Total Collections: {health_data.get('vector_stores', 0)}
275
- - Total Chunks: {health_data.get('total_chunks', 0)}
276
- - Storage Path: {health_data.get('persistent_storage', 'N/A')}
277
-
278
- πŸ“š **Available Collections:**
279
- {', '.join(health_data.get('collections', [])) if health_data.get('collections') else 'None'}
280
- """
281
- return health_summary, health_data
282
 
283
- except gr.Error:
284
- raise
285
  except Exception as e:
286
- raise gr.Error(f"❌ Health check failed: {str(e)}", duration=10)
287
-
288
- # ==================== GRADIO UI ====================
289
 
290
- # Custom CSS
291
  custom_css = """
292
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap');
293
  * {
294
  font-family: 'Inter', sans-serif !important;
295
  }
 
296
  .gradio-container {
297
  background: linear-gradient(-45deg, #0f0c29, #302b63, #24243e, #1a1a2e);
298
  background-size: 400% 400%;
@@ -303,6 +113,7 @@ custom_css = """
303
  50% { background-position: 100% 50%; }
304
  100% { background-position: 0% 50%; }
305
  }
 
306
  .main-container {
307
  backdrop-filter: blur(20px);
308
  background: rgba(20, 20, 30, 0.85);
@@ -313,9 +124,16 @@ custom_css = """
313
  animation: fadeInUp 0.8s ease;
314
  }
315
  @keyframes fadeInUp {
316
- from { opacity: 0; transform: translateY(30px); }
317
- to { opacity: 1; transform: translateY(0); }
 
 
 
 
 
 
318
  }
 
319
  .animated-title {
320
  background: linear-gradient(135deg, #00f2fe 0%, #4facfe 50%, #00c6ff 100%);
321
  background-size: 200% 200%;
@@ -334,6 +152,7 @@ custom_css = """
334
  50% { background-position: 100% 50%; }
335
  100% { background-position: 0% 50%; }
336
  }
 
337
  .floating-icon {
338
  animation: float 3s ease-in-out infinite;
339
  display: inline-block;
@@ -342,6 +161,7 @@ custom_css = """
342
  0%, 100% { transform: translateY(0px); }
343
  50% { transform: translateY(-10px); }
344
  }
 
345
  textarea, input[type="text"] {
346
  font-size: 1.1rem !important;
347
  border-radius: 12px !important;
@@ -356,11 +176,14 @@ textarea:focus, input[type="text"]:focus {
356
  transform: translateY(-2px);
357
  background: rgba(35, 35, 50, 0.95) !important;
358
  }
 
359
  label {
360
  font-weight: 600 !important;
361
  color: #b0b0b0 !important;
362
  font-size: 1.1rem !important;
 
363
  }
 
364
  .gr-button {
365
  background: linear-gradient(135deg, #00f2fe 0%, #4facfe 100%) !important;
366
  color: #0a0a0f !important;
@@ -375,224 +198,346 @@ label {
375
  .gr-button:hover {
376
  transform: translateY(-3px) !important;
377
  box-shadow: 0 6px 25px rgba(0, 242, 254, 0.6) !important;
 
378
  }
 
 
 
 
379
  .output-box {
380
  background: rgba(30, 30, 45, 0.95) !important;
381
  border-radius: 16px !important;
382
  border: 1px solid rgba(0, 242, 254, 0.2) !important;
383
  backdrop-filter: blur(10px);
 
384
  box-shadow: 0 4px 15px rgba(0, 0, 0, 0.3) !important;
385
  color: #e0e0e0 !important;
386
  padding: 1.5rem !important;
387
- min-height: 150px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
  }
389
  .output-box strong {
390
  color: #4facfe !important;
391
  font-weight: 600 !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
  }
393
  """
394
 
395
- # Build interface
396
- with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="RAG Document Assistant") as interface:
397
  gr.HTML("""
398
  <div class="main-container">
399
  <h1 class="animated-title">
400
- <span class="floating-icon">πŸ“„</span> RAG Document Assistant
401
  </h1>
402
- <p class="subtitle" style="color: #b0b0b0; font-size: 1.3rem; text-align: center; margin-bottom: 2rem;">
403
- Upload documents (PDF/Markdown/TXT) and ask questions using AI-powered retrieval
404
  </p>
405
  </div>
406
  """)
407
 
408
  with gr.Tabs() as tabs:
409
- # Document Upload Tab
410
- with gr.Tab("πŸ“„ Document Upload & Query"):
411
- gr.Markdown("""
412
- ### Upload PDF or Markdown documents and query them using RAG
413
- - Supports **PDF**, **Markdown**, and **TXT** files
414
- - Documents are chunked and stored in FAISS vector database
415
- - Organize documents into collections for better management
416
- """)
417
-
418
  with gr.Row():
419
  with gr.Column():
420
- gr.Markdown("#### πŸ“€ Upload Documents")
421
- collection_name_upload = gr.Textbox(
422
- label="πŸ—‚οΈ Collection Name",
423
- placeholder="default",
424
- value="default"
425
- )
426
-
427
- with gr.Tab("Single File"):
428
- file_upload_single = gr.File(
429
- label="πŸ“ Select Document (PDF/Markdown/TXT)",
430
- file_types=[".pdf", ".md", ".txt"]
431
- )
432
- upload_btn_single = gr.Button("πŸ“€ Upload Single Document", variant="primary")
433
- upload_status_single = gr.Textbox(label="πŸ“Š Upload Status", elem_classes="output-box")
434
- upload_result_single = gr.JSON(label="πŸ“‹ Upload Details", elem_classes="output-box")
435
-
436
- with gr.Tab("Multiple Files"):
437
- file_upload_multi = gr.File(
438
- label="πŸ“ Select Documents (PDF/Markdown/TXT)",
439
- file_count="multiple",
440
- file_types=[".pdf", ".md", ".txt"]
441
- )
442
- upload_btn_multi_doc = gr.Button("πŸ“€ Upload Multiple Documents", variant="primary")
443
- upload_status_multi = gr.Textbox(label="πŸ“Š Upload Status", elem_classes="output-box")
444
- upload_result_multi = gr.JSON(label="πŸ“‹ Upload Details", elem_classes="output-box")
445
-
446
- with gr.Column():
447
- gr.Markdown("#### πŸ” Query Documents")
448
-
449
- refresh_btn = gr.Button("πŸ”„ Refresh Collections", variant="secondary")
450
-
451
- collection_dropdown = gr.Dropdown(
452
- label="πŸ—‚οΈ Select Collection",
453
- choices=["default"],
454
- value="default"
455
  )
456
-
457
- query_input = gr.Textbox(
458
- label="πŸ’­ Your Question",
459
- placeholder="Ask a question about your documents...",
460
- lines=3
461
  )
462
-
463
- top_k_slider = gr.Slider(
464
- minimum=1,
465
- maximum=10,
466
- value=3,
467
- step=1,
468
- label="πŸ“Š Number of Sources (top-k)"
469
- )
470
-
471
- query_btn = gr.Button("πŸ” Search Documents", variant="primary")
472
-
473
- query_status = gr.Textbox(label="πŸ“Š Query Status", elem_classes="output-box")
474
- query_response = gr.Markdown(label="πŸ€– AI Answer", elem_classes="output-box")
475
- query_sources = gr.JSON(label="πŸ“š Source Citations", elem_classes="output-box")
476
-
477
- # Connect buttons
478
- upload_btn_single.click(
479
- fn=upload_single_document,
480
- inputs=[file_upload_single, collection_name_upload],
481
- outputs=[upload_status_single, upload_result_single]
482
- )
483
-
484
- upload_btn_multi_doc.click(
485
- fn=upload_multiple_documents,
486
- inputs=[file_upload_multi, collection_name_upload],
487
- outputs=[upload_status_multi, upload_result_multi]
488
- )
489
-
490
- query_btn.click(
491
- fn=query_rag_documents,
492
- inputs=[query_input, collection_dropdown, top_k_slider],
493
- outputs=[query_status, query_response, query_sources]
494
- )
495
-
496
- def refresh_collections():
497
- _, _, dropdown = list_all_collections()
498
- return dropdown
499
-
500
- refresh_btn.click(
501
- fn=refresh_collections,
502
- outputs=[collection_dropdown]
503
- )
504
-
505
- gr.HTML("""
506
- <div class="example-box" style="background: linear-gradient(135deg, rgba(0, 242, 254, 0.1) 0%, rgba(79, 172, 254, 0.1) 100%); border-radius: 16px; padding: 1.5rem; border-left: 4px solid #00f2fe; margin-top: 2rem;">
507
- <h3 style="margin-top: 0; font-size: 1.4rem; color: #e0e0e0;">
508
- <span class="floating-icon">πŸ’‘</span> Example Usage
509
- </h3>
510
- <p style="font-size: 1.1rem; color: #e0e0e0;"><strong>1.</strong> Upload your PDF/Markdown documents to a collection</p>
511
- <p style="font-size: 1.1rem; color: #e0e0e0;"><strong>2.</strong> Ask questions like: "What are the main findings?" or "Summarize the methodology"</p>
512
- <p style="font-size: 1.1rem; color: #e0e0e0;"><strong>3.</strong> System returns answers with source citations</p>
513
- </div>
514
- """)
515
-
516
- # Collection Management Tab
517
- with gr.Tab("πŸ—‚οΈ Collection Management"):
518
- gr.Markdown("### Manage Your Document Collections")
519
 
520
  with gr.Row():
521
  with gr.Column():
522
- list_btn = gr.Button("πŸ“‹ List All Collections", variant="primary")
523
- collections_output = gr.Markdown(label="πŸ“Š Collections Overview", elem_classes="output-box")
524
- collections_json = gr.JSON(label="πŸ“‹ Detailed Information", elem_classes="output-box")
525
-
526
- with gr.Column():
527
- gr.Markdown("#### πŸ—‘οΈ Delete Collection")
528
- collection_to_delete = gr.Dropdown(label="πŸ—‚οΈ Select Collection to Delete", choices=["default"])
529
- delete_btn = gr.Button("πŸ—‘οΈ Delete Collection", variant="stop")
530
- delete_status = gr.Textbox(label="πŸ“Š Status", elem_classes="output-box")
531
 
532
- list_btn.click(
533
- fn=list_all_collections,
534
- outputs=[collections_output, collections_json, collection_to_delete]
 
535
  )
536
-
537
- delete_btn.click(
538
- fn=delete_collection,
539
- inputs=[collection_to_delete],
540
- outputs=[delete_status, collections_output, collections_json, collection_to_delete]
541
- )
542
-
543
- gr.HTML("""
544
- <div class="note-box" style="background: linear-gradient(135deg, rgba(0, 242, 254, 0.08) 0%, rgba(79, 172, 254, 0.08) 100%); border-radius: 12px; padding: 1rem; border-left: 4px solid #00f2fe; margin-top: 1rem;">
545
- <p style="margin: 0; font-size: 1.05rem; color: #00c6ff;">
546
- ⚠️ <strong>Warning:</strong> Deleting a collection is permanent and cannot be undone. All documents in the collection will be removed.
547
- </p>
548
- </div>
549
- """)
550
 
551
- # System Health Tab
552
- with gr.Tab("βš™οΈ System Health"):
553
- gr.Markdown("### System Status & Configuration")
 
 
 
 
 
 
 
 
 
 
 
 
554
 
555
- health_check_btn = gr.Button("πŸ” Check System Health", variant="primary")
556
- health_output = gr.Markdown(label="🟒 System Status", elem_classes="output-box")
557
- health_json = gr.JSON(label="πŸ“‹ Detailed Configuration", elem_classes="output-box")
 
 
558
 
559
- health_check_btn.click(
560
- fn=get_system_health,
561
- outputs=[health_output, health_json]
 
562
  )
563
-
564
- gr.HTML("""
565
- <div class="example-box" style="background: linear-gradient(135deg, rgba(0, 242, 254, 0.1) 0%, rgba(79, 172, 254, 0.1) 100%); border-radius: 16px; padding: 1.5rem; border-left: 4px solid #00f2fe; margin-top: 2rem;">
566
- <h3 style="margin-top: 0; font-size: 1.4rem; color: #e0e0e0;">
567
- <span class="floating-icon">πŸ“Š</span> Health Check Information
568
- </h3>
569
- <p style="font-size: 1.1rem; color: #e0e0e0;"><strong>Supabase:</strong> Cloud storage for documents (optional)</p>
570
- <p style="font-size: 1.1rem; color: #e0e0e0;"><strong>Groq API:</strong> LLM for generating answers</p>
571
- <p style="font-size: 1.1rem; color: #e0e0e0;"><strong>Vector Stores:</strong> FAISS collections for document embeddings</p>
572
- </div>
573
- """)
574
 
575
  gr.HTML("""
576
- <div class="note-box" style="margin-top: 2rem; background: linear-gradient(135deg, rgba(0, 242, 254, 0.08) 0%, rgba(79, 172, 254, 0.08) 100%); border-radius: 12px; padding: 1rem; border-left: 4px solid #00f2fe;">
577
- <p style="margin: 0; font-size: 1.05rem; color: #00c6ff;">
578
- ℹ️ <strong>Note:</strong> This app features automatic retry logic and progress tracking.
579
- If you encounter errors, the system will automatically retry. Large files may take longer to process.
 
 
 
 
 
 
 
580
  </p>
581
  </div>
582
  """)
583
 
584
- # Launch configuration for Hugging Face Spaces
585
  if __name__ == "__main__":
586
- # Check API health on startup
587
- is_healthy, _ = check_api_health(max_attempts=5)
588
-
589
- if not is_healthy:
590
- print("⚠️ Warning: API is not responding. The app will launch but may not work correctly.")
591
-
592
  interface.launch(
593
  server_name="0.0.0.0",
594
  server_port=7860,
595
  share=False,
596
  show_error=True,
597
- show_api=False
598
- )
 
2
  import requests
3
  import time
4
  import os
 
 
5
 
6
# Base URL of the FastAPI backend. On Hugging Face Spaces both services run in
# the same container, so localhost is the default; the API_BASE_URL environment
# variable can override it for other deployments. A trailing slash is removed
# so that f"{API_BASE_URL}/endpoint" never yields a double slash.
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000").rstrip("/")
8
 
9
def extract_links(url):
    """Extract the unique links of a web page through the backend API.

    Posts ``{"url": url}`` as JSON to ``{API_BASE_URL}/extract_links`` with a
    30 second timeout.

    Args:
        url: Address of the page whose links should be collected.

    Returns:
        The value stored under ``unique_links`` in the JSON response.

    Raises:
        Exception: If the request cannot be completed, the API answers with a
            status other than 200, or the body is not JSON containing a
            ``unique_links`` entry. The original error is chained as the cause.
    """
    endpoint = f"{API_BASE_URL}/extract_links"
    payload = {"url": url}

    # Only the network call is guarded here, so that a malformed response body
    # is not mislabelled as a connection problem further down.
    try:
        response = requests.post(endpoint, json=payload, timeout=30)
    except requests.exceptions.RequestException as e:
        raise Exception(f"Connection error: {str(e)}") from e

    if response.status_code != 200:
        raise Exception(f"Failed to extract links: {response.text}")

    try:
        return response.json()["unique_links"]
    except (ValueError, KeyError, TypeError) as e:
        # ValueError: body is not JSON; KeyError/TypeError: unexpected shape.
        raise Exception(
            f"Failed to extract links: unexpected API response ({e!r})"
        ) from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
def extract_text(urls):
    """Ask the backend API to extract the text of the given URLs.

    Posts ``urls`` as the JSON body to ``{API_BASE_URL}/extract_text`` with a
    60 second timeout.

    Args:
        urls: The URLs to process; callers in this file pass a list.

    Returns:
        The value stored under ``file_saved`` in the JSON response.

    Raises:
        Exception: If the request cannot be completed, the API answers with a
            status other than 200, or the body is not JSON containing a
            ``file_saved`` entry. The original error is chained as the cause.
    """
    endpoint = f"{API_BASE_URL}/extract_text"

    # Only the network call is guarded here, so that a malformed response body
    # is not mislabelled as a connection problem further down.
    try:
        response = requests.post(endpoint, json=urls, timeout=60)
    except requests.exceptions.RequestException as e:
        raise Exception(f"Connection error: {str(e)}") from e

    if response.status_code != 200:
        raise Exception(f"Failed to extract text: {response.text}")

    try:
        return response.json()["file_saved"]
    except (ValueError, KeyError, TypeError) as e:
        # ValueError: body is not JSON; KeyError/TypeError: unexpected shape.
        raise Exception(
            f"Failed to extract text: unexpected API response ({e!r})"
        ) from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
def perform_rag(file_path, prompt):
    """Run a RAG query on previously extracted text through the backend API.

    Posts ``{"file_path": file_path, "prompt": prompt}`` as JSON to
    ``{API_BASE_URL}/rag`` with a 60 second timeout.

    Args:
        file_path: Location of the extracted text, as returned by
            ``extract_text``.
        prompt: The question to answer.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the request cannot be completed, the API answers with a
            status other than 200, or the body is not valid JSON. The original
            error is chained as the cause.
    """
    endpoint = f"{API_BASE_URL}/rag"
    payload = {"file_path": file_path, "prompt": prompt}

    # Only the network call is guarded here, so that a malformed response body
    # is not mislabelled as a connection problem further down.
    try:
        response = requests.post(endpoint, json=payload, timeout=60)
    except requests.exceptions.RequestException as e:
        raise Exception(f"Connection error: {str(e)}") from e

    if response.status_code != 200:
        raise Exception(f"Failed to perform RAG: {response.text}")

    try:
        return response.json()
    except ValueError as e:
        raise Exception(
            "Failed to perform RAG: API returned a non-JSON response"
        ) from e
 
 
 
 
 
 
 
46
 
47
def check_api_health():
    """Report whether the FastAPI backend answers on its root endpoint.

    Sends a GET request to ``{API_BASE_URL}/`` with a 5 second timeout.

    Returns:
        True if the response status is 200; False for any other status or if
        the request fails.
    """
    try:
        response = requests.get(f"{API_BASE_URL}/", timeout=5)
    except requests.exceptions.RequestException:
        # Catch request failures only; a bare except would also swallow
        # KeyboardInterrupt and SystemExit.
        return False
    return response.status_code == 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
def process_multiple_links(url, prompt):
    """Answer a prompt using text gathered from links found on a web page.

    Extracts the links of ``url``, sends the first five of them for text
    extraction, then runs the RAG query on the resulting file.

    Args:
        url: Address of the page to scan for links.
        prompt: The question to answer.

    Returns:
        A ``(status, response, sources)`` tuple. On any failure the status
        holds the error message and the other two items are empty strings.
    """
    # Treat whitespace-only input as missing so it never reaches the backend.
    url = url.strip() if url else ""
    prompt = prompt.strip() if prompt else ""
    if not url or not prompt:
        return "❌ Error: Please provide both URL and prompt", "", ""

    if not check_api_health():
        return "❌ Error: FastAPI service is not available. Please wait a moment and try again.", "", ""

    try:
        links = extract_links(url)
        if not links:
            # Nothing to extract text from; skip the remaining backend calls.
            return "❌ Error: No links were found at the provided URL", "", ""

        sample_links = links[:5]  # cap the number of pages sent for extraction
        file_path = extract_text(sample_links)
        result = perform_rag(file_path, prompt)

        status_msg = f"βœ… Processed {len(sample_links)} pages from {len(links)} total links found"
        response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}"
        sources_text = result['sources']

        return status_msg, response_text, sources_text

    except Exception as e:
        # UI boundary: report the failure in the status slot instead of raising.
        return f"❌ Error: {str(e)}", "", ""
77
 
78
def process_homepage_only(url, prompt):
    """Answer a prompt using only the text of the given page.

    Sends ``[url]`` for text extraction, then runs the RAG query on the
    resulting file.

    Args:
        url: Address of the page to process.
        prompt: The question to answer.

    Returns:
        A ``(status, response, sources)`` tuple. On any failure the status
        holds the error message and the other two items are empty strings.
    """
    # Treat whitespace-only input as missing so it never reaches the backend.
    url = url.strip() if url else ""
    prompt = prompt.strip() if prompt else ""
    if not url or not prompt:
        return "❌ Error: Please provide both URL and prompt", "", ""

    if not check_api_health():
        return "❌ Error: FastAPI service is not available. Please wait a moment and try again.", "", ""

    try:
        file_path = extract_text([url])
        result = perform_rag(file_path, prompt)

        status_msg = "βœ… Processed homepage content"
        response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}"
        sources_text = result['sources']

        return status_msg, response_text, sources_text

    except Exception as e:
        # UI boundary: report the failure in the status slot instead of raising.
        return f"❌ Error: {str(e)}", "", ""
 
 
98
 
99
+ # Dark theme custom CSS with animations
100
  custom_css = """
101
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap');
102
  * {
103
  font-family: 'Inter', sans-serif !important;
104
  }
105
+ /* Dark animated gradient background */
106
  .gradio-container {
107
  background: linear-gradient(-45deg, #0f0c29, #302b63, #24243e, #1a1a2e);
108
  background-size: 400% 400%;
 
113
  50% { background-position: 100% 50%; }
114
  100% { background-position: 0% 50%; }
115
  }
116
+ /* Main container with dark glassmorphism */
117
  .main-container {
118
  backdrop-filter: blur(20px);
119
  background: rgba(20, 20, 30, 0.85);
 
124
  animation: fadeInUp 0.8s ease;
125
  }
126
  @keyframes fadeInUp {
127
+ from {
128
+ opacity: 0;
129
+ transform: translateY(30px);
130
+ }
131
+ to {
132
+ opacity: 1;
133
+ transform: translateY(0);
134
+ }
135
  }
136
+ /* Animated title with neon glow */
137
  .animated-title {
138
  background: linear-gradient(135deg, #00f2fe 0%, #4facfe 50%, #00c6ff 100%);
139
  background-size: 200% 200%;
 
152
  50% { background-position: 100% 50%; }
153
  100% { background-position: 0% 50%; }
154
  }
155
+ /* Floating animation for icons */
156
  .floating-icon {
157
  animation: float 3s ease-in-out infinite;
158
  display: inline-block;
 
161
  0%, 100% { transform: translateY(0px); }
162
  50% { transform: translateY(-10px); }
163
  }
164
+ /* Dark input fields with neon borders */
165
  textarea, input[type="text"] {
166
  font-size: 1.1rem !important;
167
  border-radius: 12px !important;
 
176
  transform: translateY(-2px);
177
  background: rgba(35, 35, 50, 0.95) !important;
178
  }
179
+ /* Dark labels */
180
  label {
181
  font-weight: 600 !important;
182
  color: #b0b0b0 !important;
183
  font-size: 1.1rem !important;
184
+ transition: all 0.3s ease;
185
  }
186
+ /* Neon buttons with hover effects */
187
  .gr-button {
188
  background: linear-gradient(135deg, #00f2fe 0%, #4facfe 100%) !important;
189
  color: #0a0a0f !important;
 
198
  .gr-button:hover {
199
  transform: translateY(-3px) !important;
200
  box-shadow: 0 6px 25px rgba(0, 242, 254, 0.6) !important;
201
+ filter: brightness(1.1);
202
  }
203
+ .gr-button:active {
204
+ transform: translateY(-1px) !important;
205
+ }
206
+ /* Dark output boxes with glassmorphism */
207
  .output-box {
208
  background: rgba(30, 30, 45, 0.95) !important;
209
  border-radius: 16px !important;
210
  border: 1px solid rgba(0, 242, 254, 0.2) !important;
211
  backdrop-filter: blur(10px);
212
+ animation: slideIn 0.5s ease;
213
  box-shadow: 0 4px 15px rgba(0, 0, 0, 0.3) !important;
214
  color: #e0e0e0 !important;
215
  padding: 1.5rem !important;
216
+ min-height: 200px !important;
217
+ }
218
+ /* Markdown container styling */
219
+ .output-box .prose, .output-box .markdown {
220
+ font-size: 1.2rem !important;
221
+ line-height: 1.8 !important;
222
+ }
223
+ /* Markdown styling */
224
+ .output-box h1 {
225
+ color: #00f2fe !important;
226
+ margin-top: 1.5rem !important;
227
+ margin-bottom: 1rem !important;
228
+ font-size: 2rem !important;
229
+ }
230
+ .output-box h2 {
231
+ color: #00f2fe !important;
232
+ margin-top: 1.5rem !important;
233
+ margin-bottom: 0.75rem !important;
234
+ font-size: 1.6rem !important;
235
+ }
236
+ .output-box h3 {
237
+ color: #4facfe !important;
238
+ margin-top: 1rem !important;
239
+ margin-bottom: 0.5rem !important;
240
+ font-size: 1.3rem !important;
241
+ }
242
+ .output-box p {
243
+ color: #e0e0e0 !important;
244
+ line-height: 1.9 !important;
245
+ margin-bottom: 1.2rem !important;
246
+ font-size: 1.15rem !important;
247
  }
248
  .output-box strong {
249
  color: #4facfe !important;
250
  font-weight: 600 !important;
251
+ font-size: 1.2rem !important;
252
+ }
253
+ .output-box code {
254
+ background: rgba(0, 242, 254, 0.15) !important;
255
+ padding: 3px 8px !important;
256
+ border-radius: 6px !important;
257
+ color: #00f2fe !important;
258
+ font-size: 1.1rem !important;
259
+ border: 1px solid rgba(0, 242, 254, 0.3) !important;
260
+ }
261
+ .output-box ul, .output-box ol {
262
+ font-size: 1.15rem !important;
263
+ line-height: 1.8 !important;
264
+ margin-left: 1.5rem !important;
265
+ color: #e0e0e0 !important;
266
+ }
267
+ .output-box li {
268
+ margin-bottom: 0.5rem !important;
269
+ }
270
+ /* Enhanced response area glow effect */
271
+ .output-box:hover {
272
+ border-color: rgba(0, 242, 254, 0.4) !important;
273
+ box-shadow: 0 6px 25px rgba(0, 242, 254, 0.3) !important;
274
+ transition: all 0.3s ease !important;
275
+ }
276
+ /* JSON viewer styling */
277
+ .output-box pre {
278
+ background: rgba(20, 20, 30, 0.9) !important;
279
+ border-radius: 10px !important;
280
+ padding: 1.5rem !important;
281
+ overflow-x: auto !important;
282
+ border: 1px solid rgba(0, 242, 254, 0.3) !important;
283
+ font-size: 1.05rem !important;
284
+ line-height: 1.6 !important;
285
+ }
286
+ .output-box .json-holder {
287
+ background: rgba(20, 20, 30, 0.9) !important;
288
+ border-radius: 10px !important;
289
+ padding: 1.5rem !important;
290
+ font-size: 1.05rem !important;
291
+ }
292
+ .output-box .json-key {
293
+ color: #00f2fe !important;
294
+ font-weight: 600 !important;
295
+ }
296
+ .output-box .json-string {
297
+ color: #90ee90 !important;
298
+ }
299
+ .output-box .json-number {
300
+ color: #ffa07a !important;
301
+ }
302
+ /* Special styling for AI response area */
303
+ div[data-testid="markdown"] {
304
+ background: rgba(35, 35, 50, 0.6) !important;
305
+ padding: 1.5rem !important;
306
+ border-radius: 12px !important;
307
+ border: 1px solid rgba(0, 242, 254, 0.15) !important;
308
+ }
309
+ @keyframes slideIn {
310
+ from {
311
+ opacity: 0;
312
+ transform: translateX(-20px);
313
+ }
314
+ to {
315
+ opacity: 1;
316
+ transform: translateX(0);
317
+ }
318
+ }
319
+ /* Dark tab styling */
320
+ .tab-nav button {
321
+ border-radius: 12px !important;
322
+ font-weight: 600 !important;
323
+ transition: all 0.3s ease !important;
324
+ font-size: 1.05rem !important;
325
+ background: rgba(30, 30, 45, 0.6) !important;
326
+ color: #b0b0b0 !important;
327
+ border: 1px solid rgba(0, 242, 254, 0.2) !important;
328
+ }
329
+ .tab-nav button:hover {
330
+ background: rgba(40, 40, 55, 0.8) !important;
331
+ border-color: rgba(0, 242, 254, 0.4) !important;
332
+ }
333
+ .tab-nav button[aria-selected="true"] {
334
+ background: linear-gradient(135deg, #00f2fe 0%, #4facfe 100%) !important;
335
+ color: #0a0a0f !important;
336
+ box-shadow: 0 4px 15px rgba(0, 242, 254, 0.4) !important;
337
+ border: none !important;
338
+ }
339
+ /* Info cards with pulse animation */
340
+ .info-card {
341
+ animation: pulse 2s ease-in-out infinite;
342
+ }
343
+ @keyframes pulse {
344
+ 0%, 100% { transform: scale(1); }
345
+ 50% { transform: scale(1.02); }
346
+ }
347
+ /* Dark example box styling */
348
+ .example-box {
349
+ background: linear-gradient(135deg, rgba(0, 242, 254, 0.1) 0%, rgba(79, 172, 254, 0.1) 100%);
350
+ border-radius: 16px;
351
+ padding: 1.5rem;
352
+ border-left: 4px solid #00f2fe;
353
+ margin-top: 2rem;
354
+ transition: all 0.3s ease;
355
+ animation: fadeIn 1s ease;
356
+ }
357
+ .example-box:hover {
358
+ transform: translateX(5px);
359
+ box-shadow: 0 5px 20px rgba(0, 242, 254, 0.2);
360
+ }
361
+ .example-box h3, .example-box p {
362
+ color: #e0e0e0 !important;
363
+ }
364
+ @keyframes fadeIn {
365
+ from { opacity: 0; }
366
+ to { opacity: 1; }
367
+ }
368
+ /* Dark note box with shimmer effect */
369
+ .note-box {
370
+ background: linear-gradient(135deg, rgba(0, 242, 254, 0.08) 0%, rgba(79, 172, 254, 0.08) 100%);
371
+ border-radius: 12px;
372
+ padding: 1rem;
373
+ border-left: 4px solid #00f2fe;
374
+ margin-top: 1rem;
375
+ position: relative;
376
+ overflow: hidden;
377
+ }
378
+ .note-box::before {
379
+ content: '';
380
+ position: absolute;
381
+ top: 0;
382
+ left: -100%;
383
+ width: 100%;
384
+ height: 100%;
385
+ background: linear-gradient(90deg, transparent, rgba(0, 242, 254, 0.2), transparent);
386
+ animation: shimmer 3s infinite;
387
+ }
388
+ .note-box p {
389
+ color: #00c6ff !important;
390
+ position: relative;
391
+ z-index: 1;
392
+ }
393
+ @keyframes shimmer {
394
+ 0% { left: -100%; }
395
+ 100% { left: 100%; }
396
+ }
397
+ /* Dark subtitle animation */
398
+ .subtitle {
399
+ color: #b0b0b0;
400
+ font-size: 1.3rem;
401
+ text-align: center;
402
+ margin-bottom: 2rem;
403
+ animation: fadeInDown 1s ease;
404
+ }
405
+ @keyframes fadeInDown {
406
+ from {
407
+ opacity: 0;
408
+ transform: translateY(-20px);
409
+ }
410
+ to {
411
+ opacity: 1;
412
+ transform: translateY(0);
413
+ }
414
+ }
415
+ /* Loading spinner styles */
416
+ .loading {
417
+ display: inline-block;
418
+ width: 20px;
419
+ height: 20px;
420
+ border: 3px solid rgba(0, 242, 254, 0.3);
421
+ border-radius: 50%;
422
+ border-top-color: #00f2fe;
423
+ animation: spin 1s ease-in-out infinite;
424
+ }
425
+ @keyframes spin {
426
+ to { transform: rotate(360deg); }
427
+ }
428
+ /* Neon glow effect on containers */
429
+ .main-container::before {
430
+ content: '';
431
+ position: absolute;
432
+ top: -2px;
433
+ left: -2px;
434
+ right: -2px;
435
+ bottom: -2px;
436
+ background: linear-gradient(45deg, #00f2fe, #4facfe, #00f2fe);
437
+ border-radius: 24px;
438
+ z-index: -1;
439
+ opacity: 0.3;
440
+ filter: blur(10px);
441
+ animation: neonPulse 3s ease-in-out infinite;
442
+ }
443
+ @keyframes neonPulse {
444
+ 0%, 100% { opacity: 0.3; }
445
+ 50% { opacity: 0.5; }
446
  }
447
  """
448
 
449
+ # Main interface with tabs
450
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as interface:
451
  gr.HTML("""
452
  <div class="main-container">
453
  <h1 class="animated-title">
454
+ <span class="floating-icon">🌐</span> Web RAG System
455
  </h1>
456
+ <p class="subtitle">
457
+ Extract content from web pages and ask questions using AI-powered retrieval
458
  </p>
459
  </div>
460
  """)
461
 
462
  with gr.Tabs() as tabs:
463
+ with gr.Tab("🔗 Multiple Links Analysis"):
 
 
 
 
 
 
 
 
464
  with gr.Row():
465
  with gr.Column():
466
+ url_input_multi = gr.Textbox(
467
+ label="🌍 Website URL",
468
+ placeholder="https://example.com",
469
+ elem_classes="output-box"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
470
  )
471
+ prompt_input_multi = gr.Textbox(
472
+ label="💭 Your Question",
473
+ placeholder="What is this website about?",
474
+ lines=3,
475
+ elem_classes="output-box"
476
  )
477
+ submit_btn_multi = gr.Button("✨ Analyze Multiple Links", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
 
479
  with gr.Row():
480
  with gr.Column():
481
+ status_output_multi = gr.Textbox(label="📊 Status", elem_classes="output-box")
482
+ response_output_multi = gr.Markdown(label="🤖 AI Response", elem_classes="output-box")
483
+ sources_output_multi = gr.JSON(label="📚 Sources", elem_classes="output-box")
 
 
 
 
 
 
484
 
485
+ submit_btn_multi.click(
486
+ fn=process_multiple_links,
487
+ inputs=[url_input_multi, prompt_input_multi],
488
+ outputs=[status_output_multi, response_output_multi, sources_output_multi]
489
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
 
491
+ with gr.Tab("🏠 Homepage Only Analysis"):
492
+ with gr.Row():
493
+ with gr.Column():
494
+ url_input_home = gr.Textbox(
495
+ label="🌍 Website URL",
496
+ placeholder="https://example.com",
497
+ elem_classes="output-box"
498
+ )
499
+ prompt_input_home = gr.Textbox(
500
+ label="💭 Your Question",
501
+ placeholder="What is this website about?",
502
+ lines=3,
503
+ elem_classes="output-box"
504
+ )
505
+ submit_btn_home = gr.Button("✨ Analyze Homepage", variant="primary")
506
 
507
+ with gr.Row():
508
+ with gr.Column():
509
+ status_output_home = gr.Textbox(label="📊 Status", elem_classes="output-box")
510
+ response_output_home = gr.Markdown(label="🤖 AI Response", elem_classes="output-box")
511
+ sources_output_home = gr.JSON(label="📚 Sources", elem_classes="output-box")
512
 
513
+ submit_btn_home.click(
514
+ fn=process_homepage_only,
515
+ inputs=[url_input_home, prompt_input_home],
516
+ outputs=[status_output_home, response_output_home, sources_output_home]
517
  )
 
 
 
 
 
 
 
 
 
 
 
518
 
519
  gr.HTML("""
520
+ <div class="example-box">
521
+ <h3 style="margin-top: 0; font-size: 1.4rem;">
522
+ <span class="floating-icon">💡</span> Example Usage
523
+ </h3>
524
+ <p style="font-size: 1.1rem;"><strong>URL:</strong> https://openai.com</p>
525
+ <p style="font-size: 1.1rem;"><strong>Question:</strong> What are the main products and services offered?</p>
526
+ </div>
527
+
528
+ <div class="note-box">
529
+ <p style="margin: 0; font-size: 1.05rem;">
530
+ ℹ️ <strong>Note:</strong> If you encounter connection errors, please wait a moment for the system to initialize and try again.
531
  </p>
532
  </div>
533
  """)
534
 
535
+ # Launch the interface
536
  if __name__ == "__main__":
 
 
 
 
 
 
537
  interface.launch(
538
  server_name="0.0.0.0",
539
  server_port=7860,
540
  share=False,
541
  show_error=True,
542
+ quiet=False
543
+ )