minhvtt committed on
Commit
fee9af1
·
verified ·
1 Parent(s): a356b57

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +225 -1282
main.py CHANGED
@@ -88,7 +88,7 @@ pdf_indexer = PDFIndexer(
88
  )
89
  print("✓ PDF Indexer initialized")
90
 
91
- # Initialize Multimodal PDF Indexer (for PDFs with images)
92
  multimodal_pdf_indexer = MultimodalPDFIndexer(
93
  embedding_service=embedding_service,
94
  qdrant_service=qdrant_service,
@@ -143,12 +143,6 @@ Quy tắc tuyệt đối:
143
  use_reranking: bool = True
144
  use_compression: bool = True
145
  score_threshold: float = 0.5
146
- # Advanced RAG options
147
- use_advanced_rag: bool = True
148
- use_query_expansion: bool = True
149
- use_reranking: bool = True
150
- use_compression: bool = True
151
- score_threshold: float = 0.5
152
 
153
 
154
  class ChatResponse(BaseModel):
@@ -156,7 +150,6 @@ class ChatResponse(BaseModel):
156
  context_used: List[Dict]
157
  timestamp: str
158
  rag_stats: Optional[Dict] = None # Stats from advanced RAG pipeline
159
- rag_stats: Optional[Dict] = None # Stats from advanced RAG pipeline
160
 
161
 
162
  class AddDocumentRequest(BaseModel):
@@ -170,724 +163,274 @@ class AddDocumentResponse(BaseModel):
170
  message: str
171
 
172
 
173
- class UploadPDFResponse(BaseModel):
174
- success: bool
175
- document_id: str
176
- filename: str
177
- chunks_indexed: int
178
- message: str
179
-
180
-
181
- class UploadPDFResponse(BaseModel):
182
- success: bool
183
- document_id: str
184
- filename: str
185
- chunks_indexed: int
186
- message: str
187
-
188
-
189
  @app.get("/")
190
  async def root():
191
  """Health check endpoint with comprehensive API documentation"""
192
  return {
193
  "status": "running",
194
- "service": "ChatbotRAG API - Advanced RAG with Multimodal Support",
195
- "version": "3.0.0",
196
- "service": "ChatbotRAG API - Advanced RAG with Multimodal Support",
197
- "version": "3.0.0",
198
  "vector_db": "Qdrant",
199
- "document_db": "MongoDB",
200
- "features": {
201
- "multiple_inputs": "Index up to 10 texts + 10 images per request",
202
- "advanced_rag": "Query expansion, reranking, contextual compression",
203
- "pdf_support": "Upload PDFs and chat about their content",
204
- "multimodal_pdf": "PDFs with text and image URLs - perfect for user guides",
205
- "chat_history": "Track conversation history",
206
- "hybrid_search": "Text + image search with Jina CLIP v2"
207
- },
208
- "document_db": "MongoDB",
209
- "features": {
210
- "multiple_inputs": "Index up to 10 texts + 10 images per request",
211
- "advanced_rag": "Query expansion, reranking, contextual compression",
212
- "pdf_support": "Upload PDFs and chat about their content",
213
- "multimodal_pdf": "PDFs with text and image URLs - perfect for user guides",
214
- "chat_history": "Track conversation history",
215
- "hybrid_search": "Text + image search with Jina CLIP v2"
216
- },
217
  "endpoints": {
218
- "indexing": {
219
- "POST /index": {
220
- "description": "Index multiple texts and images (NEW: up to 10 each)",
221
- "content_type": "multipart/form-data",
222
- "body": {
223
- "id": "string (required) - Document ID (primary)",
224
- "texts": "List[string] (optional) - Up to 10 texts",
225
- "images": "List[UploadFile] (optional) - Up to 10 images",
226
- "id_use": "string (optional) - ID của SocialMedia hoặc EventCode",
227
- "id_user": "string (optional) - ID của User"
228
- },
229
- "example": "curl -X POST '/index' -F 'id=doc1' -F 'id_use=social_123' -F 'id_user=user_789' -F 'texts=Text 1' -F 'images=@img1.jpg'",
230
- "indexing": {
231
- "POST /index": {
232
- "description": "Index multiple texts and images (NEW: up to 10 each)",
233
- "content_type": "multipart/form-data",
234
- "body": {
235
- "id": "string (required) - Document ID (primary)",
236
- "texts": "List[string] (optional) - Up to 10 texts",
237
- "images": "List[UploadFile] (optional) - Up to 10 images",
238
- "id_use": "string (optional) - ID của SocialMedia hoặc EventCode",
239
- "id_user": "string (optional) - ID của User"
240
  },
241
- "example": "curl -X POST '/index' -F 'id=doc1' -F 'id_use=social_123' -F 'id_user=user_789' -F 'texts=Text 1' -F 'images=@img1.jpg'",
242
  "response": {
243
- "success": True,
244
- "id": "doc1",
245
- "message": "Indexed successfully with 2 texts and 1 images"
246
- "success": True,
247
- "id": "doc1",
248
- "message": "Indexed successfully with 2 texts and 1 images"
 
 
 
 
 
249
  },
250
- "use_cases": {
251
- "social_media_post": {
252
- "id": "post_uuid_123",
253
- "id_use": "social_media_456",
254
- "id_user": "user_789",
255
- "description": "Link post to social media account and user"
256
- },
257
- "event_post": {
258
- "id": "post_uuid_789",
259
- "id_use": "event_code_ABC123",
260
- "id_user": "user_101",
261
- "description": "Link post to event and user"
262
- }
263
- }
264
- "use_cases": {
265
- "social_media_post": {
266
- "id": "post_uuid_123",
267
- "id_use": "social_media_456",
268
- "id_user": "user_789",
269
- "description": "Link post to social media account and user"
270
- },
271
- "event_post": {
272
- "id": "post_uuid_789",
273
- "id_use": "event_code_ABC123",
274
- "id_user": "user_101",
275
- "description": "Link post to event and user"
276
- }
277
- }
278
- },
279
- "POST /documents": {
280
- "description": "Add text document to knowledge base",
281
- "content_type": "application/json",
282
- "body": {
283
- "text": "string (required) - Document content",
284
- "metadata": "object (optional) - Additional metadata"
285
  },
286
- "example": {
287
- "text": "How to create event: Click 'Create Event' button...",
288
- "metadata": {"category": "tutorial", "source": "user_guide"}
289
- }
290
- },
291
- "POST /upload-pdf": {
292
- "description": "Upload PDF file (text only)",
293
- "content_type": "multipart/form-data",
294
- "body": {
295
- "file": "UploadFile (required) - PDF file",
296
- "title": "string (optional) - Document title",
297
- "category": "string (optional) - Category",
298
- "description": "string (optional) - Description"
 
 
 
 
 
 
299
  },
300
- "example": "curl -X POST '/upload-pdf' -F 'file=@guide.pdf' -F 'title=User Guide'"
 
 
 
 
301
  },
302
- "POST /upload-pdf-multimodal": {
303
- "description": "Upload PDF with text and image URLs (RECOMMENDED for user guides)",
304
- "content_type": "multipart/form-data",
305
- "features": [
306
- "Extracts text from PDF",
307
- "Detects image URLs (http://, https://)",
308
- "Supports markdown: ![alt](url)",
309
- "Supports HTML: <img src='url'>",
310
- "Links images to text chunks",
311
- "Returns images with context in chat"
312
- ],
313
- "body": {
314
- "file": "UploadFile (required) - PDF file with image URLs",
315
- "title": "string (optional) - Document title",
316
- "category": "string (optional) - e.g. 'user_guide', 'tutorial'",
317
- "description": "string (optional)"
318
- },
319
- "example": "curl -X POST '/upload-pdf-multimodal' -F 'file=@guide_with_images.pdf' -F 'category=user_guide'",
320
- "description": "Add text document to knowledge base",
321
- "content_type": "application/json",
322
- "body": {
323
- "text": "string (required) - Document content",
324
- "metadata": "object (optional) - Additional metadata"
325
  },
326
- "example": {
327
- "text": "How to create event: Click 'Create Event' button...",
328
- "metadata": {"category": "tutorial", "source": "user_guide"}
329
- }
330
- },
331
- "POST /upload-pdf": {
332
- "description": "Upload PDF file (text only)",
333
- "content_type": "multipart/form-data",
334
- "body": {
335
- "file": "UploadFile (required) - PDF file",
336
- "title": "string (optional) - Document title",
337
- "category": "string (optional) - Category",
338
- "description": "string (optional) - Description"
339
  },
340
- "example": "curl -X POST '/upload-pdf' -F 'file=@guide.pdf' -F 'title=User Guide'"
341
- },
342
- "POST /upload-pdf-multimodal": {
343
- "description": "Upload PDF with text and image URLs (RECOMMENDED for user guides)",
344
- "content_type": "multipart/form-data",
345
- "features": [
346
- "Extracts text from PDF",
347
- "Detects image URLs (http://, https://)",
348
- "Supports markdown: ![alt](url)",
349
- "Supports HTML: <img src='url'>",
350
- "Links images to text chunks",
351
- "Returns images with context in chat"
352
- ],
353
- "body": {
354
- "file": "UploadFile (required) - PDF file with image URLs",
355
- "title": "string (optional) - Document title",
356
- "category": "string (optional) - e.g. 'user_guide', 'tutorial'",
357
- "description": "string (optional)"
358
  },
359
- "example": "curl -X POST '/upload-pdf-multimodal' -F 'file=@guide_with_images.pdf' -F 'category=user_guide'",
360
- "response": {
361
  "success": True,
362
- "document_id": "pdf_multimodal_20251029_150000",
363
- "chunks_indexed": 25,
364
- "message": "PDF indexed with 25 chunks and 15 images"
365
- "success": True,
366
- "document_id": "pdf_multimodal_20251029_150000",
367
- "chunks_indexed": 25,
368
- "message": "PDF indexed with 25 chunks and 15 images"
369
- },
370
- "use_case": "Perfect for user guides with screenshots, tutorials with diagrams"
371
- }
372
- },
373
- "search": {
374
- "POST /search": {
375
- "description": "Hybrid search with text and/or image",
376
- "body": {
377
- "text": "string (optional) - Query text",
378
- "image": "UploadFile (optional) - Query image",
379
- "limit": "int (default: 10)",
380
- "score_threshold": "float (optional, 0-1)",
381
- "text_weight": "float (default: 0.5)",
382
- "image_weight": "float (default: 0.5)"
383
- }
384
- },
385
- "POST /search/text": {
386
- "description": "Text-only search",
387
- "body": {"text": "string", "limit": "int", "score_threshold": "float"}
388
- },
389
- "POST /search/image": {
390
- "description": "Image-only search",
391
- "body": {"image": "UploadFile", "limit": "int", "score_threshold": "float"}
392
- "use_case": "Perfect for user guides with screenshots, tutorials with diagrams"
393
- }
394
- },
395
- "search": {
396
- "POST /search": {
397
- "description": "Hybrid search with text and/or image",
398
- "body": {
399
- "text": "string (optional) - Query text",
400
- "image": "UploadFile (optional) - Query image",
401
- "limit": "int (default: 10)",
402
- "score_threshold": "float (optional, 0-1)",
403
- "text_weight": "float (default: 0.5)",
404
- "image_weight": "float (default: 0.5)"
405
  }
406
  },
407
- "POST /search/text": {
408
- "description": "Text-only search",
409
- "body": {"text": "string", "limit": "int", "score_threshold": "float"}
410
- },
411
- "POST /search/image": {
412
- "description": "Image-only search",
413
- "body": {"image": "UploadFile", "limit": "int", "score_threshold": "float"}
414
- },
415
  "POST /rag/search": {
416
- "description": "Search in RAG knowledge base",
417
- "body": {"query": "string", "top_k": "int (default: 5)", "score_threshold": "float (default: 0.5)"}
418
- }
419
- },
420
- "chat": {
421
- "POST /chat": {
422
- "description": "Chat với Advanced RAG (Query expansion + Reranking + Compression)",
423
- "content_type": "application/json",
424
- "body": {
425
- "message": "string (required) - User question",
426
- "use_rag": "bool (default: true) - Enable RAG retrieval",
427
- "use_advanced_rag": "bool (default: true) - Use advanced RAG pipeline (RECOMMENDED)",
428
- "use_query_expansion": "bool (default: true) - Expand query with variations",
429
- "use_reranking": "bool (default: true) - Rerank results for accuracy",
430
- "use_compression": "bool (default: true) - Compress context to relevant parts",
431
- "top_k": "int (default: 3) - Number of documents to retrieve",
432
- "score_threshold": "float (default: 0.5) - Min relevance score (0-1)",
433
- "max_tokens": "int (default: 512) - Max response tokens",
434
- "temperature": "float (default: 0.7) - Creativity (0-1)",
435
- "hf_token": "string (optional) - Hugging Face token"
436
- },
437
- "response": {
438
- "response": "string - AI answer",
439
- "context_used": "array - Retrieved documents with metadata",
440
- "timestamp": "string",
441
- "rag_stats": "object - RAG pipeline statistics (query variants, retrieval counts)"
442
  },
443
- "example_advanced": {
444
- "message": "Làm sao để upload PDF có hình ảnh?",
445
- "use_advanced_rag": True,
446
- "use_reranking": True,
447
- "top_k": 5,
448
- "score_threshold": 0.5
449
- "description": "Search in RAG knowledge base",
450
- "body": {"query": "string", "top_k": "int (default: 5)", "score_threshold": "float (default: 0.5)"}
451
- }
452
- },
453
- "chat": {
454
- "POST /chat": {
455
- "description": "Chat với Advanced RAG (Query expansion + Reranking + Compression)",
456
- "content_type": "application/json",
457
- "body": {
458
- "message": "string (required) - User question",
459
- "use_rag": "bool (default: true) - Enable RAG retrieval",
460
- "use_advanced_rag": "bool (default: true) - Use advanced RAG pipeline (RECOMMENDED)",
461
- "use_query_expansion": "bool (default: true) - Expand query with variations",
462
- "use_reranking": "bool (default: true) - Rerank results for accuracy",
463
- "use_compression": "bool (default: true) - Compress context to relevant parts",
464
- "top_k": "int (default: 3) - Number of documents to retrieve",
465
- "score_threshold": "float (default: 0.5) - Min relevance score (0-1)",
466
- "max_tokens": "int (default: 512) - Max response tokens",
467
- "temperature": "float (default: 0.7) - Creativity (0-1)",
468
- "hf_token": "string (optional) - Hugging Face token"
469
  },
470
  "response": {
471
- "response": "string - AI answer",
472
- "context_used": "array - Retrieved documents with metadata",
473
- "timestamp": "string",
474
- "rag_stats": "object - RAG pipeline statistics (query variants, retrieval counts)"
475
- },
476
- "example_advanced": {
477
- "message": "Làm sao để upload PDF có hình ảnh?",
478
- "use_advanced_rag": True,
479
- "use_reranking": True,
480
- "top_k": 5,
481
- "score_threshold": 0.5
482
- },
483
- "example_response_with_images": {
484
- "response": "Để upload PDF có hình ảnh, sử dụng endpoint /upload-pdf-multimodal...",
485
- "context_used": [
486
  {
487
- "id": "pdf_multimodal_...._p2_c1",
488
- "confidence": 0.89,
489
- "metadata": {
490
- "text": "Bước 1: Chuẩn bị PDF với image URLs...",
491
- "has_images": True,
492
- "image_urls": [
493
- "https://example.com/screenshot1.png",
494
- "https://example.com/diagram.jpg"
495
- ],
496
- "num_images": 2,
497
- "page": 2
498
- }
499
  }
500
  ],
501
- "rag_stats": {
502
- "original_query": "Làm sao để upload PDF có hình ảnh?",
503
- "expanded_queries": ["upload PDF hình ảnh", "PDF có ảnh"],
504
- "initial_results": 10,
505
- "after_rerank": 5,
506
- "after_compression": 5
507
- }
508
  },
509
- "notes": [
510
- "Advanced RAG significantly improves answer quality",
511
- "When multimodal PDF is used, images are returned in metadata",
512
- "Requires HUGGINGFACE_TOKEN for actual LLM generation"
513
- ]
514
- "example_response_with_images": {
515
- "response": "Để upload PDF có hình ảnh, sử dụng endpoint /upload-pdf-multimodal...",
516
- "context_used": [
517
  {
518
- "id": "pdf_multimodal_...._p2_c1",
519
- "confidence": 0.89,
520
- "metadata": {
521
- "text": "Bước 1: Chuẩn bị PDF với image URLs...",
522
- "has_images": True,
523
- "image_urls": [
524
- "https://example.com/screenshot1.png",
525
- "https://example.com/diagram.jpg"
526
- ],
527
- "num_images": 2,
528
- "page": 2
529
- }
530
  }
531
  ],
532
- "rag_stats": {
533
- "original_query": "Làm sao để upload PDF có hình ảnh?",
534
- "expanded_queries": ["upload PDF hình ảnh", "PDF có ảnh"],
535
- "initial_results": 10,
536
- "after_rerank": 5,
537
- "after_compression": 5
 
 
 
538
  }
539
  },
540
- "notes": [
541
- "Advanced RAG significantly improves answer quality",
542
- "When multimodal PDF is used, images are returned in metadata",
543
- "Requires HUGGINGFACE_TOKEN for actual LLM generation"
544
- ]
545
- },
546
- "GET /history": {
547
- "description": "Get chat history",
548
- "query_params": {"limit": "int (default: 10)", "skip": "int (default: 0)"},
549
- "response": {"history": "array", "total": "int"}
550
- }
551
- },
552
- "management": {
553
- "GET /documents/pdf": {
554
- "description": "List all PDF documents",
555
- "response": {"documents": "array", "total": "int"}
556
- },
557
- "DELETE /documents/pdf/{document_id}": {
558
- "description": "Delete PDF and all its chunks",
559
- "response": {"success": "bool", "message": "string"}
560
- },
561
- "GET /document/{doc_id}": {
562
- "description": "Get document by ID",
563
- "response": {"success": "bool", "data": "object"}
564
- },
565
- "DELETE /delete/{doc_id}": {
566
- "description": "Delete document by ID",
567
- "response": {"success": "bool", "message": "string"}
568
- },
569
- "GET /stats": {
570
- "description": "Get Qdrant collection statistics",
571
- "response": {"vectors_count": "int", "segments": "int", "indexed_vectors_count": "int"}
572
  }
573
  }
574
  },
575
- "quick_start": {
576
- "1_upload_multimodal_pdf": "curl -X POST '/upload-pdf-multimodal' -F 'file=@user_guide.pdf' -F 'title=Guide'",
577
- "2_verify_upload": "curl '/documents/pdf'",
578
- "3_chat_with_rag": "curl -X POST '/chat' -H 'Content-Type: application/json' -d '{\"message\": \"How to...?\", \"use_advanced_rag\": true}'",
579
- "4_see_images_in_context": "response['context_used'][0]['metadata']['image_urls']"
580
- },
581
- "use_cases": {
582
- "user_guide_with_screenshots": {
583
- "endpoint": "/upload-pdf-multimodal",
584
- "description": "PDFs with text instructions + image URLs for visual guidance",
585
- "benefits": ["Images linked to text chunks", "Chatbot returns relevant screenshots", "Perfect for step-by-step guides"]
586
- },
587
- "simple_text_docs": {
588
- "endpoint": "/upload-pdf",
589
- "description": "Simple PDFs with text only (FAQ, policies, etc.)"
590
- },
591
- "social_media_posts": {
592
- "endpoint": "/index",
593
- "description": "Index multiple posts with texts (up to 10) and images (up to 10)"
594
- },
595
- "complex_queries": {
596
- "endpoint": "/chat",
597
- "description": "Use advanced RAG for better accuracy on complex questions",
598
- "settings": {"use_advanced_rag": True, "use_reranking": True, "use_compression": True}
599
- }
600
- },
601
- "best_practices": {
602
- "pdf_format": [
603
- "Include image URLs in text (http://, https://)",
604
- "Use markdown format: ![alt](url) or HTML: <img src='url'>",
605
- "Clear structure with headings and sections",
606
- "Link images close to their related text"
607
- ],
608
- "chat_settings": {
609
- "for_accuracy": {"temperature": 0.3, "use_advanced_rag": True, "use_reranking": True},
610
- "for_creativity": {"temperature": 0.8, "use_advanced_rag": False},
611
- "for_factual_answers": {"temperature": 0.3, "use_compression": True, "score_threshold": 0.6}
612
- },
613
- "retrieval_tuning": {
614
- "not_finding_info": "Lower score_threshold to 0.3-0.4, increase top_k to 7-10",
615
- "too_much_context": "Increase score_threshold to 0.6-0.7, decrease top_k to 3-5",
616
- "slow_responses": "Disable compression, use basic RAG, decrease top_k"
617
- }
618
- "description": "Get chat history",
619
- "query_params": {"limit": "int (default: 10)", "skip": "int (default: 0)"},
620
- "response": {"history": "array", "total": "int"}
621
- }
622
- },
623
- "management": {
624
- "GET /documents/pdf": {
625
- "description": "List all PDF documents",
626
- "response": {"documents": "array", "total": "int"}
627
- },
628
- "DELETE /documents/pdf/{document_id}": {
629
- "description": "Delete PDF and all its chunks",
630
- "response": {"success": "bool", "message": "string"}
631
- },
632
- "GET /document/{doc_id}": {
633
- "description": "Get document by ID",
634
- "response": {"success": "bool", "data": "object"}
635
- },
636
- "DELETE /delete/{doc_id}": {
637
- "description": "Delete document by ID",
638
- "response": {"success": "bool", "message": "string"}
639
- },
640
- "GET /stats": {
641
- "description": "Get Qdrant collection statistics",
642
- "response": {"vectors_count": "int", "segments": "int", "indexed_vectors_count": "int"}
643
- }
644
- }
645
  },
646
- "quick_start": {
647
- "1_upload_multimodal_pdf": "curl -X POST '/upload-pdf-multimodal' -F 'file=@user_guide.pdf' -F 'title=Guide'",
648
- "2_verify_upload": "curl '/documents/pdf'",
649
- "3_chat_with_rag": "curl -X POST '/chat' -H 'Content-Type: application/json' -d '{\"message\": \"How to...?\", \"use_advanced_rag\": true}'",
650
- "4_see_images_in_context": "response['context_used'][0]['metadata']['image_urls']"
651
  },
652
- "use_cases": {
653
- "user_guide_with_screenshots": {
654
- "endpoint": "/upload-pdf-multimodal",
655
- "description": "PDFs with text instructions + image URLs for visual guidance",
656
- "benefits": ["Images linked to text chunks", "Chatbot returns relevant screenshots", "Perfect for step-by-step guides"]
657
- },
658
- "simple_text_docs": {
659
- "endpoint": "/upload-pdf",
660
- "description": "Simple PDFs with text only (FAQ, policies, etc.)"
661
- },
662
- "social_media_posts": {
663
- "endpoint": "/index",
664
- "description": "Index multiple posts with texts (up to 10) and images (up to 10)"
665
- },
666
- "complex_queries": {
667
- "endpoint": "/chat",
668
- "description": "Use advanced RAG for better accuracy on complex questions",
669
- "settings": {"use_advanced_rag": True, "use_reranking": True, "use_compression": True}
670
- }
671
  },
672
- "best_practices": {
673
- "pdf_format": [
674
- "Include image URLs in text (http://, https://)",
675
- "Use markdown format: ![alt](url) or HTML: <img src='url'>",
676
- "Clear structure with headings and sections",
677
- "Link images close to their related text"
678
- ],
679
- "chat_settings": {
680
- "for_accuracy": {"temperature": 0.3, "use_advanced_rag": True, "use_reranking": True},
681
- "for_creativity": {"temperature": 0.8, "use_advanced_rag": False},
682
- "for_factual_answers": {"temperature": 0.3, "use_compression": True, "score_threshold": 0.6}
683
- },
684
- "retrieval_tuning": {
685
- "not_finding_info": "Lower score_threshold to 0.3-0.4, increase top_k to 7-10",
686
- "too_much_context": "Increase score_threshold to 0.6-0.7, decrease top_k to 3-5",
687
- "slow_responses": "Disable compression, use basic RAG, decrease top_k"
688
- }
689
  },
690
  "links": {
691
  "docs": "http://localhost:8000/docs",
692
  "redoc": "http://localhost:8000/redoc",
693
- "openapi": "http://localhost:8000/openapi.json",
694
- "guides": {
695
- "multimodal_pdf": "See MULTIMODAL_PDF_GUIDE.md",
696
- "advanced_rag": "See ADVANCED_RAG_GUIDE.md",
697
- "pdf_general": "See PDF_RAG_GUIDE.md",
698
- "quick_start": "See QUICK_START_PDF.md"
699
- }
700
- },
701
- "system_info": {
702
- "embedding_model": "Jina CLIP v2 (multimodal)",
703
- "vector_db": "Qdrant with HNSW index",
704
- "document_db": "MongoDB",
705
- "rag_pipeline": "Advanced RAG with query expansion, reranking, compression",
706
- "pdf_parser": "pypdfium2 with URL extraction",
707
- "max_inputs": "10 texts + 10 images per /index request"
708
- "openapi": "http://localhost:8000/openapi.json",
709
- "guides": {
710
- "multimodal_pdf": "See MULTIMODAL_PDF_GUIDE.md",
711
- "advanced_rag": "See ADVANCED_RAG_GUIDE.md",
712
- "pdf_general": "See PDF_RAG_GUIDE.md",
713
- "quick_start": "See QUICK_START_PDF.md"
714
- }
715
- },
716
- "system_info": {
717
- "embedding_model": "Jina CLIP v2 (multimodal)",
718
- "vector_db": "Qdrant with HNSW index",
719
- "document_db": "MongoDB",
720
- "rag_pipeline": "Advanced RAG with query expansion, reranking, compression",
721
- "pdf_parser": "pypdfium2 with URL extraction",
722
- "max_inputs": "10 texts + 10 images per /index request"
723
  }
724
  }
725
 
726
  @app.post("/index", response_model=IndexResponse)
727
  async def index_data(
728
  id: str = Form(...),
729
- texts: Optional[List[str]] = Form(None),
730
- images: Optional[List[UploadFile]] = File(None),
731
- id_use: Optional[str] = Form(None),
732
- id_user: Optional[str] = Form(None)
733
- texts: Optional[List[str]] = Form(None),
734
- images: Optional[List[UploadFile]] = File(None),
735
- id_use: Optional[str] = Form(None),
736
- id_user: Optional[str] = Form(None)
737
  ):
738
  """
739
- Index data vào vector database (hỗ trợ nhiều texts và images)
740
- Index data vào vector database (hỗ trợ nhiều texts và images)
741
 
742
  Body:
743
- - id: Document ID (primary ID)
744
- - texts: List of text contents (tiếng Việt supported) - Tối đa 10 texts
745
- - images: List of image files (optional) - Tối đa 10 images
746
- - id_use: ID của SocialMedia hoặc EventCode (optional)
747
- - id_user: ID của User (optional)
748
- - id: Document ID (primary ID)
749
- - texts: List of text contents (tiếng Việt supported) - Tối đa 10 texts
750
- - images: List of image files (optional) - Tối đa 10 images
751
- - id_use: ID của SocialMedia hoặc EventCode (optional)
752
- - id_user: ID của User (optional)
753
 
754
  Returns:
755
  - success: True/False
756
  - id: Document ID
757
  - message: Status message
758
-
759
- Example:
760
- ```bash
761
- curl -X POST '/index' \
762
- -F 'id=doc123' \
763
- -F 'id_use=social_media_456' \
764
- -F 'id_user=user_789' \
765
- -F 'texts=Post content 1' \
766
- -F 'texts=Post content 2' \
767
- -F 'images=@image1.jpg'
768
- ```
769
-
770
- Example:
771
- ```bash
772
- curl -X POST '/index' \
773
- -F 'id=doc123' \
774
- -F 'id_use=social_media_456' \
775
- -F 'id_user=user_789' \
776
- -F 'texts=Post content 1' \
777
- -F 'texts=Post content 2' \
778
- -F 'images=@image1.jpg'
779
- ```
780
  """
781
  try:
782
- # Validation
783
- if texts is None and images is None:
784
- raise HTTPException(status_code=400, detail="Phải cung cấp ít nhất texts hoặc images")
785
-
786
- if texts and len(texts) > 10:
787
- raise HTTPException(status_code=400, detail="Tối đa 10 texts")
788
-
789
- if images and len(images) > 10:
790
- raise HTTPException(status_code=400, detail="Tối đa 10 images")
791
-
792
- # Validation
793
- if texts is None and images is None:
794
- raise HTTPException(status_code=400, detail="Phải cung cấp ít nhất texts hoặc images")
795
-
796
- if texts and len(texts) > 10:
797
- raise HTTPException(status_code=400, detail="Tối đa 10 texts")
798
-
799
- if images and len(images) > 10:
800
- raise HTTPException(status_code=400, detail="Tối đa 10 images")
801
-
802
  # Prepare embeddings
803
- text_embeddings = []
804
- image_embeddings = []
805
- text_embeddings = []
806
- image_embeddings = []
807
-
808
- # Encode multiple texts (tiếng Việt)
809
- if texts:
810
- for text in texts:
811
- if text and text.strip():
812
- text_emb = embedding_service.encode_text(text)
813
- text_embeddings.append(text_emb)
814
- # Encode multiple texts (tiếng Việt)
815
- if texts:
816
- for text in texts:
817
- if text and text.strip():
818
- text_emb = embedding_service.encode_text(text)
819
- text_embeddings.append(text_emb)
820
-
821
- # Encode multiple images
822
- if images:
823
- for image in images:
824
- if image.filename: # Check if image is provided
825
- image_bytes = await image.read()
826
- pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
827
- image_emb = embedding_service.encode_image(pil_image)
828
- image_embeddings.append(image_emb)
829
- # Encode multiple images
830
- if images:
831
- for image in images:
832
- if image.filename: # Check if image is provided
833
- image_bytes = await image.read()
834
- pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
835
- image_emb = embedding_service.encode_image(pil_image)
836
- image_embeddings.append(image_emb)
837
-
838
- # Combine embeddings
839
- all_embeddings = []
840
-
841
- if text_embeddings:
842
- # Average all text embeddings
843
- avg_text_embedding = np.mean(text_embeddings, axis=0)
844
- all_embeddings.append(avg_text_embedding)
845
-
846
- if image_embeddings:
847
- # Average all image embeddings
848
- avg_image_embedding = np.mean(image_embeddings, axis=0)
849
- all_embeddings.append(avg_image_embedding)
850
-
851
- if not all_embeddings:
852
- raise HTTPException(status_code=400, detail="Không có embedding nào được tạo từ texts hoặc images")
853
-
854
- # Final combined embedding
855
- combined_embedding = np.mean(all_embeddings, axis=0)
856
- all_embeddings = []
857
-
858
- if text_embeddings:
859
- # Average all text embeddings
860
- avg_text_embedding = np.mean(text_embeddings, axis=0)
861
- all_embeddings.append(avg_text_embedding)
862
 
863
- if image_embeddings:
864
- # Average all image embeddings
865
- avg_image_embedding = np.mean(image_embeddings, axis=0)
866
- all_embeddings.append(avg_image_embedding)
867
 
868
- if not all_embeddings:
869
- raise HTTPException(status_code=400, detail="Không có embedding nào được tạo từ texts hoặc images")
 
 
 
870
 
871
- # Final combined embedding
872
- combined_embedding = np.mean(all_embeddings, axis=0)
 
 
 
 
 
 
 
 
873
 
874
  # Normalize
875
  combined_embedding = combined_embedding / np.linalg.norm(combined_embedding, axis=1, keepdims=True)
876
 
877
  # Index vào Qdrant
878
  metadata = {
879
- "texts": texts if texts else [],
880
- "text_count": len(texts) if texts else 0,
881
- "image_count": len(images) if images else 0,
882
- "image_filenames": [img.filename for img in images] if images else [],
883
- "id_use": id_use if id_use else None, # ID của SocialMedia hoặc EventCode
884
- "id_user": id_user if id_user else None # ID của User
885
- "texts": texts if texts else [],
886
- "text_count": len(texts) if texts else 0,
887
- "image_count": len(images) if images else 0,
888
- "image_filenames": [img.filename for img in images] if images else [],
889
- "id_use": id_use if id_use else None, # ID của SocialMedia hoặc EventCode
890
- "id_user": id_user if id_user else None # ID của User
891
  }
892
 
893
  result = qdrant_service.index_data(
@@ -899,14 +442,9 @@ async def index_data(
899
  return IndexResponse(
900
  success=True,
901
  id=result["original_id"], # Trả về MongoDB ObjectId
902
- message=f"Đã index thành công document {result['original_id']} với {len(texts) if texts else 0} texts và {len(images) if images else 0} images (Qdrant UUID: {result['qdrant_id']})"
903
- message=f"Đã index thành công document {result['original_id']} với {len(texts) if texts else 0} texts và {len(images) if images else 0} images (Qdrant UUID: {result['qdrant_id']})"
904
  )
905
 
906
- except HTTPException:
907
- raise
908
- except HTTPException:
909
- raise
910
  except Exception as e:
911
  raise HTTPException(status_code=500, detail=f"Lỗi khi index: {str(e)}")
912
 
@@ -1129,8 +667,7 @@ async def get_stats():
1129
  @app.post("/chat", response_model=ChatResponse)
1130
  async def chat(request: ChatRequest):
1131
  """
1132
- Chat endpoint với Advanced RAG
1133
- Chat endpoint với Advanced RAG
1134
 
1135
  Body:
1136
  - message: User message
@@ -1140,137 +677,38 @@ async def chat(request: ChatRequest):
1140
  - max_tokens: Max tokens for response (default: 512)
1141
  - temperature: Temperature for generation (default: 0.7)
1142
  - hf_token: Hugging Face token (optional, sẽ dùng env nếu không truyền)
1143
- - use_advanced_rag: Use advanced RAG pipeline (default: true)
1144
- - use_query_expansion: Enable query expansion (default: true)
1145
- - use_reranking: Enable reranking (default: true)
1146
- - use_compression: Enable context compression (default: true)
1147
- - score_threshold: Minimum relevance score (default: 0.5)
1148
- - use_advanced_rag: Use advanced RAG pipeline (default: true)
1149
- - use_query_expansion: Enable query expansion (default: true)
1150
- - use_reranking: Enable reranking (default: true)
1151
- - use_compression: Enable context compression (default: true)
1152
- - score_threshold: Minimum relevance score (default: 0.5)
1153
 
1154
  Returns:
1155
  - response: Generated response
1156
  - context_used: Retrieved context documents
1157
  - timestamp: Response timestamp
1158
- - rag_stats: Statistics from RAG pipeline
1159
- - rag_stats: Statistics from RAG pipeline
1160
  """
1161
  try:
1162
- # ============================================
1163
- # CAG Layer: Check Semantic Cache First
1164
- # ============================================
1165
- cache_hit = None
1166
- if cag_service and request.use_rag:
1167
- cache_hit = cag_service.check_cache(request.message)
1168
-
1169
- if cache_hit:
1170
- # Cache hit! Return cached response immediately
1171
- return ChatResponse(
1172
- response=cache_hit["response"],
1173
- context_used=cache_hit["context_used"],
1174
- timestamp=datetime.utcnow().isoformat(),
1175
- rag_stats={
1176
- **cache_hit.get("rag_stats", {}),
1177
- "cache_hit": True,
1178
- "cached_query": cache_hit["cached_query"],
1179
- "similarity_score": cache_hit["similarity_score"],
1180
- "cached_at": cache_hit["cached_at"]
1181
- }
1182
- )
1183
-
1184
- # ============================================
1185
- # RAG Pipeline (if cache miss)
1186
- # ============================================
1187
  # Retrieve context if RAG enabled
1188
  context_used = []
1189
- rag_stats = None
1190
-
1191
- rag_stats = None
1192
-
1193
  if request.use_rag:
1194
- if request.use_advanced_rag:
1195
- # Initialize LLM client for query expansion
1196
- hf_client = None
1197
- if request.hf_token or hf_token:
1198
- hf_client = InferenceClient(token=request.hf_token or hf_token)
1199
-
1200
- # Use Advanced RAG Pipeline (Best Case 2025)
1201
- documents, stats = advanced_rag.hybrid_rag_pipeline(
1202
- query=request.message,
1203
- top_k=request.top_k,
1204
- score_threshold=request.score_threshold,
1205
- use_reranking=request.use_reranking,
1206
- use_compression=request.use_compression,
1207
- use_query_expansion=request.use_query_expansion,
1208
- max_context_tokens=500,
1209
- hf_client=hf_client
1210
- )
1211
-
1212
- # Convert to dict format for compatibility
1213
- context_used = [
1214
- {
1215
- "id": doc.id,
1216
- "confidence": doc.confidence,
1217
- "metadata": doc.metadata
1218
- }
1219
- for doc in documents
1220
- ]
1221
- rag_stats = stats
1222
-
1223
- # Format context using advanced RAG formatter
1224
- context_text = advanced_rag.format_context_for_llm(documents)
1225
-
1226
- else:
1227
- # Use basic RAG (original implementation)
1228
- query_embedding = embedding_service.encode_text(request.message)
1229
-
1230
- results = qdrant_service.search(
1231
- query_embedding=query_embedding,
1232
- limit=request.top_k,
1233
- score_threshold=request.score_threshold
1234
- )
1235
- context_used = results
1236
-
1237
- # Build context text (basic format)
1238
- context_text = "\n\nRelevant Context:\n"
1239
- for i, doc in enumerate(context_used, 1):
1240
- doc_text = doc["metadata"].get("text", "")
1241
- confidence = doc["confidence"]
1242
- context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
1243
- # Build context text (basic format)
1244
- context_text = "\n\nRelevant Context:\n"
1245
- for i, doc in enumerate(context_used, 1):
1246
- doc_text = doc["metadata"].get("text", "")
1247
- confidence = doc["confidence"]
1248
- context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
1249
-
1250
- # Build system message with context
1251
- if request.use_rag and context_used:
1252
- if request.use_advanced_rag:
1253
- # Use advanced prompt builder
1254
- system_message = advanced_rag.build_rag_prompt(
1255
- query=request.message,
1256
- context=context_text,
1257
- system_message=request.system_message
1258
- )
1259
- else:
1260
- # Basic prompt
1261
- system_message = f"{request.system_message}\n{context_text}\n\nPlease use the above context to answer the user's question when relevant."
1262
- # Build system message with context
1263
- if request.use_rag and context_used:
1264
- if request.use_advanced_rag:
1265
- # Use advanced prompt builder
1266
- system_message = advanced_rag.build_rag_prompt(
1267
- query=request.message,
1268
- context=context_text,
1269
- system_message=request.system_message
1270
- )
1271
- else:
1272
- # Basic prompt
1273
- system_message = f"{request.system_message}\n{context_text}\n\nPlease use the above context to answer the user's question when relevant."
1274
  else:
1275
  system_message = request.system_message
1276
 
@@ -1330,28 +768,11 @@ Example:
1330
  "timestamp": datetime.utcnow()
1331
  }
1332
  chat_history_collection.insert_one(chat_data)
1333
-
1334
- # ============================================
1335
- # CAG: Save to Cache (if RAG was used)
1336
- # ============================================
1337
- if cag_service and request.use_rag and context_used and response:
1338
- try:
1339
- cag_service.save_to_cache(
1340
- query=request.message,
1341
- response=response,
1342
- context_used=context_used,
1343
- rag_stats=rag_stats
1344
- )
1345
- except Exception as cache_error:
1346
- print(f"Warning: Failed to save to cache: {cache_error}")
1347
 
1348
  return ChatResponse(
1349
  response=response,
1350
  context_used=context_used,
1351
- timestamp=datetime.utcnow().isoformat(),
1352
- rag_stats=rag_stats
1353
- timestamp=datetime.utcnow().isoformat(),
1354
- rag_stats=rag_stats
1355
  )
1356
 
1357
  except Exception as e:
@@ -1511,484 +932,6 @@ async def delete_document_from_kb(doc_id: str):
1511
  raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
1512
 
1513
 
1514
- @app.post("/upload-pdf", response_model=UploadPDFResponse)
1515
- async def upload_pdf(
1516
- file: UploadFile = File(...),
1517
- document_id: Optional[str] = Form(None),
1518
- title: Optional[str] = Form(None),
1519
- description: Optional[str] = Form(None),
1520
- category: Optional[str] = Form(None)
1521
- ):
1522
- """
1523
- Upload and index PDF file into knowledge base
1524
-
1525
- Body (multipart/form-data):
1526
- - file: PDF file (required)
1527
- - document_id: Custom document ID (optional, auto-generated if not provided)
1528
- - title: Document title (optional)
1529
- - description: Document description (optional)
1530
- - category: Document category (optional, e.g., "user_guide", "faq")
1531
-
1532
- Returns:
1533
- - success: True/False
1534
- - document_id: Document ID
1535
- - filename: Original filename
1536
- - chunks_indexed: Number of chunks created
1537
- - message: Status message
1538
-
1539
- Example:
1540
- ```bash
1541
- curl -X POST "http://localhost:8000/upload-pdf" \
1542
- -F "file=@user_guide.pdf" \
1543
- -F "title=Hướng dẫn sử dụng ChatbotRAG" \
1544
- -F "category=user_guide"
1545
- ```
1546
- """
1547
- try:
1548
- # Validate file type
1549
- if not file.filename.endswith('.pdf'):
1550
- raise HTTPException(status_code=400, detail="Only PDF files are allowed")
1551
-
1552
- # Generate document ID if not provided
1553
- if not document_id:
1554
- from datetime import datetime
1555
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1556
- document_id = f"pdf_{timestamp}"
1557
-
1558
- # Read PDF bytes
1559
- pdf_bytes = await file.read()
1560
-
1561
- # Prepare metadata
1562
- metadata = {}
1563
- if title:
1564
- metadata['title'] = title
1565
- if description:
1566
- metadata['description'] = description
1567
- if category:
1568
- metadata['category'] = category
1569
-
1570
- # Index PDF
1571
- result = pdf_indexer.index_pdf_bytes(
1572
- pdf_bytes=pdf_bytes,
1573
- document_id=document_id,
1574
- filename=file.filename,
1575
- document_metadata=metadata
1576
- )
1577
-
1578
- return UploadPDFResponse(
1579
- success=True,
1580
- document_id=result['document_id'],
1581
- filename=result['filename'],
1582
- chunks_indexed=result['chunks_indexed'],
1583
- message=f"PDF '{file.filename}' đã được index thành công với {result['chunks_indexed']} chunks"
1584
- )
1585
-
1586
- except HTTPException:
1587
- raise
1588
- except Exception as e:
1589
- raise HTTPException(status_code=500, detail=f"Error uploading PDF: {str(e)}")
1590
-
1591
-
1592
- @app.get("/documents/pdf")
1593
- async def list_pdf_documents():
1594
- """
1595
- List all PDF documents in knowledge base
1596
-
1597
- Returns:
1598
- - documents: List of PDF documents with metadata
1599
- """
1600
- try:
1601
- docs = list(documents_collection.find(
1602
- {"type": "pdf"},
1603
- {"_id": 0}
1604
- ))
1605
- return {"documents": docs, "total": len(docs)}
1606
- except Exception as e:
1607
- raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
1608
-
1609
-
1610
- @app.delete("/documents/pdf/{document_id}")
1611
- async def delete_pdf_document(document_id: str):
1612
- """
1613
- Delete PDF document and all its chunks from knowledge base
1614
-
1615
- Args:
1616
- - document_id: Document ID
1617
-
1618
- Returns:
1619
- - success: True/False
1620
- - message: Status message
1621
- """
1622
- try:
1623
- # Get document info
1624
- doc = documents_collection.find_one({"document_id": document_id, "type": "pdf"})
1625
-
1626
- if not doc:
1627
- raise HTTPException(status_code=404, detail=f"PDF document {document_id} not found")
1628
-
1629
- # Delete all chunks from Qdrant
1630
- chunk_ids = doc.get('chunk_ids', [])
1631
- for chunk_id in chunk_ids:
1632
- try:
1633
- qdrant_service.delete_by_id(chunk_id)
1634
- except:
1635
- pass # Chunk might already be deleted
1636
-
1637
- # Delete from MongoDB
1638
- documents_collection.delete_one({"document_id": document_id})
1639
-
1640
- return {
1641
- "success": True,
1642
- "message": f"PDF document {document_id} and {len(chunk_ids)} chunks deleted"
1643
- }
1644
-
1645
- except HTTPException:
1646
- raise
1647
- except Exception as e:
1648
- raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
1649
-
1650
-
1651
- @app.post("/upload-pdf-multimodal", response_model=UploadPDFResponse)
1652
- async def upload_pdf_multimodal(
1653
- file: UploadFile = File(...),
1654
- document_id: Optional[str] = Form(None),
1655
- title: Optional[str] = Form(None),
1656
- description: Optional[str] = Form(None),
1657
- category: Optional[str] = Form(None)
1658
- ):
1659
- """
1660
- Upload PDF with text and image URLs (for user guides with screenshots)
1661
-
1662
- This endpoint is optimized for PDFs containing:
1663
- - Text instructions
1664
- - Image URLs (http://... or https://...)
1665
- - Markdown images: ![alt](url)
1666
- - HTML images: <img src="url">
1667
-
1668
- The system will:
1669
- 1. Extract text from PDF
1670
- 2. Detect all image URLs in the text
1671
- 3. Link images to their corresponding text chunks
1672
- 4. Store image URLs in metadata
1673
- 5. Return images along with text during chat
1674
-
1675
- Body (multipart/form-data):
1676
- - file: PDF file (required)
1677
- - document_id: Custom document ID (optional, auto-generated if not provided)
1678
- - title: Document title (optional)
1679
- - description: Document description (optional)
1680
- - category: Document category (optional, e.g., "user_guide", "tutorial")
1681
-
1682
- Returns:
1683
- - success: True/False
1684
- - document_id: Document ID
1685
- - filename: Original filename
1686
- - chunks_indexed: Number of chunks created
1687
- - message: Status message (includes image count)
1688
-
1689
- Example:
1690
- ```bash
1691
- curl -X POST "http://localhost:8000/upload-pdf-multimodal" \
1692
- -F "file=@user_guide_with_images.pdf" \
1693
- -F "title=Hướng dẫn có ảnh minh họa" \
1694
- -F "category=user_guide"
1695
- ```
1696
-
1697
- Example Response:
1698
- ```json
1699
- {
1700
- "success": true,
1701
- "document_id": "pdf_20251029_150000",
1702
- "filename": "user_guide_with_images.pdf",
1703
- "chunks_indexed": 25,
1704
- "message": "PDF 'user_guide_with_images.pdf' indexed with 25 chunks and 15 images"
1705
- }
1706
- ```
1707
- """
1708
- try:
1709
- # Validate file type
1710
- if not file.filename.endswith('.pdf'):
1711
- raise HTTPException(status_code=400, detail="Only PDF files are allowed")
1712
-
1713
- # Generate document ID if not provided
1714
- if not document_id:
1715
- from datetime import datetime
1716
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1717
- document_id = f"pdf_multimodal_{timestamp}"
1718
-
1719
- # Read PDF bytes
1720
- pdf_bytes = await file.read()
1721
-
1722
- # Prepare metadata
1723
- metadata = {'type': 'multimodal'}
1724
- if title:
1725
- metadata['title'] = title
1726
- if description:
1727
- metadata['description'] = description
1728
- if category:
1729
- metadata['category'] = category
1730
-
1731
- # Index PDF with multimodal parser
1732
- result = multimodal_pdf_indexer.index_pdf_bytes(
1733
- pdf_bytes=pdf_bytes,
1734
- document_id=document_id,
1735
- filename=file.filename,
1736
- document_metadata=metadata
1737
- )
1738
-
1739
- return UploadPDFResponse(
1740
- success=True,
1741
- document_id=result['document_id'],
1742
- filename=result['filename'],
1743
- chunks_indexed=result['chunks_indexed'],
1744
- message=f"PDF '{file.filename}' indexed successfully with {result['chunks_indexed']} chunks and {result.get('images_found', 0)} images"
1745
- )
1746
-
1747
- except HTTPException:
1748
- raise
1749
- except Exception as e:
1750
- raise HTTPException(status_code=500, detail=f"Error uploading multimodal PDF: {str(e)}")
1751
-
1752
-
1753
- @app.post("/upload-pdf", response_model=UploadPDFResponse)
1754
- async def upload_pdf(
1755
- file: UploadFile = File(...),
1756
- document_id: Optional[str] = Form(None),
1757
- title: Optional[str] = Form(None),
1758
- description: Optional[str] = Form(None),
1759
- category: Optional[str] = Form(None)
1760
- ):
1761
- """
1762
- Upload and index PDF file into knowledge base
1763
-
1764
- Body (multipart/form-data):
1765
- - file: PDF file (required)
1766
- - document_id: Custom document ID (optional, auto-generated if not provided)
1767
- - title: Document title (optional)
1768
- - description: Document description (optional)
1769
- - category: Document category (optional, e.g., "user_guide", "faq")
1770
-
1771
- Returns:
1772
- - success: True/False
1773
- - document_id: Document ID
1774
- - filename: Original filename
1775
- - chunks_indexed: Number of chunks created
1776
- - message: Status message
1777
-
1778
- Example:
1779
- ```bash
1780
- curl -X POST "http://localhost:8000/upload-pdf" \
1781
- -F "file=@user_guide.pdf" \
1782
- -F "title=Hướng dẫn sử dụng ChatbotRAG" \
1783
- -F "category=user_guide"
1784
- ```
1785
- """
1786
- try:
1787
- # Validate file type
1788
- if not file.filename.endswith('.pdf'):
1789
- raise HTTPException(status_code=400, detail="Only PDF files are allowed")
1790
-
1791
- # Generate document ID if not provided
1792
- if not document_id:
1793
- from datetime import datetime
1794
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1795
- document_id = f"pdf_{timestamp}"
1796
-
1797
- # Read PDF bytes
1798
- pdf_bytes = await file.read()
1799
-
1800
- # Prepare metadata
1801
- metadata = {}
1802
- if title:
1803
- metadata['title'] = title
1804
- if description:
1805
- metadata['description'] = description
1806
- if category:
1807
- metadata['category'] = category
1808
-
1809
- # Index PDF
1810
- result = pdf_indexer.index_pdf_bytes(
1811
- pdf_bytes=pdf_bytes,
1812
- document_id=document_id,
1813
- filename=file.filename,
1814
- document_metadata=metadata
1815
- )
1816
-
1817
- return UploadPDFResponse(
1818
- success=True,
1819
- document_id=result['document_id'],
1820
- filename=result['filename'],
1821
- chunks_indexed=result['chunks_indexed'],
1822
- message=f"PDF '{file.filename}' đã được index thành công với {result['chunks_indexed']} chunks"
1823
- )
1824
-
1825
- except HTTPException:
1826
- raise
1827
- except Exception as e:
1828
- raise HTTPException(status_code=500, detail=f"Error uploading PDF: {str(e)}")
1829
-
1830
-
1831
- @app.get("/documents/pdf")
1832
- async def list_pdf_documents():
1833
- """
1834
- List all PDF documents in knowledge base
1835
-
1836
- Returns:
1837
- - documents: List of PDF documents with metadata
1838
- """
1839
- try:
1840
- docs = list(documents_collection.find(
1841
- {"type": "pdf"},
1842
- {"_id": 0}
1843
- ))
1844
- return {"documents": docs, "total": len(docs)}
1845
- except Exception as e:
1846
- raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
1847
-
1848
-
1849
- @app.delete("/documents/pdf/{document_id}")
1850
- async def delete_pdf_document(document_id: str):
1851
- """
1852
- Delete PDF document and all its chunks from knowledge base
1853
-
1854
- Args:
1855
- - document_id: Document ID
1856
-
1857
- Returns:
1858
- - success: True/False
1859
- - message: Status message
1860
- """
1861
- try:
1862
- # Get document info
1863
- doc = documents_collection.find_one({"document_id": document_id, "type": "pdf"})
1864
-
1865
- if not doc:
1866
- raise HTTPException(status_code=404, detail=f"PDF document {document_id} not found")
1867
-
1868
- # Delete all chunks from Qdrant
1869
- chunk_ids = doc.get('chunk_ids', [])
1870
- for chunk_id in chunk_ids:
1871
- try:
1872
- qdrant_service.delete_by_id(chunk_id)
1873
- except:
1874
- pass # Chunk might already be deleted
1875
-
1876
- # Delete from MongoDB
1877
- documents_collection.delete_one({"document_id": document_id})
1878
-
1879
- return {
1880
- "success": True,
1881
- "message": f"PDF document {document_id} and {len(chunk_ids)} chunks deleted"
1882
- }
1883
-
1884
- except HTTPException:
1885
- raise
1886
- except Exception as e:
1887
- raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
1888
-
1889
-
1890
- @app.post("/upload-pdf-multimodal", response_model=UploadPDFResponse)
1891
- async def upload_pdf_multimodal(
1892
- file: UploadFile = File(...),
1893
- document_id: Optional[str] = Form(None),
1894
- title: Optional[str] = Form(None),
1895
- description: Optional[str] = Form(None),
1896
- category: Optional[str] = Form(None)
1897
- ):
1898
- """
1899
- Upload PDF with text and image URLs (for user guides with screenshots)
1900
-
1901
- This endpoint is optimized for PDFs containing:
1902
- - Text instructions
1903
- - Image URLs (http://... or https://...)
1904
- - Markdown images: ![alt](url)
1905
- - HTML images: <img src="url">
1906
-
1907
- The system will:
1908
- 1. Extract text from PDF
1909
- 2. Detect all image URLs in the text
1910
- 3. Link images to their corresponding text chunks
1911
- 4. Store image URLs in metadata
1912
- 5. Return images along with text during chat
1913
-
1914
- Body (multipart/form-data):
1915
- - file: PDF file (required)
1916
- - document_id: Custom document ID (optional, auto-generated if not provided)
1917
- - title: Document title (optional)
1918
- - description: Document description (optional)
1919
- - category: Document category (optional, e.g., "user_guide", "tutorial")
1920
-
1921
- Returns:
1922
- - success: True/False
1923
- - document_id: Document ID
1924
- - filename: Original filename
1925
- - chunks_indexed: Number of chunks created
1926
- - message: Status message (includes image count)
1927
-
1928
- Example:
1929
- ```bash
1930
- curl -X POST "http://localhost:8000/upload-pdf-multimodal" \
1931
- -F "file=@user_guide_with_images.pdf" \
1932
- -F "title=Hướng dẫn có ảnh minh họa" \
1933
- -F "category=user_guide"
1934
- ```
1935
-
1936
- Example Response:
1937
- ```json
1938
- {
1939
- "success": true,
1940
- "document_id": "pdf_20251029_150000",
1941
- "filename": "user_guide_with_images.pdf",
1942
- "chunks_indexed": 25,
1943
- "message": "PDF 'user_guide_with_images.pdf' indexed with 25 chunks and 15 images"
1944
- }
1945
- ```
1946
- """
1947
- try:
1948
- # Validate file type
1949
- if not file.filename.endswith('.pdf'):
1950
- raise HTTPException(status_code=400, detail="Only PDF files are allowed")
1951
-
1952
- # Generate document ID if not provided
1953
- if not document_id:
1954
- from datetime import datetime
1955
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1956
- document_id = f"pdf_multimodal_{timestamp}"
1957
-
1958
- # Read PDF bytes
1959
- pdf_bytes = await file.read()
1960
-
1961
- # Prepare metadata
1962
- metadata = {'type': 'multimodal'}
1963
- if title:
1964
- metadata['title'] = title
1965
- if description:
1966
- metadata['description'] = description
1967
- if category:
1968
- metadata['category'] = category
1969
-
1970
- # Index PDF with multimodal parser
1971
- result = multimodal_pdf_indexer.index_pdf_bytes(
1972
- pdf_bytes=pdf_bytes,
1973
- document_id=document_id,
1974
- filename=file.filename,
1975
- document_metadata=metadata
1976
- )
1977
-
1978
- return UploadPDFResponse(
1979
- success=True,
1980
- document_id=result['document_id'],
1981
- filename=result['filename'],
1982
- chunks_indexed=result['chunks_indexed'],
1983
- message=f"PDF '{file.filename}' indexed successfully with {result['chunks_indexed']} chunks and {result.get('images_found', 0)} images"
1984
- )
1985
-
1986
- except HTTPException:
1987
- raise
1988
- except Exception as e:
1989
- raise HTTPException(status_code=500, detail=f"Error uploading multimodal PDF: {str(e)}")
1990
-
1991
-
1992
  if __name__ == "__main__":
1993
  import uvicorn
1994
  uvicorn.run(
 
88
  )
89
  print("✓ PDF Indexer initialized")
90
 
91
+ # Initialize Multimodal PDF Indexer
92
  multimodal_pdf_indexer = MultimodalPDFIndexer(
93
  embedding_service=embedding_service,
94
  qdrant_service=qdrant_service,
 
143
  use_reranking: bool = True
144
  use_compression: bool = True
145
  score_threshold: float = 0.5
 
 
 
 
 
 
146
 
147
 
148
  class ChatResponse(BaseModel):
 
150
  context_used: List[Dict]
151
  timestamp: str
152
  rag_stats: Optional[Dict] = None # Stats from advanced RAG pipeline
 
153
 
154
 
155
  class AddDocumentRequest(BaseModel):
 
163
  message: str
164
 
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  @app.get("/")
167
  async def root():
168
  """Health check endpoint with comprehensive API documentation"""
169
  return {
170
  "status": "running",
171
+ "service": "ChatbotRAG API",
172
+ "version": "2.0.0",
 
 
173
  "vector_db": "Qdrant",
174
+ "document_db": "MongoDB",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  "endpoints": {
176
+ "chatbot_rag": {
177
+ "API endpoint": "https://minhvtt-ChatbotRAG.hf.space/",
178
+ "POST /chat": {
179
+ "description": "Chat với AI sử dụng RAG (Retrieval-Augmented Generation)",
180
+ "request": {
181
+ "method": "POST",
182
+ "content_type": "application/json",
183
+ "body": {
184
+ "message": "string (required) - User message/question",
185
+ "use_rag": "boolean (optional, default: true) - Enable RAG context retrieval",
186
+ "top_k": "integer (optional, default: 3) - Number of context documents to retrieve",
187
+ "system_message": "string (optional) - Custom system prompt",
188
+ "max_tokens": "integer (optional, default: 512) - Max response length",
189
+ "temperature": "float (optional, default: 0.7, range: 0-1) - Creativity level",
190
+ "top_p": "float (optional, default: 0.95) - Nucleus sampling",
191
+ "hf_token": "string (optional) - Hugging Face token (fallback to env)"
192
+ }
 
 
 
 
 
193
  },
 
194
  "response": {
195
+ "response": "string - AI generated response",
196
+ "context_used": [
197
+ {
198
+ "id": "string - Document ID",
199
+ "confidence": "float - Relevance score",
200
+ "metadata": {
201
+ "text": "string - Retrieved context"
202
+ }
203
+ }
204
+ ],
205
+ "timestamp": "string - ISO 8601 timestamp"
206
  },
207
+ "example_request": {
208
+ "message": "Dao có nguy hiểm không?",
209
+ "use_rag": True,
210
+ "top_k": 3,
211
+ "temperature": 0.7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  },
213
+ "example_response": {
214
+ "response": "Dựa trên thông tin trong database, dao được phân loại là vũ khí nguy hiểm. Dao sắc có thể gây thương tích nghiêm trọng nếu không sử dụng đúng cách. Cần tuân thủ các quy định an toàn khi sử dụng.",
215
+ "context_used": [
216
+ {
217
+ "id": "68a3fc14c853d7621e8977b5",
218
+ "confidence": 0.92,
219
+ "metadata": {
220
+ "text": "Vũ khí"
221
+ }
222
+ },
223
+ {
224
+ "id": "68a3fc4cc853d7621e8977b6",
225
+ "confidence": 0.85,
226
+ "metadata": {
227
+ "text": "Con dao sắc"
228
+ }
229
+ }
230
+ ],
231
+ "timestamp": "2025-10-13T10:30:45.123456"
232
  },
233
+ "notes": [
234
+ "RAG retrieves relevant context from vector DB before generating response",
235
+ "LLM uses context to provide accurate, grounded answers",
236
+ "Requires HUGGINGFACE_TOKEN environment variable or hf_token in request"
237
+ ]
238
  },
239
+ "POST /documents": {
240
+ "description": "Add document to knowledge base for RAG",
241
+ "request": {
242
+ "method": "POST",
243
+ "content_type": "application/json",
244
+ "body": {
245
+ "text": "string (required) - Document text content",
246
+ "metadata": "object (optional) - Additional metadata (source, category, etc.)"
247
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  },
249
+ "response": {
250
+ "success": "boolean",
251
+ "doc_id": "string - MongoDB ObjectId",
252
+ "message": "string - Status message"
 
 
 
 
 
 
 
 
 
253
  },
254
+ "example_request": {
255
+ "text": "Để tạo event mới: Click nút 'Tạo Event' ở góc trên bên phải màn hình. Điền thông tin sự kiện bao gồm tên, ngày giờ, địa điểm. Click Lưu để hoàn tất.",
256
+ "metadata": {
257
+ "source": "user_guide.pdf",
258
+ "section": "create_event",
259
+ "page": 5,
260
+ "category": "tutorial"
261
+ }
 
 
 
 
 
 
 
 
 
 
262
  },
263
+ "example_response": {
 
264
  "success": True,
265
+ "doc_id": "67a9876543210fedcba98765",
266
+ "message": "Document added successfully with ID: 67a9876543210fedcba98765"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  }
268
  },
 
 
 
 
 
 
 
 
269
  "POST /rag/search": {
270
+ "description": "Search in knowledge base (similar to /search/text but for RAG documents)",
271
+ "request": {
272
+ "method": "POST",
273
+ "content_type": "multipart/form-data",
274
+ "body": {
275
+ "query": "string (required) - Search query",
276
+ "top_k": "integer (optional, default: 5) - Number of results",
277
+ "score_threshold": "float (optional, default: 0.5) - Minimum relevance score"
278
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  },
280
+ "response": [
281
+ {
282
+ "id": "string",
283
+ "confidence": "float",
284
+ "metadata": {
285
+ "text": "string",
286
+ "source": "string"
287
+ }
288
+ }
289
+ ],
290
+ "example_request": {
291
+ "query": "cách tạo sự kiện mới",
292
+ "top_k": 3,
293
+ "score_threshold": 0.6
294
+ }
295
+ },
296
+ "GET /history": {
297
+ "description": "Get chat conversation history",
298
+ "request": {
299
+ "method": "GET",
300
+ "query_params": {
301
+ "limit": "integer (optional, default: 10) - Number of messages",
302
+ "skip": "integer (optional, default: 0) - Pagination offset"
303
+ }
 
 
304
  },
305
  "response": {
306
+ "history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  {
308
+ "user_message": "string",
309
+ "assistant_response": "string",
310
+ "context_used": "array",
311
+ "timestamp": "string - ISO 8601"
 
 
 
 
 
 
 
 
312
  }
313
  ],
314
+ "total": "integer - Total messages count"
 
 
 
 
 
 
315
  },
316
+ "example_request": "GET /history?limit=5&skip=0",
317
+ "example_response": {
318
+ "history": [
 
 
 
 
 
319
  {
320
+ "user_message": "Dao có nguy hiểm không?",
321
+ "assistant_response": "Dao được phân loại là vũ khí...",
322
+ "context_used": [],
323
+ "timestamp": "2025-10-13T10:30:45.123456"
 
 
 
 
 
 
 
 
324
  }
325
  ],
326
+ "total": 15
327
+ }
328
+ },
329
+ "DELETE /documents/{doc_id}": {
330
+ "description": "Delete document from knowledge base",
331
+ "request": {
332
+ "method": "DELETE",
333
+ "path_params": {
334
+ "doc_id": "string - MongoDB ObjectId"
335
  }
336
  },
337
+ "response": {
338
+ "success": "boolean",
339
+ "message": "string"
340
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
  }
342
  }
343
  },
344
+ "usage_examples": {
345
+ "curl_chat": "curl -X POST 'http://localhost:8000/chat' -H 'Content-Type: application/json' -d '{\"message\": \"Dao có nguy hiểm không?\", \"use_rag\": true}'",
346
+ "python_chat": """
347
+ import requests
348
+
349
+ response = requests.post(
350
+ 'http://localhost:8000/chat',
351
+ json={
352
+ 'message': 'Nút tạo event ở đâu?',
353
+ 'use_rag': True,
354
+ 'top_k': 3
355
+ }
356
+ )
357
+ print(response.json()['response'])
358
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  },
360
+ "authentication": {
361
+ "embeddings_apis": "No authentication required",
362
+ "chat_api": "Requires HUGGINGFACE_TOKEN (env variable or request body)"
 
 
363
  },
364
+ "rate_limits": {
365
+ "embeddings": "No limit",
366
+ "chat_with_llm": "Limited by Hugging Face API (free tier: ~1000 requests/hour)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  },
368
+ "error_codes": {
369
+ "400": "Bad Request - Missing required fields or invalid input",
370
+ "401": "Unauthorized - Invalid Hugging Face token",
371
+ "404": "Not Found - Document ID not found",
372
+ "500": "Internal Server Error - Server or database error"
 
 
 
 
 
 
 
 
 
 
 
 
373
  },
374
  "links": {
375
  "docs": "http://localhost:8000/docs",
376
  "redoc": "http://localhost:8000/redoc",
377
+ "openapi": "http://localhost:8000/openapi.json"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  }
379
  }
380
 
381
  @app.post("/index", response_model=IndexResponse)
382
  async def index_data(
383
  id: str = Form(...),
384
+ text: str = Form(...),
385
+ image: Optional[UploadFile] = File(None)
 
 
 
 
 
 
386
  ):
387
  """
388
+ Index data vào vector database
 
389
 
390
  Body:
391
+ - id: Document ID (event ID, post ID, etc.)
392
+ - text: Text content (tiếng Việt supported)
393
+ - image: Image file (optional)
 
 
 
 
 
 
 
394
 
395
  Returns:
396
  - success: True/False
397
  - id: Document ID
398
  - message: Status message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  """
400
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  # Prepare embeddings
402
+ text_embedding = None
403
+ image_embedding = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
 
405
+ # Encode text (tiếng Việt)
406
+ if text and text.strip():
407
+ text_embedding = embedding_service.encode_text(text)
 
408
 
409
+ # Encode image nếu có
410
+ if image:
411
+ image_bytes = await image.read()
412
+ pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
413
+ image_embedding = embedding_service.encode_image(pil_image)
414
 
415
+ # Combine embeddings
416
+ if text_embedding is not None and image_embedding is not None:
417
+ # Average của text và image embeddings
418
+ combined_embedding = np.mean([text_embedding, image_embedding], axis=0)
419
+ elif text_embedding is not None:
420
+ combined_embedding = text_embedding
421
+ elif image_embedding is not None:
422
+ combined_embedding = image_embedding
423
+ else:
424
+ raise HTTPException(status_code=400, detail="Phải cung cấp ít nhất text hoặc image")
425
 
426
  # Normalize
427
  combined_embedding = combined_embedding / np.linalg.norm(combined_embedding, axis=1, keepdims=True)
428
 
429
  # Index vào Qdrant
430
  metadata = {
431
+ "text": text,
432
+ "has_image": image is not None,
433
+ "image_filename": image.filename if image else None
 
 
 
 
 
 
 
 
 
434
  }
435
 
436
  result = qdrant_service.index_data(
 
442
  return IndexResponse(
443
  success=True,
444
  id=result["original_id"], # Trả về MongoDB ObjectId
445
+ message=f"Đã index thành công document {result['original_id']} (Qdrant UUID: {result['qdrant_id']})"
 
446
  )
447
 
 
 
 
 
448
  except Exception as e:
449
  raise HTTPException(status_code=500, detail=f"Lỗi khi index: {str(e)}")
450
 
 
667
  @app.post("/chat", response_model=ChatResponse)
668
  async def chat(request: ChatRequest):
669
  """
670
+ Chat endpoint với RAG
 
671
 
672
  Body:
673
  - message: User message
 
677
  - max_tokens: Max tokens for response (default: 512)
678
  - temperature: Temperature for generation (default: 0.7)
679
  - hf_token: Hugging Face token (optional, sẽ dùng env nếu không truyền)
 
 
 
 
 
 
 
 
 
 
680
 
681
  Returns:
682
  - response: Generated response
683
  - context_used: Retrieved context documents
684
  - timestamp: Response timestamp
 
 
685
  """
686
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
687
  # Retrieve context if RAG enabled
688
  context_used = []
 
 
 
 
689
  if request.use_rag:
690
+ # Generate query embedding
691
+ query_embedding = embedding_service.encode_text(request.message)
692
+
693
+ # Search in Qdrant
694
+ results = qdrant_service.search(
695
+ query_embedding=query_embedding,
696
+ limit=request.top_k,
697
+ score_threshold=0.5
698
+ )
699
+ context_used = results
700
+
701
+ # Build context text
702
+ context_text = ""
703
+ if context_used:
704
+ context_text = "\n\nRelevant Context:\n"
705
+ for i, doc in enumerate(context_used, 1):
706
+ doc_text = doc["metadata"].get("text", "")
707
+ confidence = doc["confidence"]
708
+ context_text += f"\n[{i}] (Confidence: {confidence:.2f})\n{doc_text}\n"
709
+
710
+ # Add context to system message
711
+ system_message = f"{request.system_message}\n{context_text}\n\nPlease use the above context to answer the user's question when relevant."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
712
  else:
713
  system_message = request.system_message
714
 
 
768
  "timestamp": datetime.utcnow()
769
  }
770
  chat_history_collection.insert_one(chat_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
771
 
772
  return ChatResponse(
773
  response=response,
774
  context_used=context_used,
775
+ timestamp=datetime.utcnow().isoformat()
 
 
 
776
  )
777
 
778
  except Exception as e:
 
932
  raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
933
 
934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
935
  if __name__ == "__main__":
936
  import uvicorn
937
  uvicorn.run(