Spaces:
Running
Running
Upload app.py
Browse files
app.py
CHANGED
|
@@ -18,13 +18,20 @@ import warnings
|
|
| 18 |
from typing import Optional, Dict, Any, Tuple, List
|
| 19 |
from datetime import datetime
|
| 20 |
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, GPT2Tokenizer
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
import
|
| 24 |
-
from
|
| 25 |
-
import
|
| 26 |
-
import
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
# Suppress warnings for cleaner output
|
| 30 |
warnings.filterwarnings("ignore")
|
|
@@ -736,6 +743,12 @@ class HybridIntelligenceSearchEngine:
|
|
| 736 |
self.search_count = 0
|
| 737 |
self.timeout = 10 # seconds
|
| 738 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 739 |
# User-Agent for web requests
|
| 740 |
self.headers = {
|
| 741 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
|
@@ -745,6 +758,9 @@ class HybridIntelligenceSearchEngine:
|
|
| 745 |
|
| 746 |
def needs_current_info(self, prompt: str, domain: str) -> bool:
|
| 747 |
"""Intelligent detection of queries requiring current/real-time information"""
|
|
|
|
|
|
|
|
|
|
| 748 |
prompt_lower = prompt.lower()
|
| 749 |
|
| 750 |
# Time-sensitive indicators
|
|
@@ -860,6 +876,10 @@ class HybridIntelligenceSearchEngine:
|
|
| 860 |
|
| 861 |
def search_duckduckgo(self, query: str, max_results: int = 5) -> List[Dict[str, str]]:
|
| 862 |
"""Search using DuckDuckGo Instant Answer API (privacy-focused)"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 863 |
try:
|
| 864 |
# DuckDuckGo Instant Answer API
|
| 865 |
url = "https://api.duckduckgo.com/"
|
|
@@ -904,6 +924,10 @@ class HybridIntelligenceSearchEngine:
|
|
| 904 |
|
| 905 |
def search_wikipedia(self, query: str, max_results: int = 3) -> List[Dict[str, str]]:
|
| 906 |
"""Search Wikipedia for factual information"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 907 |
try:
|
| 908 |
# Simple Wikipedia search without the wikipedia library
|
| 909 |
search_url = "https://en.wikipedia.org/api/rest_v1/page/summary/"
|
|
|
|
| 18 |
from typing import Optional, Dict, Any, Tuple, List
|
| 19 |
from datetime import datetime
|
| 20 |
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, GPT2Tokenizer
|
| 21 |
+
# Web search imports - install with: pip install beautifulsoup4 requests wikipedia
|
| 22 |
+
try:
|
| 23 |
+
import requests
|
| 24 |
+
from urllib.parse import quote_plus
|
| 25 |
+
import re
|
| 26 |
+
from bs4 import BeautifulSoup
|
| 27 |
+
import wikipedia
|
| 28 |
+
import threading
|
| 29 |
+
from concurrent.futures import ThreadPoolExecutor, TimeoutError
|
| 30 |
+
WEB_SEARCH_AVAILABLE = True
|
| 31 |
+
except ImportError as e:
|
| 32 |
+
print(f"⚠️ Web search dependencies not available: {e}")
|
| 33 |
+
print("📦 Install with: pip install beautifulsoup4 requests")
|
| 34 |
+
WEB_SEARCH_AVAILABLE = False
|
| 35 |
|
| 36 |
# Suppress warnings for cleaner output
|
| 37 |
warnings.filterwarnings("ignore")
|
|
|
|
| 743 |
self.search_count = 0
|
| 744 |
self.timeout = 10 # seconds
|
| 745 |
|
| 746 |
+
# Check if web search is available
|
| 747 |
+
if not WEB_SEARCH_AVAILABLE:
|
| 748 |
+
print("⚠️ Web search disabled - missing dependencies (beautifulsoup4, requests)")
|
| 749 |
+
print("📦 Install with: pip install beautifulsoup4 requests")
|
| 750 |
+
return
|
| 751 |
+
|
| 752 |
# User-Agent for web requests
|
| 753 |
self.headers = {
|
| 754 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
|
|
|
| 758 |
|
| 759 |
def needs_current_info(self, prompt: str, domain: str) -> bool:
|
| 760 |
"""Intelligent detection of queries requiring current/real-time information"""
|
| 761 |
+
if not WEB_SEARCH_AVAILABLE:
|
| 762 |
+
return False # No web search available
|
| 763 |
+
|
| 764 |
prompt_lower = prompt.lower()
|
| 765 |
|
| 766 |
# Time-sensitive indicators
|
|
|
|
| 876 |
|
| 877 |
def search_duckduckgo(self, query: str, max_results: int = 5) -> List[Dict[str, str]]:
|
| 878 |
"""Search using DuckDuckGo Instant Answer API (privacy-focused)"""
|
| 879 |
+
if not WEB_SEARCH_AVAILABLE:
|
| 880 |
+
print("π DuckDuckGo search unavailable - missing dependencies")
|
| 881 |
+
return []
|
| 882 |
+
|
| 883 |
try:
|
| 884 |
# DuckDuckGo Instant Answer API
|
| 885 |
url = "https://api.duckduckgo.com/"
|
|
|
|
| 924 |
|
| 925 |
def search_wikipedia(self, query: str, max_results: int = 3) -> List[Dict[str, str]]:
|
| 926 |
"""Search Wikipedia for factual information"""
|
| 927 |
+
if not WEB_SEARCH_AVAILABLE:
|
| 928 |
+
print("π Wikipedia search unavailable - missing dependencies")
|
| 929 |
+
return []
|
| 930 |
+
|
| 931 |
try:
|
| 932 |
# Simple Wikipedia search without the wikipedia library
|
| 933 |
search_url = "https://en.wikipedia.org/api/rest_v1/page/summary/"
|