.gitattributes CHANGED
@@ -32,4 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -1 +1,22 @@
1
- FROM hadadrjt/searchgpt:latest
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ # Use the official Python image as the base image for the app
7
+ FROM python:latest
8
+
9
+ # Set the main working directory inside the container
10
+ WORKDIR /app
11
+
12
+ # Copy all files into the container
13
+ COPY . .
14
+
15
+ # Install all dependencies
16
+ RUN pip install -r requirements.txt
17
+
18
+ # Open the port so the app can be accessed
19
+ EXPOSE 7860
20
+
21
+ # Start the app
22
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,10 +1,75 @@
1
  ---
2
  title: SearchGPT
3
  short_description: ChatGPT with real-time web search & URL reading capability
 
4
  emoji: ⚡
5
  colorFrom: blue
6
  colorTo: yellow
7
  sdk: docker
8
  app_port: 7860
9
  pinned: false
10
  ---
 
1
  ---
2
  title: SearchGPT
3
  short_description: ChatGPT with real-time web search & URL reading capability
4
+ license: apache-2.0
5
  emoji: ⚡
6
  colorFrom: blue
7
  colorTo: yellow
8
  sdk: docker
9
  app_port: 7860
10
  pinned: false
11
+ # Used to promote this Hugging Face Space
12
+ models:
13
+ - hadadrjt/JARVIS
14
+ - agentica-org/DeepCoder-14B-Preview
15
+ - agentica-org/DeepSWE-Preview
16
+ - fka/awesome-chatgpt-prompts
17
+ - black-forest-labs/FLUX.1-Kontext-dev
18
+ - ChatDOC/OCRFlux-3B
19
+ - deepseek-ai/DeepSeek-R1
20
+ - deepseek-ai/DeepSeek-R1-0528
21
+ - deepseek-ai/DeepSeek-R1-Distill-Llama-70B
22
+ - deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
23
+ - deepseek-ai/DeepSeek-R1-0528-Qwen3-8B
24
+ - deepseek-ai/DeepSeek-V3-0324
25
+ - google/gemma-3-1b-it
26
+ - google/gemma-3-27b-it
27
+ - google/gemma-3-4b-it
28
+ - google/gemma-3n-E4B-it
29
+ - google/gemma-3n-E4B-it-litert-preview
30
+ - google/medsiglip-448
31
+ - kyutai/tts-1.6b-en_fr
32
+ - meta-llama/Llama-3.1-8B-Instruct
33
+ - meta-llama/Llama-3.2-3B-Instruct
34
+ - meta-llama/Llama-3.3-70B-Instruct
35
+ - meta-llama/Llama-4-Maverick-17B-128E-Instruct
36
+ - meta-llama/Llama-4-Scout-17B-16E-Instruct
37
+ - microsoft/Phi-4-mini-instruct
38
+ - mistralai/Devstral-Small-2505
39
+ - mistralai/Mistral-Small-3.1-24B-Instruct-2503
40
+ - openai/webgpt_comparisons
41
+ - openai/whisper-large-v3-turbo
42
+ - openai/gpt-oss-120b
43
+ - openai/gpt-oss-20b
44
+ - Qwen/QwQ-32B
45
+ - Qwen/Qwen2.5-VL-32B-Instruct
46
+ - Qwen/Qwen2.5-VL-3B-Instruct
47
+ - Qwen/Qwen2.5-VL-72B-Instruct
48
+ - Qwen/Qwen3-235B-A22B
49
+ - THUDM/GLM-4.1V-9B-Thinking
50
+ - tngtech/DeepSeek-TNG-R1T2-Chimera
51
+ - moonshotai/Kimi-K2-Instruct
52
+ - Qwen/Qwen3-235B-A22B-Instruct-2507
53
+ - Qwen/Qwen3-Coder-480B-A35B-Instruct
54
+ - Qwen/Qwen3-235B-A22B-Thinking-2507
55
+ - zai-org/GLM-4.5
56
+ - zai-org/GLM-4.5-Air
57
+ - zai-org/GLM-4.5V
58
+ - deepseek-ai/DeepSeek-V3.1
59
+ - deepseek-ai/DeepSeek-V3.1-Base
60
+ - microsoft/VibeVoice-1.5B
61
+ - xai-org/grok-2
62
+ - Qwen/Qwen-Image-Edit
63
+ - ByteDance-Seed/Seed-OSS-36B-Instruct
64
+ - google/gemma-3-270m
65
+ - google/gemma-3-270m-it
66
+ - openbmb/MiniCPM-V-4_5
67
+ - tencent/Hunyuan-MT-7B
68
+ - meituan-longcat/LongCat-Flash-Chat
69
+ - Phr00t/WAN2.2-14B-Rapid-AllInOne
70
+ - apple/FastVLM-0.5B
71
+ - stepfun-ai/Step-Audio-2-mini
72
+ # Used to promote this Hugging Face Space
73
+ datasets:
74
+ - fka/awesome-chatgpt-prompts
75
  ---
app.py ADDED
@@ -0,0 +1,40 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from src.processor.message_processor import process_user_request
7
+ from config import DESCRIPTION
8
+ import gradio as gr
9
+
10
+ with gr.Blocks(fill_height=True, fill_width=True) as app:
11
+ with gr.Sidebar(): gr.HTML(DESCRIPTION)
12
+ gr.ChatInterface(
13
+ fn=process_user_request,
14
+ chatbot=gr.Chatbot(
15
+ label="SearchGPT | GPT-4.1 (Nano)",
16
+ type="messages",
17
+ show_copy_button=True,
18
+ scale=1
19
+ ),
20
+ type="messages",  # Explicit messages format (avoids Gradio's deprecation warning)
21
+ examples=[
22
+ ["What is UltimaX Intelligence?"],
23
+ ["https://wikipedia.org/wiki/Artificial_intelligence Read and summarize that"],
24
+ ["What's the latest AI development in 2025?"],
25
+ ["OpenAI GPT-5 vs DeepSeek V3.1"],
26
+ ["Find the source link for the GPT-OSS model"],
27
+ ["https://huggingface.co/papers Extract the most popular papers"],
28
+ ["How to run Gemma 3 (270M) on CPU only?"],
29
+ ["What are the latest trends this year?"],
30
+ ["What caused World War 1 and 2?"]
31
+ ],
32
+ cache_examples=False,
33
+ show_api=False,
34
+ concurrency_limit=5
35
+ )
36
+
37
+ app.launch(
38
+ server_name="0.0.0.0",
39
+ pwa=True
40
+ )
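
The `fn` passed to `gr.ChatInterface` above is a generator, so Gradio streams each yielded string into the assistant bubble. A minimal sketch of that contract, using a hypothetical `echo_stream` handler and assuming only `gradio` is installed:

```python
# Minimal sketch of the streaming contract gr.ChatInterface expects when
# type="messages": the handler receives (message, history) and yields
# progressively longer strings; each yield replaces the assistant message.
import gradio as gr

def echo_stream(message, history):
    # history is a list of {"role": ..., "content": ...} dicts
    partial = ""
    for word in f"You said: {message}".split():
        partial += word + " "
        yield partial

demo = gr.ChatInterface(fn=echo_stream, type="messages")

if __name__ == "__main__":
    demo.launch()
```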
assets/css/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .reasoning import styles
7
+
8
+ __all__ = ['styles']
assets/css/reasoning.py ADDED
@@ -0,0 +1,31 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ def styles(reasoning: str, expanded: bool = False) -> str:
7
+ open_attr = "open" if expanded else ""
8
+ emoji = "&#129504;"
9
+ return f"""
10
+ <details {open_attr} style="
11
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
12
+ ">
13
+ <summary style="
14
+ font-weight: 700;
15
+ font-size: 14px !important;
16
+ cursor: pointer;
17
+ user-select: none;
18
+ ">
19
+ {emoji} Reasoning
20
+ </summary>
21
+ <div style="
22
+ margin-top: 6px;
23
+ padding-top: 6px;
24
+ font-size: 10px !important;
25
+ line-height: 1.7;
26
+ letter-spacing: 0.02em;
27
+ ">
28
+ {reasoning}
29
+ </div>
30
+ </details>
31
+ """
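
`styles()` wraps reasoning text in a collapsible `<details>` block that the Chatbot renders as HTML. A small usage sketch, assuming the repository root is on `PYTHONPATH`:

```python
# Sketch: render a reasoning snippet as a collapsible block.
from assets.css.reasoning import styles

html = styles("Searching the web for recent sources...", expanded=True)
print(html)  # <details open ...><summary>🧠 Reasoning</summary> ... </details>
```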
config.py ADDED
@@ -0,0 +1,457 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ # OPENAI_API_BASE_URL  # Endpoint; set it as a Hugging Face Spaces secret, not here
7
+ # OPENAI_API_KEY  # API key; set it as a Hugging Face Spaces secret, not here
8
+
9
+ MODEL = "gpt-4.1-nano"
10
+
11
+ #MAX_TOKENS = 131072
12
+ MAX_TOKENS = 262144
13
+
14
+ SEARXNG_ENDPOINT = "https://searx.stream/search" # See the endpoint list at https://searx.space
15
+ BAIDU_ENDPOINT = "https://www.baidu.com/s"
16
+ READER_ENDPOINT = "https://r.jina.ai/"
17
+ REQUEST_TIMEOUT = 300  # 5 minutes
18
+
19
+ INSTRUCTIONS_START = """
20
+ You are ChatGPT, an AI assistant with mandatory real-time web search, URL content extraction, knowledge validation, and professional summarization capabilities.
21
+
22
+ Your absolute rules:
23
+ - You must always execute the provided tools (`web_search`, `read_url`) for every single user query or user request, without exception.
24
+ - You are never allowed to answer directly from your internal knowledge, memory, or training data. Outdated or tool-bypassed answers are strictly forbidden.
25
+
26
+ Core Principles:
27
+ - Mandatory Tool Invocation: Every query or request, no matter how simple, factual, or complex, must trigger at least one `web_search` or `read_url`.
28
+ - No Memory Reliance: Do not use prior conversation history, cached context, or built-in knowledge to generate answers. Always re-verify with tools.
29
+ - Up-to-Date Grounding: All responses must be based only on real-time, verifiable data retrieved through tools.
30
+ - Cross-Validation: Always compare findings across at least 3 independent, credible sources before producing a final answer.
31
+ - Professional Output: Responses must be clear, structured, evidence-based, and neutral.
32
+
33
+ Execution Workflow:
34
+ 1. Initial Web Search
35
+ - Immediately call `web_search` or `read_url` when a query or request arrives.
36
+ - Use multiple query or request variations and search engines (`google`, `bing`, `baidu`) for broader coverage.
37
+
38
+ 2. Result Selection
39
+ - Select up to 10 of the most relevant, credible, and content-rich results.
40
+ - Prioritize authoritative sources: academic publications, institutional reports, official documents, expert commentary.
41
+ - Deprioritize low-credibility, promotional, or unverified sources.
42
+ - Avoid over-reliance on any single source.
43
+
44
+ 3. Content Retrieval
45
+ - For each selected URL, use `read_url`.
46
+ - Extract key elements: facts, statistics, data points, expert opinions, and relevant arguments.
47
+ - Normalize terminology, refine phrasing, and remove redundancies for clarity and consistency.
48
+
49
+ 4. Cross-Validation
50
+ - Compare extracted information across at least 3 distinct sources.
51
+ - Identify convergences (agreement), divergences (contradictions), and gaps (missing data).
52
+ - Validate all numerical values, temporal references, and factual claims through multiple corroborations.
53
+
54
+ 5. Knowledge Integration
55
+ - Synthesize findings into a structured hierarchy:
56
+ - Overview → Key details → Supporting evidence → Citations.
57
+ - Emphasize the latest developments, trends, and their implications.
58
+ - Balance depth (for experts) with clarity (for general readers).
59
+
60
+ 6. Response Construction
61
+ - Always cite sources inline using `[Source Title/Article/Tags/Domain](Source URL or Source Links)`.
62
+ - Maintain a professional, precise, and neutral tone.
63
+ - Use clear formatting: headings, numbered lists, and bullet points.
64
+ - Ensure readability, logical progression, and accessibility.
65
+
66
+ 7. Ambiguity & Uncertainty Handling
67
+ - Explicitly flag incomplete, ambiguous, or conflicting data.
68
+ - Provide possible interpretations with transparent reasoning.
69
+ - Clearly note limitations where evidence is insufficient or weak.
70
+
71
+ 8. Quality & Consistency Assurance
72
+ - Always base answers strictly on tool-derived evidence.
73
+ - Guarantee logical flow, factual accuracy, and consistency in terminology.
74
+ - Maintain neutrality and avoid speculative claims.
75
+ - Never bypass tool execution for any query or request.
76
+
77
+ Critical Instruction:
78
+ - Every new query or request must trigger a `web_search` or `read_url`.
79
+ - You must not generate answers from prior knowledge, conversation history, or cached data.
80
+ - Always use Markdown format for URL sources with `[Source Title/Article/Tags/Domain](Source URL or Source Links)`.
81
+ - If tools fail, you must state explicitly that no valid data could be retrieved.
82
+ \n\n\n
83
+ """
84
+
85
+ CONTENT_EXTRACTION = """
86
+ <system>
87
+ - Analyze the retrieved content in detail
88
+ - Identify all critical facts, arguments, statistics, and relevant data
89
+ - Collect all URLs, hyperlinks, references, and citations mentioned in the content
90
+ - Evaluate credibility of sources, highlight potential biases or conflicts
91
+ - Produce a structured, professional, and comprehensive summary
92
+ - Emphasize clarity, accuracy, and logical flow
93
+ - Include all discovered URLs in the final summary as [Source Title](URL)
94
+ - Mark any uncertainties, contradictions, or missing information clearly
95
+ </system>
96
+ \n\n\n
97
+ """
98
+
99
+ SEARCH_SELECTION = """
100
+ <system>
101
+ - For each search result, fetch the full content using read_url
102
+ - Extract key information, main arguments, data points, and statistics
103
+ - Capture every URL present in the content or references
104
+ - Create a professional structured summary.
105
+ - List each source at the end of the summary in the format [Source title](link)
106
+ - Identify ambiguities or gaps in information
107
+ - Ensure clarity, completeness, and high information density
108
+ </system>
109
+ \n\n\n
110
+ """
111
+
112
+ INSTRUCTIONS_END = """
113
+ You have just executed tools and obtained results. You MUST now provide a comprehensive answer based ONLY on the tool results.
114
+ \n\n\n
115
+ """
116
+
117
+ REASONING_STEPS = {
118
+ "web_search": {
119
+ "parsing": (
120
+ "I need to search for information about: {query}<br><br>"
121
+ "I'm analyzing the user's request and preparing to execute a web search. "
122
+ "The query I've identified is comprehensive and should yield relevant results. "
123
+ "I will use the {engine} search engine for this task as it provides reliable and up-to-date information.<br><br>"
124
+ "I'm now parsing the search parameters to ensure they are correctly formatted. "
125
+ "The search query has been validated and I'm checking that all required fields are present. "
126
+ "I need to make sure the search engine parameter is valid and supported by our system.<br><br>"
127
+ "I'm preparing the search request with the following configuration:<br>"
128
+ "- Search Query: {query}<br>"
129
+ "- Search Engine: {engine}<br><br>"
130
+ "I'm verifying that the network connection is stable and that the search service is accessible. "
131
+ "All preliminary checks have been completed successfully."
132
+ ),
133
+ "executing": (
134
+ "I'm now executing the web search for: {query}<br><br>"
135
+ "I'm connecting to the {engine} search service and sending the search request. "
136
+ "The connection has been established successfully and I'm waiting for the search results. "
137
+ "I'm processing multiple search result pages to gather comprehensive information.<br><br>"
138
+ "I'm analyzing the search results to identify the most relevant and authoritative sources. "
139
+ "The search engine is returning results and I'm filtering them based on relevance scores. "
140
+ "I'm extracting key information from each search result including titles, snippets, and URLs.<br><br>"
141
+ "I'm organizing the search results in order of relevance and checking for duplicate content. "
142
+ "The search process is progressing smoothly and I'm collecting valuable information. "
143
+ "I'm also verifying the credibility of the sources to ensure high-quality information.<br><br>"
144
+ "Current status: Processing search results...<br>"
145
+ "Results found: Multiple relevant sources identified<br>"
146
+ "Quality assessment: High relevance detected"
147
+ ),
148
+ "completed": (
149
+ "I have successfully completed the web search for: {query}<br><br>"
150
+ "I've retrieved comprehensive search results from {engine} and analyzed all the information. "
151
+ "The search yielded multiple relevant results that directly address the user's query. "
152
+ "I've extracted the most important information and organized it for processing.<br><br>"
153
+ "I've identified several high-quality sources with authoritative information. "
154
+ "The search results include recent and up-to-date content that is highly relevant. "
155
+ "I've filtered out any duplicate or low-quality results to ensure accuracy.<br><br>"
156
+ "I'm now processing the collected information to formulate a comprehensive response. "
157
+ "The search results provide sufficient detail to answer the user's question thoroughly. "
158
+ "I've verified the credibility of the sources and cross-referenced the information.<br><br>"
159
+ "Search Summary:<br>"
160
+ "- Total results processed: Multiple pages<br>"
161
+ "- Relevance score: High<br>"
162
+ "- Information quality: Verified and accurate<br>"
163
+ "- Sources: Authoritative and recent<br><br>"
164
+ "Preview of results:<br>{preview}"
165
+ ),
166
+ "error": (
167
+ "I encountered an issue while attempting to search for: {query}<br><br>"
168
+ "I tried to execute the web search but encountered an unexpected error. "
169
+ "The error occurred during the search process and I need to handle it appropriately. "
170
+ "I'm analyzing the error to understand what went wrong and how to proceed.<br><br>"
171
+ "Error details: {error}<br><br>"
172
+ "I'm attempting to diagnose the issue and considering alternative approaches. "
173
+ "The error might be due to network connectivity, service availability, or parameter issues. "
174
+ "I will try to recover from this error and provide the best possible response.<br><br>"
175
+ "I'm evaluating whether I can retry the search with modified parameters. "
176
+ "If the search cannot be completed, I will use my existing knowledge to help the user. "
177
+ "I'm committed to providing valuable assistance despite this technical challenge."
178
+ )
179
+ },
180
+ "read_url": {
181
+ "parsing": (
182
+ "I need to read and extract content from the URL: {url}<br><br>"
183
+ "I'm analyzing the URL structure to ensure it's valid and accessible. "
184
+ "The URL appears to be properly formatted and I'm preparing to fetch its content. "
185
+ "I will extract the main content from this webpage to gather detailed information.<br><br>"
186
+ "I'm validating the URL protocol and checking if it uses HTTP or HTTPS. "
187
+ "The domain seems legitimate and I'm preparing the request headers. "
188
+ "I need to ensure that the website allows automated content extraction.<br><br>"
189
+ "I'm configuring the content extraction parameters:<br>"
190
+ "- Target URL: {url}<br>"
191
+ "- Extraction Method: Full content parsing<br>"
192
+ "- Content Type: HTML/Text<br>"
193
+ "- Encoding: Auto-detect<br><br>"
194
+ "I'm checking if the website requires any special handling or authentication. "
195
+ "All preliminary validation checks have been completed successfully."
196
+ ),
197
+ "executing": (
198
+ "I'm now accessing the URL: {url}<br><br>"
199
+ "I'm establishing a connection to the web server and sending the HTTP request. "
200
+ "The connection is being established and I'm waiting for the server response. "
201
+ "I'm following any redirects if necessary to reach the final destination.<br><br>"
202
+ "I'm downloading the webpage content and checking the response status code. "
203
+ "The server is responding and I'm receiving the HTML content. "
204
+ "I'm monitoring the download progress and ensuring data integrity.<br><br>"
205
+ "I'm parsing the HTML structure to extract the main content. "
206
+ "I'm identifying and removing navigation elements, advertisements, and other non-content sections. "
207
+ "I'm focusing on extracting the primary article or information content.<br><br>"
208
+ "Current status: Extracting content...<br>"
209
+ "Response received: Processing HTML<br>"
210
+ "Content extraction: In progress"
211
+ ),
212
+ "completed": (
213
+ "I have successfully extracted content from: {url}<br><br>"
214
+ "I've retrieved the complete webpage content and processed it thoroughly. "
215
+ "The extraction was successful and I've obtained the main textual content. "
216
+ "I've cleaned the content by removing unnecessary HTML tags and formatting.<br><br>"
217
+ "I've identified the main article or information section of the webpage. "
218
+ "The content has been properly parsed and structured for analysis. "
219
+ "I've preserved important information while filtering out irrelevant elements.<br><br>"
220
+ "I'm now analyzing the extracted content to understand its context and relevance. "
221
+ "The information appears to be comprehensive and directly related to the topic. "
222
+ "I've verified that the content is complete and hasn't been truncated.<br><br>"
223
+ "Extraction Summary:<br>"
224
+ "- Content length: Substantial<br>"
225
+ "- Extraction quality: High<br>"
226
+ "- Content type: Article/Information<br>"
227
+ "- Processing status: Complete<br><br>"
228
+ "Preview of extracted content:<br>{preview}"
229
+ ),
230
+ "error": (
231
+ "I encountered an issue while trying to access: {url}<br><br>"
232
+ "I attempted to fetch the webpage content but encountered an error. "
233
+ "The error prevented me from successfully extracting the information. "
234
+ "I'm analyzing the error to understand the cause and find a solution.<br><br>"
235
+ "Error details: {error}<br><br>"
236
+ "I'm considering possible causes such as network issues, access restrictions, or invalid URLs. "
237
+ "The website might be blocking automated access or the URL might be incorrect. "
238
+ "I will try to work around this limitation and provide alternative assistance.<br><br>"
239
+ "I'm evaluating whether I can access the content through alternative methods. "
240
+ "If direct access isn't possible, I'll use my knowledge to help with the query. "
241
+ "I remain committed to providing useful information despite this obstacle."
242
+ )
243
+ }
244
+ }
245
+
246
+ REASONING_DEFAULT = "I'm processing the tool execution request..."
247
+
248
+ REASONING_DELAY = 0.01 # 10 ms
249
+
250
+ OS = [
251
+ "Windows NT 10.0; Win64; x64",
252
+ "Macintosh; Intel Mac OS X 10_15_7",
253
+ "X11; Linux x86_64",
254
+ "Windows NT 11.0; Win64; x64",
255
+ "Macintosh; Intel Mac OS X 11_6_2"
256
+ ]
257
+
258
+ OCTETS = [
259
+ 1, 2, 3, 4, 5, 8, 12, 13, 14, 15,
260
+ 16, 17, 18, 19, 20, 23, 24, 34, 35, 36,
261
+ 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
262
+ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
263
+ 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
264
+ 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
265
+ 77, 78, 79, 80, 81, 82, 83, 84, 85, 86,
266
+ 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
267
+ 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
268
+ 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
269
+ 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
270
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137,
271
+ 138, 139, 140, 141, 142, 143, 144, 145, 146, 147,
272
+ 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
273
+ 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,
274
+ 168, 170, 171, 172, 173, 174, 175, 176, 177, 178,
275
+ 179, 180, 181, 182, 183, 184, 185, 186, 187, 188,
276
+ 189, 190, 191, 192, 193, 194, 195, 196, 197, 198,
277
+ 199, 200, 201, 202, 203, 204, 205, 206, 207, 208,
278
+ 209, 210, 211, 212, 213, 214, 215, 216, 217, 218,
279
+ 219, 220, 221, 222, 223
280
+ ]
281
+
282
+ BROWSERS = [
283
+ "Chrome",
284
+ "Firefox",
285
+ "Safari",
286
+ "Edge",
287
+ "Opera"
288
+ ]
289
+
290
+ CHROME_VERSIONS = [
291
+ "120.0.0.0",
292
+ "119.0.0.0",
293
+ "118.0.0.0",
294
+ "117.0.0.0",
295
+ "116.0.0.0"
296
+ ]
297
+
298
+ FIREFOX_VERSIONS = [
299
+ "121.0",
300
+ "120.0",
301
+ "119.0",
302
+ "118.0",
303
+ "117.0"
304
+ ]
305
+
306
+ SAFARI_VERSIONS = [
307
+ "17.1",
308
+ "17.0",
309
+ "16.6",
310
+ "16.5",
311
+ "16.4",
312
+ ]
313
+
314
+ EDGE_VERSIONS = [
315
+ "120.0.2210.91",
316
+ "119.0.2151.97",
317
+ "118.0.2088.76",
318
+ "117.0.2045.60",
319
+ "116.0.1938.81"
320
+ ]
321
+
322
+ DOMAINS = [
323
+ "google.com",
324
+ "bing.com",
325
+ "yahoo.com",
326
+ "duckduckgo.com",
327
+ "baidu.com",
328
+ "yandex.com",
329
+ "facebook.com",
330
+ "twitter.com",
331
+ "linkedin.com",
332
+ "reddit.com",
333
+ "youtube.com",
334
+ "wikipedia.org",
335
+ "amazon.com",
336
+ "github.com",
337
+ "stackoverflow.com",
338
+ "medium.com",
339
+ "quora.com",
340
+ "pinterest.com",
341
+ "instagram.com",
342
+ "tumblr.com"
343
+ ]
344
+
345
+ PROTOCOLS = [
346
+ "https://",
347
+ "https://www."
348
+ ]
349
+
350
+ SEARCH_ENGINES = [
351
+ "https://www.google.com/search?q=",
352
+ "https://www.bing.com/search?q=",
353
+ "https://search.yahoo.com/search?p=",
354
+ "https://duckduckgo.com/?q=",
355
+ "https://www.baidu.com/s?wd=",
356
+ "https://yandex.com/search/?text=",
357
+ "https://www.google.co.uk/search?q=",
358
+ "https://www.google.ca/search?q=",
359
+ "https://www.google.com.au/search?q=",
360
+ "https://www.google.de/search?q=",
361
+ "https://www.google.fr/search?q=",
362
+ "https://www.google.co.jp/search?q=",
363
+ "https://www.google.com.br/search?q=",
364
+ "https://www.google.co.in/search?q=",
365
+ "https://www.google.ru/search?q=",
366
+ "https://www.google.it/search?q="
367
+ ]
368
+
369
+ KEYWORDS = [
370
+ "news",
371
+ "weather",
372
+ "sports",
373
+ "technology",
374
+ "science",
375
+ "health",
376
+ "finance",
377
+ "entertainment",
378
+ "travel",
379
+ "food",
380
+ "education",
381
+ "business",
382
+ "politics",
383
+ "culture",
384
+ "history",
385
+ "music",
386
+ "movies",
387
+ "games",
388
+ "books",
389
+ "art"
390
+ ]
391
+
392
+ COUNTRIES = [
393
+ "US", "GB", "CA", "AU", "DE", "FR", "JP", "BR", "IN", "RU",
394
+ "IT", "ES", "MX", "NL", "SE", "NO", "DK", "FI", "PL", "TR",
395
+ "KR", "SG", "HK", "TW", "TH", "ID", "MY", "PH", "VN", "AR",
396
+ "CL", "CO", "PE", "VE", "EG", "ZA", "NG", "KE", "MA", "DZ",
397
+ "TN", "IL", "AE", "SA", "QA", "KW", "BH", "OM", "JO", "LB"
398
+ ]
399
+
400
+ LANGUAGES = [
401
+ "en-US", "en-GB", "en-CA", "en-AU", "de-DE", "fr-FR", "ja-JP",
402
+ "pt-BR", "hi-IN", "ru-RU", "it-IT", "es-ES", "es-MX", "nl-NL",
403
+ "sv-SE", "no-NO", "da-DK", "fi-FI", "pl-PL", "tr-TR", "ko-KR",
404
+ "zh-CN", "zh-TW", "th-TH", "id-ID", "ms-MY", "fil-PH", "vi-VN",
405
+ "es-AR", "es-CL", "es-CO", "es-PE", "es-VE", "ar-EG", "en-ZA",
406
+ "en-NG", "sw-KE", "ar-MA", "ar-DZ", "ar-TN", "he-IL", "ar-AE",
407
+ "ar-SA", "ar-QA", "ar-KW", "ar-BH", "ar-OM", "ar-JO", "ar-LB"
408
+ ]
409
+
410
+ TIMEZONES = [
411
+ "America/New_York",
412
+ "America/Chicago",
413
+ "America/Los_Angeles",
414
+ "America/Denver",
415
+ "Europe/London",
416
+ "Europe/Paris",
417
+ "Europe/Berlin",
418
+ "Europe/Moscow",
419
+ "Asia/Tokyo",
420
+ "Asia/Shanghai",
421
+ "Asia/Hong_Kong",
422
+ "Asia/Singapore",
423
+ "Asia/Seoul",
424
+ "Asia/Mumbai",
425
+ "Asia/Dubai",
426
+ "Australia/Sydney",
427
+ "Australia/Melbourne",
428
+ "America/Toronto",
429
+ "America/Vancouver",
430
+ "America/Mexico_City",
431
+ "America/Sao_Paulo",
432
+ "America/Buenos_Aires",
433
+ "Africa/Cairo",
434
+ "Africa/Johannesburg",
435
+ "Africa/Lagos",
436
+ "Africa/Nairobi",
437
+ "Pacific/Auckland",
438
+ "Pacific/Honolulu"
439
+ ]
440
+
441
+ DESCRIPTION = """
442
+ <b>SearchGPT</b> is <b>ChatGPT</b> with real-time web search capabilities and the ability to read content directly from a URL.
443
+ <br><br>
444
+ This Space implements an agent-based system with <b><a href="https://www.gradio.app" target="_blank">Gradio</a></b>. It is integrated with
445
+ <b><a href="https://docs.searxng.org" target="_blank">SearXNG</a></b>, which is exposed to the model as a callable tool (function) for native execution.
446
+ <br><br>
447
+ The agent mode is inspired by the <b><a href="https://openwebui.com/t/hadad/deep_research" target="_blank">Deep Research</a></b> from
448
+ <b><a href="https://docs.openwebui.com" target="_blank">OpenWebUI</a></b> tools script.
449
+ <br><br>
450
+ The <b>Deep Research</b> feature is also available on the primary Spaces of <b><a href="https://umint-openwebui.hf.space"
451
+ target="_blank">UltimaX Intelligence</a></b>.
452
+ <br><br>
453
+ Please consider reading the <b><a href="https://huggingface.co/spaces/umint/ai/discussions/37#68b55209c51ca52ed299db4c"
454
+ target="_blank">Terms of Use and Consequences of Violation</a></b> if you wish to proceed to the main Spaces.
455
+ <br><br>
456
+ <b>Like this project? Feel free to buy me a <a href="https://ko-fi.com/hadad" target="_blank">coffee</a></b>.
457
+ """ # Gradio
requirements.txt ADDED
@@ -0,0 +1,3 @@
1
+ gradio[oauth,mcp]
2
+ openai
3
+ aiohttp[speedups]
src/client/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .openai_client import initialize_client
7
+
8
+ __all__ = ['initialize_client']
src/client/openai_client.py ADDED
@@ -0,0 +1,17 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import os
7
+ from openai import OpenAI
8
+
9
+ def initialize_client():
10
+ try:
11
+ client = OpenAI(
12
+ base_url=os.getenv("OPENAI_API_BASE_URL"),
13
+ api_key=os.getenv("OPENAI_API_KEY")
14
+ )
15
+ return client, None
16
+ except Exception as initialization_error:
17
+ return None, f"Failed to initialize client: {str(initialization_error)}"
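
`initialize_client()` returns a `(client, error)` pair rather than raising, so callers can surface the failure as chat output. A sketch of that contract; the endpoint and key below are placeholders, since in the Space they come from secrets:

```python
# Sketch: exercising the (client, error) contract of initialize_client.
import os
from src.client.openai_client import initialize_client

os.environ.setdefault("OPENAI_API_BASE_URL", "https://example.invalid/v1")  # placeholder
os.environ.setdefault("OPENAI_API_KEY", "sk-placeholder")                   # placeholder

client, error = initialize_client()
if error:
    print(error)
else:
    print("Client ready:", client.base_url)
```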
src/core/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .web_loader import WebLoader
7
+ from .web_configuration import WebConfiguration
8
+
9
+ __all__ = [
10
+ 'WebLoader',
11
+ 'WebConfiguration'
12
+ ]
src/core/web_configuration.py ADDED
@@ -0,0 +1,13 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from config import SEARXNG_ENDPOINT, BAIDU_ENDPOINT, READER_ENDPOINT, REQUEST_TIMEOUT
7
+
8
+ class WebConfiguration:
9
+ def __init__(self):
10
+ self.searxng_endpoint = SEARXNG_ENDPOINT
11
+ self.baidu_endpoint = BAIDU_ENDPOINT
12
+ self.content_reader_api = READER_ENDPOINT
13
+ self.request_timeout = REQUEST_TIMEOUT
src/core/web_loader.py ADDED
@@ -0,0 +1,188 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import random
7
+ import threading
8
+ import time
9
+ from collections import deque
10
+ from config import (
11
+ OS,
12
+ OCTETS,
13
+ BROWSERS,
14
+ CHROME_VERSIONS,
15
+ FIREFOX_VERSIONS,
16
+ SAFARI_VERSIONS,
17
+ EDGE_VERSIONS,
18
+ DOMAINS,
19
+ PROTOCOLS,
20
+ SEARCH_ENGINES,
21
+ KEYWORDS,
22
+ COUNTRIES,
23
+ LANGUAGES,
24
+ TIMEZONES
25
+ )
26
+
27
+ class WebLoader:
28
+ def __init__(self):
29
+ self.ipv4_pool = deque(maxlen=1000)
30
+ self.ipv6_pool = deque(maxlen=1000)
31
+ self.user_agent_pool = deque(maxlen=500)
32
+ self.origin_pool = deque(maxlen=500)
33
+ self.referrer_pool = deque(maxlen=500)
34
+ self.location_pool = deque(maxlen=500)
35
+ self.lock = threading.Lock()
36
+ self.running = True
37
+
38
+ def generate_ipv4(self):
39
+ while len(self.ipv4_pool) < 1000 and self.running:
40
+ octet = random.choice(OCTETS)
41
+ ip = f"{octet}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
42
+ with self.lock:
43
+ self.ipv4_pool.append(ip)
44
+ time.sleep(0.001)
45
+
46
+ def generate_ipv6(self):
47
+ while len(self.ipv6_pool) < 1000 and self.running:
48
+ segments = []
49
+ for _ in range(8):
50
+ segments.append(f"{random.randint(0, 65535):04x}")
51
+ ip = ":".join(segments)
52
+ with self.lock:
53
+ self.ipv6_pool.append(ip)
54
+ time.sleep(0.001)
55
+
56
+ def generate_user_agents(self):
57
+ os_list = OS
58
+ browsers = BROWSERS
59
+ chrome_versions = CHROME_VERSIONS
60
+ firefox_versions = FIREFOX_VERSIONS
61
+ safari_versions = SAFARI_VERSIONS
62
+ edge_versions = EDGE_VERSIONS
63
+
64
+ while len(self.user_agent_pool) < 500 and self.running:
65
+ browser = random.choice(browsers)
66
+ os_string = random.choice(os_list)
67
+
68
+ if browser == "Chrome":
69
+ version = random.choice(chrome_versions)
70
+ ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version} Safari/537.36"
71
+ elif browser == "Firefox":
72
+ version = random.choice(firefox_versions)
73
+ ua = f"Mozilla/5.0 ({os_string}) Gecko/20100101 Firefox/{version}"
74
+ elif browser == "Safari":
75
+ version = random.choice(safari_versions)
76
+ webkit_version = f"{600 + random.randint(0, 15)}.{random.randint(1, 9)}.{random.randint(1, 20)}"
77
+ ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/{webkit_version} (KHTML, like Gecko) Version/{version} Safari/{webkit_version}"
78
+ elif browser == "Edge":
79
+ version = random.choice(edge_versions)
80
+ ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version.split('.')[0]}.0.0.0 Safari/537.36 Edg/{version}"
81
+ else:
82
+ version = f"{random.randint(70, 100)}.0.{random.randint(3000, 5000)}.{random.randint(50, 150)}"
83
+ ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version} Safari/537.36 OPR/{random.randint(80, 106)}.0.0.0"
84
+
85
+ with self.lock:
86
+ self.user_agent_pool.append(ua)
87
+ time.sleep(0.002)
88
+
89
+ def generate_origins(self):
90
+ domains = DOMAINS
91
+ protocols = PROTOCOLS
92
+
93
+ while len(self.origin_pool) < 500 and self.running:
94
+ protocol = random.choice(protocols)
95
+ domain = random.choice(domains)
96
+ origin = f"{protocol}{domain}"
97
+ with self.lock:
98
+ self.origin_pool.append(origin)
99
+ time.sleep(0.002)
100
+
101
+ def generate_referrers(self):
102
+ search_engines = SEARCH_ENGINES
103
+ keywords = KEYWORDS
104
+
105
+ while len(self.referrer_pool) < 500 and self.running:
106
+ engine = random.choice(search_engines)
107
+ keyword = random.choice(keywords)
108
+ referrer = f"{engine}{keyword}"
109
+ with self.lock:
110
+ self.referrer_pool.append(referrer)
111
+ time.sleep(0.002)
112
+
113
+ def generate_locations(self):
114
+ countries = COUNTRIES
115
+ languages = LANGUAGES
116
+ timezones = TIMEZONES
117
+
118
+ while len(self.location_pool) < 500 and self.running:
119
+ country = random.choice(countries)
120
+ language = random.choice(languages)
121
+ timezone = random.choice(timezones)
122
+ location = {
123
+ "country": country,
124
+ "language": language,
125
+ "timezone": timezone
126
+ }
127
+ with self.lock:
128
+ self.location_pool.append(location)
129
+ time.sleep(0.002)
130
+
131
+ def get_ipv4(self):
132
+ with self.lock:
133
+ if self.ipv4_pool:
134
+ return self.ipv4_pool[random.randint(0, len(self.ipv4_pool) - 1)]
135
+ return f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
136
+
137
+ def get_ipv6(self):
138
+ with self.lock:
139
+ if self.ipv6_pool:
140
+ return self.ipv6_pool[random.randint(0, len(self.ipv6_pool) - 1)]
141
+ segments = [f"{random.randint(0, 65535):04x}" for _ in range(8)]
142
+ return ":".join(segments)
143
+
144
+ def get_user_agent(self):
145
+ with self.lock:
146
+ if self.user_agent_pool:
147
+ return self.user_agent_pool[random.randint(0, len(self.user_agent_pool) - 1)]
148
+ return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
149
+
150
+ def get_origin(self):
151
+ with self.lock:
152
+ if self.origin_pool:
153
+ return self.origin_pool[random.randint(0, len(self.origin_pool) - 1)]
154
+ return "https://www.google.com"
155
+
156
+ def get_referrer(self):
157
+ with self.lock:
158
+ if self.referrer_pool:
159
+ return self.referrer_pool[random.randint(0, len(self.referrer_pool) - 1)]
160
+ return "https://www.google.com/search?q=search"
161
+
162
+ def get_location(self):
163
+ with self.lock:
164
+ if self.location_pool:
165
+ return self.location_pool[random.randint(0, len(self.location_pool) - 1)]
166
+ return {
167
+ "country": "US",
168
+ "language": "en-US",
169
+ "timezone": "America/New_York"
170
+ }
171
+
172
+ def start_engine(self):
173
+ threads = [
174
+ threading.Thread(target=self.generate_ipv4, daemon=True),
175
+ threading.Thread(target=self.generate_ipv6, daemon=True),
176
+ threading.Thread(target=self.generate_user_agents, daemon=True),
177
+ threading.Thread(target=self.generate_origins, daemon=True),
178
+ threading.Thread(target=self.generate_referrers, daemon=True),
179
+ threading.Thread(target=self.generate_locations, daemon=True)
180
+ ]
181
+ for thread in threads:
182
+ thread.start()
183
+
184
+ def stop(self):
185
+ self.running = False
186
+
187
+ web_loader = WebLoader()
188
+ web_loader.start_engine()
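
Because `web_loader` is instantiated and started at import time, consumers simply pull randomized values from its pools; each getter falls back to a freshly generated value while the pools warm up. A sketch:

```python
# Sketch: reading randomized header material from the module-level singleton.
from src.core.web_loader import web_loader

print(web_loader.get_ipv4())        # random IPv4, e.g. "52.113.7.41"
print(web_loader.get_user_agent())  # random browser User-Agent string
print(web_loader.get_location())    # {"country": ..., "language": ..., "timezone": ...}
```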
src/engine/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .browser_engine import BrowserEngine
7
+
8
+ __all__ = ['BrowserEngine']
src/engine/browser_engine.py ADDED
@@ -0,0 +1,139 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import asyncio
7
+ import aiohttp
8
+ import requests
9
+ from urllib.parse import quote
10
+ from config import CONTENT_EXTRACTION, SEARCH_SELECTION
11
+ from src.core.web_loader import web_loader
12
+
13
+ class BrowserEngine:
14
+ def __init__(self, configuration):
15
+ self.config = configuration
16
+
17
+ def generate_headers(self):
18
+ ipv4 = web_loader.get_ipv4()
19
+ ipv6 = web_loader.get_ipv6()
20
+ user_agent = web_loader.get_user_agent()
21
+ origin = web_loader.get_origin()
22
+ referrer = web_loader.get_referrer()
23
+ location = web_loader.get_location()
24
+
25
+ return {
26
+ "User-Agent": user_agent,
27
+ "X-Forwarded-For": f"{ipv4}, {ipv6}",
28
+ "X-Real-IP": ipv4,
29
+ "X-Originating-IP": ipv4,
30
+ "X-Remote-IP": ipv4,
31
+ "X-Remote-Addr": ipv4,
32
+ "X-Client-IP": ipv4,
33
+ "X-Forwarded-Host": origin.replace("https://", "").replace("http://", ""),
34
+ "Origin": origin,
35
+ "Referer": referrer,
36
+ "Accept-Language": f"{location['language']},en;q=0.9",
37
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
38
+ "Accept-Encoding": "gzip, deflate, br",
39
+ "DNT": "1",
40
+ "Connection": "keep-alive",
41
+ "Upgrade-Insecure-Requests": "1",
42
+ "Sec-Fetch-Dest": "document",
43
+ "Sec-Fetch-Mode": "navigate",
44
+ "Sec-Fetch-Site": "cross-site",
45
+ "Sec-Fetch-User": "?1",
46
+ "Cache-Control": "max-age=0",
47
+ "X-Country": location['country'],
48
+ "X-Timezone": location['timezone']
49
+ }
50
+
51
+ def _build_search_url_and_selector(self, search_query: str, search_provider: str = "google"):
52
+ if search_provider == "baidu":
53
+ return (
54
+ f"{self.config.content_reader_api}{self.config.baidu_endpoint}?wd={quote(search_query)}",
55
+ "#content_left"
56
+ )
57
+ provider_prefix = "!go" if search_provider == "google" else "!bi"
58
+ return (
59
+ f"{self.config.content_reader_api}{self.config.searxng_endpoint}?q={quote(f'{provider_prefix} {search_query}')}",
60
+ "#urls"
61
+ )
62
+
63
+ async def _async_post(self, url: str, data: dict, headers: dict):
64
+ timeout = aiohttp.ClientTimeout(total=self.config.request_timeout)
65
+ async with aiohttp.ClientSession(timeout=timeout) as session:
66
+ async with session.post(url, data=data, headers=headers) as response:
67
+ text = await response.text()
68
+ if response.status >= 400:
69
+ raise aiohttp.ClientResponseError(
70
+ request_info=response.request_info,
71
+ history=response.history,
72
+ status=response.status,
73
+ message=text,
74
+ headers=response.headers
75
+ )
76
+ return text
77
+
78
+ async def _async_get(self, url: str, headers: dict):
79
+ timeout = aiohttp.ClientTimeout(total=self.config.request_timeout)
80
+ async with aiohttp.ClientSession(timeout=timeout) as session:
81
+ async with session.get(url, headers=headers) as response:
82
+ text = await response.text()
83
+ if response.status >= 400:
84
+ raise aiohttp.ClientResponseError(
85
+ request_info=response.request_info,
86
+ history=response.history,
87
+ status=response.status,
88
+ message=text,
89
+ headers=response.headers
90
+ )
91
+ return text
92
+
93
+ def _sync_post(self, url: str, data: dict, headers: dict):
94
+ response = requests.post(url, data=data, headers=headers, timeout=self.config.request_timeout)
95
+ response.raise_for_status()
96
+ return response.text
97
+
98
+ def _sync_get(self, url: str, headers: dict):
99
+ response = requests.get(url, headers=headers, timeout=self.config.request_timeout)
100
+ response.raise_for_status()
101
+ return response.text
102
+
103
+ async def async_extract_page_content(self, target_url: str) -> str:
104
+ headers = self.generate_headers()
105
+ payload = {"url": target_url}
106
+ extracted_content = await self._async_post(self.config.content_reader_api, payload, headers)
107
+ return f"{extracted_content}\n\n\n{CONTENT_EXTRACTION}\n\n\n"
108
+
109
+ def extract_page_content(self, target_url: str) -> str:
110
+ try:
111
+ return asyncio.run(self.async_extract_page_content(target_url))
112
+ except Exception:
113
+ try:
114
+ headers = self.generate_headers()
115
+ payload = {"url": target_url}
116
+ extracted_content = self._sync_post(self.config.content_reader_api, payload, headers)
117
+ return f"{extracted_content}\n\n\n{CONTENT_EXTRACTION}\n\n\n"
118
+ except Exception as error:
119
+ return f"Error reading URL: {str(error)}"
120
+
121
+ async def async_perform_search(self, search_query: str, search_provider: str = "google") -> str:
122
+ headers = self.generate_headers()
123
+ full_url, selector = self._build_search_url_and_selector(search_query, search_provider)
124
+ headers["X-Target-Selector"] = selector
125
+ search_results = await self._async_get(full_url, headers)
126
+ return f"{search_results}\n\n\n{SEARCH_SELECTION}\n\n\n"
127
+
128
+ def perform_search(self, search_query: str, search_provider: str = "google") -> str:
129
+ try:
130
+ return asyncio.run(self.async_perform_search(search_query, search_provider))
131
+ except Exception:
132
+ try:
133
+ headers = self.generate_headers()
134
+ full_url, selector = self._build_search_url_and_selector(search_query, search_provider)
135
+ headers["X-Target-Selector"] = selector
136
+ search_results = self._sync_get(full_url, headers)
137
+ return f"{search_results}\n\n\n{SEARCH_SELECTION}\n\n\n"
138
+ except Exception as error:
139
+ return f"Error during search: {str(error)}"
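
Outside the chat loop, the engine can be driven directly. A sketch that mirrors how `message_processor.py` wires it up, assuming network access and the endpoints configured in `config.py`:

```python
# Sketch: standalone search and URL extraction through BrowserEngine.
from src.core.web_configuration import WebConfiguration
from src.engine.browser_engine import BrowserEngine

engine = BrowserEngine(WebConfiguration())

results = engine.perform_search("latest AI development 2025", search_provider="google")
page = engine.extract_page_content("https://wikipedia.org/wiki/Artificial_intelligence")
print(results[:500])
print(page[:500])
```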
src/processor/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .message_processor import process_user_request
7
+
8
+ __all__ = ['process_user_request']
src/processor/message_processor.py ADDED
@@ -0,0 +1,77 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import traceback
7
+ from config import MODEL
8
+ from src.core.web_configuration import WebConfiguration
9
+ from src.engine.browser_engine import BrowserEngine
10
+ from src.tools.tool_manager import construct_tool_definitions
11
+ from src.client.openai_client import initialize_client
12
+ from .response.setup import setup_response
13
+ from .response.generator import generate_response
14
+ from .tools.interaction import process_tool_interactions
15
+
16
+ def process_user_request(user_message, chat_history):
17
+ if not isinstance(user_message, str) or not user_message.strip():
18
+ yield []
19
+ return
20
+
21
+ output_content = ""
22
+
23
+ try:
24
+ server, client_initialization_error = initialize_client()
25
+ if client_initialization_error:
26
+ output_content = client_initialization_error
27
+ yield output_content
28
+ return
29
+
30
+ search_configuration = WebConfiguration()
31
+ search_engine_instance = BrowserEngine(search_configuration)
32
+ available_tools = construct_tool_definitions()
33
+
34
+ conversation_messages = setup_response(
35
+ chat_history,
36
+ user_message
37
+ )
38
+
39
+ tool_response = ""
40
+ tools_done = False
41
+
42
+ for tool_update in process_tool_interactions(
43
+ server=server,
44
+ model_name=MODEL,
45
+ conversation_messages=conversation_messages,
46
+ tool_definitions=available_tools,
47
+ search_engine=search_engine_instance
48
+ ):
49
+ if isinstance(tool_update, str):
50
+ tool_response = tool_update
51
+ yield tool_response
52
+ else:
53
+ conversation_messages = tool_update[0]
54
+ tool_response = tool_update[1]
55
+ tools_done = tool_update[2]
56
+
57
+ if tool_response:
58
+ yield tool_response + "\n\n"
59
+
60
+ final_response_generator = generate_response(
61
+ server=server,
62
+ model_name=MODEL,
63
+ conversation_messages=conversation_messages,
64
+ tool_definitions=available_tools,
65
+ tools_done=tools_done
66
+ )
67
+
68
+ for final_response in final_response_generator:
69
+ if tool_response:
70
+ yield tool_response + "\n\n" + final_response
71
+ else:
72
+ yield final_response
73
+
74
+ except Exception as processing_error:
75
+ output_content += f"\nError: {str(processing_error)}\n"
76
+ output_content += traceback.format_exc()
77
+ yield output_content
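
`process_user_request` is itself a generator, so it can be exercised without Gradio by iterating its partial outputs; the last value is the complete answer. A sketch, assuming the API secrets are set:

```python
# Sketch: consuming the streaming generator outside the Gradio UI.
from src.processor.message_processor import process_user_request

final = ""
for partial in process_user_request("What is UltimaX Intelligence?", chat_history=[]):
    final = partial
print(final)
```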
src/processor/reasoning/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .interface import reasoning_interfaces
7
+ from .tool_reasoning import tool_reasoning
8
+
9
+ __all__ = [
10
+ 'reasoning_interfaces',
11
+ 'tool_reasoning'
12
+ ]
src/processor/reasoning/interface.py ADDED
@@ -0,0 +1,18 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ def reasoning_interfaces(text, current_length=0):
7
+ if current_length < len(text):
8
+ reasoning_steps = text[:current_length]
9
+
10
+ if current_length > 0 and not reasoning_steps.endswith((
11
+ '<br>',
12
+ '<br><br>'
13
+ )):
14
+ reasoning_steps += '...'
15
+
16
+ return reasoning_steps
17
+
18
+ return text
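
`reasoning_interfaces` produces the incremental "typing" effect: it reveals the first `current_length` characters and appends an ellipsis unless the cut falls on a `<br>` boundary. For example:

```python
# Sketch: partial reveal vs. full text.
from src.processor.reasoning.interface import reasoning_interfaces

text = "Searching the web for recent sources<br>"
print(reasoning_interfaces(text, 10))         # "Searching ..." (partial + ellipsis)
print(reasoning_interfaces(text, len(text)))  # full text, returned unchanged
```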
src/processor/reasoning/tool_reasoning.py ADDED
@@ -0,0 +1,38 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from config import REASONING_STEPS, REASONING_DEFAULT
7
+
8
+ def tool_reasoning(tool_name, tool_arguments, stage, error=None, result=None):
9
+ if tool_name == "web_search":
10
+ query = tool_arguments.get("query", "") if tool_arguments else ""
11
+ engine = tool_arguments.get("engine", "google") if tool_arguments else "google"
12
+
13
+ template = REASONING_STEPS.get("web_search", {}).get(stage)
14
+
15
+ if template:
16
+ if stage == "completed":
17
+ preview = result[:300] + "..." if result and len(result) > 300 else result
18
+ return template.format(query=query, engine=engine, preview=preview)
19
+ elif stage == "error":
20
+ return template.format(query=query, engine=engine, error=error)
21
+ else:
22
+ return template.format(query=query, engine=engine)
23
+
24
+ elif tool_name == "read_url":
25
+ url = tool_arguments.get("url", "") if tool_arguments else ""
26
+
27
+ template = REASONING_STEPS.get("read_url", {}).get(stage)
28
+
29
+ if template:
30
+ if stage == "completed":
31
+ preview = result[:300] + "..." if result and len(result) > 300 else result
32
+ return template.format(url=url, preview=preview)
33
+ elif stage == "error":
34
+ return template.format(url=url, error=error)
35
+ else:
36
+ return template.format(url=url)
37
+
38
+ return REASONING_DEFAULT
src/processor/response/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .setup import setup_response
7
+ from .formatter import assistant_response
8
+ from .generator import generate_response
9
+
10
+ __all__ = [
11
+ 'setup_response',
12
+ 'assistant_response',
13
+ 'generate_response'
14
+ ]
src/processor/response/formatter.py ADDED
@@ -0,0 +1,26 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ def assistant_response(response_message):
7
+ extracted_tool_calls = []
8
+
9
+ if getattr(response_message, "tool_calls", None):
10
+ for tool_call in response_message.tool_calls:
11
+ extracted_tool_calls.append(
12
+ {
13
+ "id": tool_call.id,
14
+ "type": "function",
15
+ "function": {
16
+ "name": tool_call.function.name,
17
+ "arguments": tool_call.function.arguments
18
+ }
19
+ }
20
+ )
21
+
22
+ return {
23
+ "role": "assistant",
24
+ "content": response_message.content or "",
25
+ "tool_calls": extracted_tool_calls if extracted_tool_calls else None
26
+ }
src/processor/response/generator.py ADDED
@@ -0,0 +1,51 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import traceback
7
+ from config import MAX_TOKENS, INSTRUCTIONS_END
8
+
9
+ def generate_response(
10
+ server,
11
+ model_name,
12
+ conversation_messages,
13
+ tool_definitions,
14
+ tools_done=False
15
+ ):
16
+ response_generator = ""
17
+
18
+ if tools_done:
19
+ system_reminder = {
20
+ "role": "system",
21
+ "content": INSTRUCTIONS_END
22
+ }
23
+ conversation_messages.append(system_reminder)
24
+
25
+ try:
26
+ response = server.chat.completions.create(
27
+ model=model_name,
28
+ messages=conversation_messages,
29
+ tools=tool_definitions if not tools_done else None,
30
+ tool_choice="none",
31
+ max_tokens=MAX_TOKENS,
32
+ temperature=0.75,
33
+ stream=True
34
+ )
35
+
36
+ for data in response:
37
+ try:
38
+ raw_data = data.choices[0].delta.content or ""
39
+ except Exception:
40
+ raw_data = ""
41
+
42
+ if raw_data:
43
+ response_generator += raw_data
44
+ yield response_generator
45
+
46
+ yield response_generator
47
+
48
+ except Exception as response_error:
49
+ response_generator += f"\nError: {str(response_error)}\n"
50
+ response_generator += traceback.format_exc()
51
+ yield response_generator
src/processor/response/setup.py ADDED
@@ -0,0 +1,34 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from ...utils.time import get_current_time
7
+ from config import INSTRUCTIONS_START
8
+
9
+ def setup_response(conversation_history, user_input):
10
+ history = []
11
+
12
+ history.insert(
13
+ 0,
14
+ {
15
+ "role": "system",
16
+ "content": (
17
+ f"Today is: {get_current_time()}"
18
+ + "\n\n\n"
19
+ + INSTRUCTIONS_START
20
+ )
21
+ }
22
+ )
23
+
24
+ if isinstance(conversation_history, list):
25
+ for history_item in conversation_history:
26
+ message_role = history_item.get("role")
27
+ message_content = history_item.get("content")
28
+ if message_role in ("user", "assistant") and isinstance(message_content, str):
29
+ history.append({"role": message_role, "content": message_content})
30
+
31
+ if isinstance(user_input, str) and user_input.strip():
32
+ history.append({"role": "user", "content": user_input})
33
+
34
+ return history
src/processor/tools/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .parser import extract_tool_parameters
7
+ from .executor import invoke_tool_function
8
+ from .interaction import process_tool_interactions
9
+
10
+ __all__ = [
11
+ 'extract_tool_parameters',
12
+ 'invoke_tool_function',
13
+ 'process_tool_interactions'
14
+ ]
src/processor/tools/executor.py ADDED
@@ -0,0 +1,16 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ def invoke_tool_function(search_engine, function_name, function_params):
7
+ if function_name == "web_search":
8
+ return search_engine.perform_search(
9
+ search_query=function_params.get("query", ""),
10
+ search_provider=function_params.get("engine", "google")
11
+ )
12
+ if function_name == "read_url":
13
+ return search_engine.extract_page_content(
14
+ target_url=function_params.get("url", "")
15
+ )
16
+ return f"Unknown tool: {function_name}"
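
`invoke_tool_function` is a plain dispatcher keyed on the tool name, with an explicit fallback for unknown tools. A sketch of both paths, reusing a `BrowserEngine` instance as the `search_engine` argument:

```python
# Sketch: dispatching parsed tool calls through the executor.
from src.core.web_configuration import WebConfiguration
from src.engine.browser_engine import BrowserEngine
from src.processor.tools.executor import invoke_tool_function

engine = BrowserEngine(WebConfiguration())
print(invoke_tool_function(engine, "web_search", {"query": "GPT-OSS source", "engine": "bing"})[:300])
print(invoke_tool_function(engine, "unknown_tool", {}))  # -> "Unknown tool: unknown_tool"
```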
src/processor/tools/interaction.py ADDED
@@ -0,0 +1,225 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import time
7
+ from assets.css.reasoning import styles
8
+ from ..response.formatter import assistant_response
9
+ from ..reasoning.interface import reasoning_interfaces
10
+ from ..reasoning.tool_reasoning import tool_reasoning
11
+ from .parser import extract_tool_parameters
12
+ from .executor import invoke_tool_function
13
+ from config import MAX_TOKENS, REASONING_DELAY
14
+
15
+ def process_tool_interactions(server, model_name, conversation_messages, tool_definitions, search_engine):
16
+ maximum_iterations = 1
17
+ max_retry_limit = 10
18
+ retry_count = 0
19
+ logs_generator = ""
20
+ tool_results = []
21
+ execution_success = False
22
+ last_error = None
23
+ error_history = []
24
+ iteration_metrics = {
25
+ "attempts": 0,
26
+ "failures": 0,
27
+ "success_rate": 0,
28
+ "error_patterns": {},
29
+ "retry_delays": [
30
+ 0.02,
31
+ 0.03,
32
+ 0.04,
33
+ 0.05,
34
+ 0.06,
35
+ 0.07
36
+ ],
37
+ "backoff_multiplier": 1.0
38
+ }
39
+
40
+ while maximum_iterations <= max_retry_limit and not execution_success:
41
+ iteration_metrics["attempts"] += 1
42
+ current_iteration_successful = False
43
+ iteration_errors = []
44
+
45
+ for iteration_index in range(maximum_iterations):
46
+ try:
47
+ retry_delay = iteration_metrics["retry_delays"][min(retry_count, len(iteration_metrics["retry_delays"]) - 1)]
48
+ if retry_count > 0:
49
+ time.sleep(retry_delay * iteration_metrics["backoff_multiplier"])
50
+
51
+ model_response = server.chat.completions.create(
52
+ model=model_name,
53
+ messages=conversation_messages,
54
+ tools=tool_definitions,
55
+ tool_choice="auto",
56
+ max_tokens=MAX_TOKENS,
57
+ temperature=0.6
58
+ )
59
+
60
+ response_choice = model_response.choices[0]
61
+ assistant_message = response_choice.message
62
+ formatted_assistant_message = assistant_response(assistant_message)
63
+
64
+ conversation_messages.append(
65
+ {
66
+ "role": formatted_assistant_message["role"],
67
+ "content": formatted_assistant_message["content"],
68
+ "tool_calls": formatted_assistant_message["tool_calls"]
69
+ }
70
+ )
71
+
72
+ pending_tool_calls = assistant_message.tool_calls or []
73
+ if not pending_tool_calls:
74
+ if logs_generator:
75
+ logs_generator = styles(logs_generator.replace('<br>', '\n').strip(), expanded=False)
76
+ execution_success = True
77
+ current_iteration_successful = True
78
+ break
79
+
80
+ tool_execution_errors = []
81
+ for tool_invocation in pending_tool_calls:
82
+ tool_name = tool_invocation.function.name
83
+ tool_arguments_raw = tool_invocation.function.arguments
84
+
85
+ extracted_arguments, extraction_error = extract_tool_parameters(tool_arguments_raw)
86
+
87
+ if extraction_error:
88
+ error_key = f"{tool_name}_extraction"
89
+ iteration_metrics["error_patterns"][error_key] = iteration_metrics["error_patterns"].get(error_key, 0) + 1
90
+ tool_execution_errors.append({
91
+ "tool": tool_name,
92
+ "error": extraction_error,
93
+ "type": "extraction"
94
+ })
95
+
96
+ reasoning_error = tool_reasoning(tool_name, None, "error", error=extraction_error)
97
+ for i in range(0, len(reasoning_error), 10):
98
+ logs_generator = styles(reasoning_interfaces(reasoning_error, i), expanded=True)
99
+ yield logs_generator
100
+ time.sleep(REASONING_DELAY)
101
+ logs_generator = styles(reasoning_error, expanded=True)
102
+ yield logs_generator
103
+ tool_execution_result = extraction_error
104
+ else:
105
+ reasoning_status = tool_reasoning(tool_name, extracted_arguments, "parsing")
106
+ for i in range(0, len(reasoning_status), 10):
107
+ logs_generator = styles(reasoning_interfaces(reasoning_status, i), expanded=True)
108
+ yield logs_generator
109
+ time.sleep(REASONING_DELAY)
110
+
111
+ reasoning_start = tool_reasoning(tool_name, extracted_arguments, "executing")
112
+ for i in range(0, len(reasoning_start), 10):
113
+ logs_generator = styles(reasoning_interfaces(reasoning_start, i), expanded=True)
114
+ yield logs_generator
115
+ time.sleep(REASONING_DELAY)
116
+
117
+ try:
118
+ tool_execution_result = invoke_tool_function(
119
+ search_engine,
120
+ tool_name,
121
+ extracted_arguments
122
+ )
123
+ tool_results.append({
124
+ "tool": tool_name,
125
+ "arguments": extracted_arguments,
126
+ "result": tool_execution_result,
127
+ "iteration": maximum_iterations,
128
+ "retry_count": retry_count
129
+ })
130
+
131
+ reasoning_done = tool_reasoning(tool_name, extracted_arguments, "completed", result=tool_execution_result)
132
+ for i in range(0, len(reasoning_done), 10):
133
+ logs_generator = styles(reasoning_interfaces(reasoning_done, i), expanded=True)
134
+ yield logs_generator
135
+ time.sleep(REASONING_DELAY)
136
+ logs_generator = styles(reasoning_done, expanded=False)
137
+ yield logs_generator
138
+
139
+ except Exception as tool_error:
140
+ error_key = f"{tool_name}_execution"
141
+ iteration_metrics["error_patterns"][error_key] = iteration_metrics["error_patterns"].get(error_key, 0) + 1
142
+ tool_execution_errors.append({
143
+ "tool": tool_name,
144
+ "error": str(tool_error),
145
+ "type": "execution",
146
+ "arguments": extracted_arguments
147
+ })
148
+
149
+ reasoning_error = tool_reasoning(tool_name, extracted_arguments, "error", error=str(tool_error))
150
+ for i in range(0, len(reasoning_error), 10):
151
+ logs_generator = styles(reasoning_interfaces(reasoning_error, i), expanded=True)
152
+ yield logs_generator
153
+ time.sleep(REASONING_DELAY)
154
+ logs_generator = styles(reasoning_error, expanded=True)
155
+ yield logs_generator
156
+ tool_execution_result = str(tool_error)
157
+
158
+ conversation_messages.append(
159
+ {
160
+ "role": "tool",
161
+ "tool_call_id": tool_invocation.id,
162
+ "name": tool_name,
163
+ "content": tool_execution_result
164
+ }
165
+ )
166
+
167
+ if not tool_execution_errors:
168
+ execution_success = True
169
+ current_iteration_successful = True
170
+ break
171
+ else:
172
+ iteration_errors.extend(tool_execution_errors)
173
+
174
+ except Exception as model_error:
175
+ last_error = str(model_error)
176
+ error_history.append({
177
+ "iteration": maximum_iterations,
178
+ "error": last_error,
179
+ "timestamp": time.time()
180
+ })
181
+ iteration_metrics["failures"] += 1
182
+ iteration_errors.append({
183
+ "error": last_error,
184
+ "type": "model"
185
+ })
186
+
187
+ if current_iteration_successful:
188
+ execution_success = True
189
+ break
190
+ else:
191
+ if iteration_errors:
192
+ error_history.extend(iteration_errors)
193
+
194
+ retry_count += 1
195
+ previous_iterations = maximum_iterations
196
+
197
+ if iteration_metrics["error_patterns"]:
198
+ frequent_errors = max(iteration_metrics["error_patterns"].values())
199
+ if frequent_errors > 3:
200
+ maximum_iterations = min(maximum_iterations + 2, max_retry_limit)
201
+ else:
202
+ maximum_iterations = min(maximum_iterations + 1, max_retry_limit)
203
+ else:
204
+ maximum_iterations = min(maximum_iterations + 1, max_retry_limit)
205
+
206
+ if maximum_iterations > previous_iterations:
207
+ retry_reasoning = f"Retrying with increased iterations: {maximum_iterations} (attempt {retry_count + 1})"
208
+ for i in range(0, len(retry_reasoning), 10):
209
+ logs_generator = styles(reasoning_interfaces(retry_reasoning, i), expanded=True)
210
+ yield logs_generator
211
+ time.sleep(REASONING_DELAY)
212
+
213
+ if maximum_iterations >= max_retry_limit:
214
+ final_error = f"Maximum retry limit reached after {iteration_metrics['attempts']} attempts with {iteration_metrics['failures']} failures"
215
+ logs_generator = styles(final_error, expanded=True)
216
+ yield logs_generator
217
+ break
218
+
219
+ iteration_metrics["success_rate"] = (len(tool_results) / max(iteration_metrics["attempts"], 1)) * 100
220
+
221
+ if logs_generator:
222
+ logs_generator = styles(logs_generator.replace('<br>', '\n').strip(), expanded=False)
223
+
224
+ generator_results = len(tool_results) > 0
225
+ return conversation_messages, logs_generator, generator_results
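process_tool_interactions is a generator: it streams styled reasoning/log fragments for the UI via yield and hands back the final (conversation_messages, logs_generator, generator_results) triple through its return value. A minimal consumer sketch, assuming the server client, model name, tool definitions, and search engine are constructed elsewhere in the application:

# Hypothetical driver loop; `server`, `model_name`, `messages`,
# `tool_definitions`, and `search_engine` come from the caller.
generator = process_tool_interactions(server, model_name, messages,
                                      tool_definitions, search_engine)
try:
    while True:
        log_fragment = next(generator)   # streamed reasoning/log HTML
        render(log_fragment)             # hypothetical UI update hook
except StopIteration as stop:
    # a generator's `return` value is carried on StopIteration
    messages, final_logs, used_tools = stop.value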
src/processor/tools/parser.py ADDED
@@ -0,0 +1,17 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import json
7
+
8
+ def extract_tool_parameters(raw_parameters, fallback_engine="google"):
9
+ try:
10
+ parsed_params = json.loads(raw_parameters or "{}")
11
+ if "engine" in parsed_params and parsed_params["engine"] not in ["google", "bing", "baidu"]:
12
+ parsed_params["engine"] = fallback_engine
13
+ if "engine" not in parsed_params:
14
+ parsed_params["engine"] = fallback_engine
15
+ return parsed_params, None
16
+ except Exception as parse_error:
17
+ return None, f"Invalid tool arguments: {str(parse_error)}"
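extract_tool_parameters returns a (parameters, error) pair and forces the engine field back to the fallback when it is missing or outside the supported set; the inputs below are illustrative:

extract_tool_parameters('{"query": "python", "engine": "duckduckgo"}')
# -> ({'query': 'python', 'engine': 'google'}, None)   unsupported engine replaced
extract_tool_parameters('{"query": "python"}')
# -> ({'query': 'python', 'engine': 'google'}, None)   default engine filled in
extract_tool_parameters('not json')
# -> (None, 'Invalid tool arguments: ...')             parse failure reported, not raised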
src/tools/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .tool_manager import construct_tool_definitions
7
+
8
+ __all__ = ['construct_tool_definitions']
src/tools/tool_manager.py ADDED
@@ -0,0 +1,50 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ def construct_tool_definitions():
7
+ return [
8
+ {
9
+ "type": "function",
10
+ "function": {
11
+ "name": "web_search",
12
+ "description": "Perform a web search via SearXNG (Google or Bing) or Baidu.",
13
+ "parameters": {
14
+ "type": "object",
15
+ "properties": {
16
+ "query": {
17
+ "type": "string"
18
+ },
19
+ "engine": {
20
+ "type": "string",
21
+ "enum": [
22
+ "google",
23
+ "bing",
24
+ "baidu"
25
+ ],
26
+ "default": "google",
27
+ },
28
+ },
29
+ "required": ["query"],
30
+ },
31
+ },
32
+ },
33
+ {
34
+ "type": "function",
35
+ "function": {
36
+ "name": "read_url",
37
+ "description": "Fetch a URL and extract its main readable content.",
38
+ "parameters": {
39
+ "type": "object",
40
+ "properties": {
41
+ "url": {
42
+ "type": "string",
43
+ "format": "uri"
44
+ },
45
+ },
46
+ "required": ["url"],
47
+ },
48
+ },
49
+ }
50
+ ]
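The definitions follow the OpenAI function-calling schema, so they can be handed directly to an OpenAI-compatible client exactly as interaction.py does above. A trimmed sketch, assuming the client and model name are configured elsewhere:

# Hypothetical wiring; `server` (an OpenAI-compatible client) and
# `model_name` are assumed to be configured elsewhere in the app.
tools = construct_tool_definitions()
response = server.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": "Summarize https://example.com"}],
    tools=tools,
    tool_choice="auto",
)
tool_calls = response.choices[0].message.tool_calls or []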
src/utils/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .time import get_current_time
7
+
8
+ __all__ = ['get_current_time']
src/utils/time.py ADDED
@@ -0,0 +1,14 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from datetime import datetime
7
+ from zoneinfo import ZoneInfo
8
+
9
+ def get_current_time() -> str:
10
+ return datetime.now(ZoneInfo(
11
+ "Asia/Jakarta"
12
+ )).strftime(
13
+ "%H:%M %Z. %A, %d %B %Y."
14
+ )
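get_current_time pins the timestamp to Asia/Jakarta; a quick check (the printed value is illustrative):

from src.utils import get_current_time  # assuming the project root is on sys.path
print(get_current_time())
# e.g. "14:05 WIB. Monday, 01 September 2025."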