.gitattributes CHANGED
@@ -32,4 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -1 +1,22 @@
1
- FROM hadadrjt/searchgpt:latest
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ # Use the official Python image as the base image for the app
7
+ FROM python:latest
8
+
9
+ # Set the main working directory inside the container
10
+ WORKDIR /app
11
+
12
+ # Copy all files into the container
13
+ COPY . .
14
+
15
+ # Install all dependencies
16
+ RUN pip install -r requirements.txt
17
+
18
+ # Open the port so the app can be accessed
19
+ EXPOSE 7860
20
+
21
+ # Start the app
22
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,10 +1,75 @@
1
  ---
2
  title: SearchGPT
3
  short_description: ChatGPT with real-time web search & URL reading capability
 
4
  emoji: ⚡
5
  colorFrom: blue
6
  colorTo: yellow
7
  sdk: docker
8
  app_port: 7860
9
  pinned: false
10
  ---
 
1
  ---
2
  title: SearchGPT
3
  short_description: ChatGPT with real-time web search & URL reading capability
4
+ license: apache-2.0
5
  emoji: ⚡
6
  colorFrom: blue
7
  colorTo: yellow
8
  sdk: docker
9
  app_port: 7860
10
  pinned: false
11
+ # Used to promote this Hugging Face Space
12
+ models:
13
+ - hadadrjt/JARVIS
14
+ - agentica-org/DeepCoder-14B-Preview
15
+ - agentica-org/DeepSWE-Preview
16
+ - fka/awesome-chatgpt-prompts
17
+ - black-forest-labs/FLUX.1-Kontext-dev
18
+ - ChatDOC/OCRFlux-3B
19
+ - deepseek-ai/DeepSeek-R1
20
+ - deepseek-ai/DeepSeek-R1-0528
21
+ - deepseek-ai/DeepSeek-R1-Distill-Llama-70B
22
+ - deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
23
+ - deepseek-ai/DeepSeek-R1-0528-Qwen3-8B
24
+ - deepseek-ai/DeepSeek-V3-0324
25
+ - google/gemma-3-1b-it
26
+ - google/gemma-3-27b-it
27
+ - google/gemma-3-4b-it
28
+ - google/gemma-3n-E4B-it
29
+ - google/gemma-3n-E4B-it-litert-preview
30
+ - google/medsiglip-448
31
+ - kyutai/tts-1.6b-en_fr
32
+ - meta-llama/Llama-3.1-8B-Instruct
33
+ - meta-llama/Llama-3.2-3B-Instruct
34
+ - meta-llama/Llama-3.3-70B-Instruct
35
+ - meta-llama/Llama-4-Maverick-17B-128E-Instruct
36
+ - meta-llama/Llama-4-Scout-17B-16E-Instruct
37
+ - microsoft/Phi-4-mini-instruct
38
+ - mistralai/Devstral-Small-2505
39
+ - mistralai/Mistral-Small-3.1-24B-Instruct-2503
40
+ - openai/webgpt_comparisons
41
+ - openai/whisper-large-v3-turbo
42
+ - openai/gpt-oss-120b
43
+ - openai/gpt-oss-20b
44
+ - Qwen/QwQ-32B
45
+ - Qwen/Qwen2.5-VL-32B-Instruct
46
+ - Qwen/Qwen2.5-VL-3B-Instruct
47
+ - Qwen/Qwen2.5-VL-72B-Instruct
48
+ - Qwen/Qwen3-235B-A22B
49
+ - THUDM/GLM-4.1V-9B-Thinking
50
+ - tngtech/DeepSeek-TNG-R1T2-Chimera
51
+ - moonshotai/Kimi-K2-Instruct
52
+ - Qwen/Qwen3-235B-A22B-Instruct-2507
53
+ - Qwen/Qwen3-Coder-480B-A35B-Instruct
54
+ - Qwen/Qwen3-235B-A22B-Thinking-2507
55
+ - zai-org/GLM-4.5
56
+ - zai-org/GLM-4.5-Air
57
+ - zai-org/GLM-4.5V
58
+ - deepseek-ai/DeepSeek-V3.1
59
+ - deepseek-ai/DeepSeek-V3.1-Base
60
+ - microsoft/VibeVoice-1.5B
61
+ - xai-org/grok-2
62
+ - Qwen/Qwen-Image-Edit
63
+ - ByteDance-Seed/Seed-OSS-36B-Instruct
64
+ - google/gemma-3-270m
65
+ - google/gemma-3-270m-it
66
+ - openbmb/MiniCPM-V-4_5
67
+ - tencent/Hunyuan-MT-7B
68
+ - meituan-longcat/LongCat-Flash-Chat
69
+ - Phr00t/WAN2.2-14B-Rapid-AllInOne
70
+ - apple/FastVLM-0.5B
71
+ - stepfun-ai/Step-Audio-2-mini
72
+ # Used to promote this Hugging Face Space
73
+ datasets:
74
+ - fka/awesome-chatgpt-prompts
75
  ---
app.py ADDED
@@ -0,0 +1,40 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from src.processor.message_processor import process_user_request
7
+ from config import DESCRIPTION
8
+ import gradio as gr
9
+
10
+ with gr.Blocks(fill_height=True, fill_width=True) as app:
11
+ with gr.Sidebar(): gr.HTML(DESCRIPTION)
12
+ gr.ChatInterface(
13
+ fn=process_user_request,
14
+ chatbot=gr.Chatbot(
15
+ label="SearchGPT | GPT-4.1 (Nano)",
16
+ type="messages",
17
+ show_copy_button=True,
18
+ scale=1
19
+ ),
20
+ type="messages",  # Explicit messages format (avoids Gradio's deprecation warning)
21
+ examples=[
22
+ ["What is UltimaX Intelligence?"],
23
+ ["https://wikipedia.org/wiki/Artificial_intelligence Read and summarize that"],
24
+ ["What's the latest AI development in 2025?"],
25
+ ["OpenAI GPT-5 vs DeepSeek V3.1"],
26
+ ["Find the source link for the GPT-OSS model"],
27
+ ["https://huggingface.co/papers Extract the most popular papers"],
28
+ ["How to run Gemma 3 (270M) on CPU only?"],
29
+ ["What are the latest trends this year?"],
30
+ ["What caused World War 1 and 2?"]
31
+ ],
32
+ cache_examples=False,
33
+ show_api=False,
34
+ concurrency_limit=5
35
+ )
36
+
37
+ app.launch(
38
+ server_name="0.0.0.0",
39
+ pwa=True
40
+ )
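
The `fn` passed to `gr.ChatInterface` above is a generator, so Gradio streams each yielded string into the assistant bubble. A minimal sketch of that contract, using a hypothetical `echo_stream` handler and assuming only `gradio` is installed:

```python
# Minimal sketch of the streaming contract gr.ChatInterface expects when
# type="messages": the handler receives (message, history) and yields
# progressively longer strings; each yield replaces the assistant message.
import gradio as gr

def echo_stream(message, history):
    # history is a list of {"role": ..., "content": ...} dicts
    partial = ""
    for word in f"You said: {message}".split():
        partial += word + " "
        yield partial

demo = gr.ChatInterface(fn=echo_stream, type="messages")

if __name__ == "__main__":
    demo.launch()
```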
assets/css/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .reasoning import styles
7
+
8
+ __all__ = ['styles']
assets/css/reasoning.py ADDED
@@ -0,0 +1,31 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ def styles(reasoning: str, expanded: bool = False) -> str:
7
+ open_attr = "open" if expanded else ""
8
+ emoji = "&#129504;"
9
+ return f"""
10
+ <details {open_attr} style="
11
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
12
+ ">
13
+ <summary style="
14
+ font-weight: 700;
15
+ font-size: 14px !important;
16
+ cursor: pointer;
17
+ user-select: none;
18
+ ">
19
+ {emoji} Reasoning
20
+ </summary>
21
+ <div style="
22
+ margin-top: 6px;
23
+ padding-top: 6px;
24
+ font-size: 10px !important;
25
+ line-height: 1.7;
26
+ letter-spacing: 0.02em;
27
+ ">
28
+ {reasoning}
29
+ </div>
30
+ </details>
31
+ """
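
`styles()` wraps reasoning text in a collapsible `<details>` block that the Chatbot renders as HTML. A small usage sketch, assuming the repository root is on `PYTHONPATH`:

```python
# Sketch: render a reasoning snippet as a collapsible block.
from assets.css.reasoning import styles

html = styles("Searching the web for recent sources...", expanded=True)
print(html)  # <details open ...><summary>🧠 Reasoning</summary> ... </details>
```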
config.py ADDED
@@ -0,0 +1,457 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ # OPENAI_API_BASE_URL  # Endpoint; set it as a Hugging Face Spaces secret, not here
7
+ # OPENAI_API_KEY  # API key; set it as a Hugging Face Spaces secret, not here
8
+
9
+ MODEL = "gpt-4.1-nano"
10
+
11
+ #MAX_TOKENS = 131072
12
+ MAX_TOKENS = 262144
13
+
14
+ SEARXNG_ENDPOINT = "https://searx.stream/search" # See the endpoint list at https://searx.space
15
+ BAIDU_ENDPOINT = "https://www.baidu.com/s"
16
+ READER_ENDPOINT = "https://r.jina.ai/"
17
+ REQUEST_TIMEOUT = 300  # 5 minutes
18
+
19
+ INSTRUCTIONS_START = """
20
+ You are ChatGPT, an AI assistant with mandatory real-time web search, URL content extraction, knowledge validation, and professional summarization capabilities.
21
+
22
+ Your absolute rules:
23
+ - You must always execute the provided tools (`web_search`, `read_url`) for every single user query or user request, without exception.
24
+ - You are never allowed to answer directly from your internal knowledge, memory, or training data. Outdated or tool-bypassed answers are strictly forbidden.
25
+
26
+ Core Principles:
27
+ - Mandatory Tool Invocation: Every query or request, no matter how simple, factual, or complex, must trigger at least one `web_search` or `read_url`.
28
+ - No Memory Reliance: Do not use prior conversation history, cached context, or built-in knowledge to generate answers. Always re-verify with tools.
29
+ - Up-to-Date Grounding: All responses must be based only on real-time, verifiable data retrieved through tools.
30
+ - Cross-Validation: Always compare findings across at least 3 independent, credible sources before producing a final answer.
31
+ - Professional Output: Responses must be clear, structured, evidence-based, and neutral.
32
+
33
+ Execution Workflow:
34
+ 1. Initial Web Search
35
+ - Immediately call `web_search` or `read_url` when a query or request arrives.
36
+ - Use multiple query or request variations and search engines (`google`, `bing`, `baidu`) for broader coverage.
37
+
38
+ 2. Result Selection
39
+ - Select up to 10 of the most relevant, credible, and content-rich results.
40
+ - Prioritize authoritative sources: academic publications, institutional reports, official documents, expert commentary.
41
+ - Deprioritize low-credibility, promotional, or unverified sources.
42
+ - Avoid over-reliance on any single source.
43
+
44
+ 3. Content Retrieval
45
+ - For each selected URL, use `read_url`.
46
+ - Extract key elements: facts, statistics, data points, expert opinions, and relevant arguments.
47
+ - Normalize terminology, refine phrasing, and remove redundancies for clarity and consistency.
48
+
49
+ 4. Cross-Validation
50
+ - Compare extracted information across at least 3 distinct sources.
51
+ - Identify convergences (agreement), divergences (contradictions), and gaps (missing data).
52
+ - Validate all numerical values, temporal references, and factual claims through multiple corroborations.
53
+
54
+ 5. Knowledge Integration
55
+ - Synthesize findings into a structured hierarchy:
56
+ - Overview → Key details → Supporting evidence → Citations.
57
+ - Emphasize the latest developments, trends, and their implications.
58
+ - Balance depth (for experts) with clarity (for general readers).
59
+
60
+ 6. Response Construction
61
+ - Always cite sources inline using `[Source Title/Article/Tags/Domain](Source URL or Source Links)`.
62
+ - Maintain a professional, precise, and neutral tone.
63
+ - Use clear formatting: headings, numbered lists, and bullet points.
64
+ - Ensure readability, logical progression, and accessibility.
65
+
66
+ 7. Ambiguity & Uncertainty Handling
67
+ - Explicitly flag incomplete, ambiguous, or conflicting data.
68
+ - Provide possible interpretations with transparent reasoning.
69
+ - Clearly note limitations where evidence is insufficient or weak.
70
+
71
+ 8. Quality & Consistency Assurance
72
+ - Always base answers strictly on tool-derived evidence.
73
+ - Guarantee logical flow, factual accuracy, and consistency in terminology.
74
+ - Maintain neutrality and avoid speculative claims.
75
+ - Never bypass tool execution for any query or request.
76
+
77
+ Critical Instruction:
78
+ - Every new query or request must trigger a `web_search` or `read_url`.
79
+ - You must not generate answers from prior knowledge, conversation history, or cached data.
80
+ - Always use Markdown format for URL sources with `[Source Title/Article/Tags/Domain](Source URL or Source Links)`.
81
+ - If tools fail, you must state explicitly that no valid data could be retrieved.
82
+ \n\n\n
83
+ """
84
+
85
+ CONTENT_EXTRACTION = """
86
+ <system>
87
+ - Analyze the retrieved content in detail
88
+ - Identify all critical facts, arguments, statistics, and relevant data
89
+ - Collect all URLs, hyperlinks, references, and citations mentioned in the content
90
+ - Evaluate credibility of sources, highlight potential biases or conflicts
91
+ - Produce a structured, professional, and comprehensive summary
92
+ - Emphasize clarity, accuracy, and logical flow
93
+ - Include all discovered URLs in the final summary as [Source Title](URL)
94
+ - Mark any uncertainties, contradictions, or missing information clearly
95
+ </system>
96
+ \n\n\n
97
+ """
98
+
99
+ SEARCH_SELECTION = """
100
+ <system>
101
+ - For each search result, fetch the full content using read_url
102
+ - Extract key information, main arguments, data points, and statistics
103
+ - Capture every URL present in the content or references
104
+ - Create a professional structured summary.
105
+ - List each source at the end of the summary in the format [Source title](link)
106
+ - Identify ambiguities or gaps in information
107
+ - Ensure clarity, completeness, and high information density
108
+ </system>
109
+ \n\n\n
110
+ """
111
+
112
+ INSTRUCTIONS_END = """
113
+ You have just executed tools and obtained results. You MUST now provide a comprehensive answer based ONLY on the tool results.
114
+ \n\n\n
115
+ """
116
+
117
+ REASONING_STEPS = {
118
+ "web_search": {
119
+ "parsing": (
120
+ "I need to search for information about: {query}<br><br>"
121
+ "I'm analyzing the user's request and preparing to execute a web search. "
122
+ "The query I've identified is comprehensive and should yield relevant results. "
123
+ "I will use the {engine} search engine for this task as it provides reliable and up-to-date information.<br><br>"
124
+ "I'm now parsing the search parameters to ensure they are correctly formatted. "
125
+ "The search query has been validated and I'm checking that all required fields are present. "
126
+ "I need to make sure the search engine parameter is valid and supported by our system.<br><br>"
127
+ "I'm preparing the search request with the following configuration:<br>"
128
+ "- Search Query: {query}<br>"
129
+ "- Search Engine: {engine}<br><br>"
130
+ "I'm verifying that the network connection is stable and that the search service is accessible. "
131
+ "All preliminary checks have been completed successfully."
132
+ ),
133
+ "executing": (
134
+ "I'm now executing the web search for: {query}<br><br>"
135
+ "I'm connecting to the {engine} search service and sending the search request. "
136
+ "The connection has been established successfully and I'm waiting for the search results. "
137
+ "I'm processing multiple search result pages to gather comprehensive information.<br><br>"
138
+ "I'm analyzing the search results to identify the most relevant and authoritative sources. "
139
+ "The search engine is returning results and I'm filtering them based on relevance scores. "
140
+ "I'm extracting key information from each search result including titles, snippets, and URLs.<br><br>"
141
+ "I'm organizing the search results in order of relevance and checking for duplicate content. "
142
+ "The search process is progressing smoothly and I'm collecting valuable information. "
143
+ "I'm also verifying the credibility of the sources to ensure high-quality information.<br><br>"
144
+ "Current status: Processing search results...<br>"
145
+ "Results found: Multiple relevant sources identified<br>"
146
+ "Quality assessment: High relevance detected"
147
+ ),
148
+ "completed": (
149
+ "I have successfully completed the web search for: {query}<br><br>"
150
+ "I've retrieved comprehensive search results from {engine} and analyzed all the information. "
151
+ "The search yielded multiple relevant results that directly address the user's query. "
152
+ "I've extracted the most important information and organized it for processing.<br><br>"
153
+ "I've identified several high-quality sources with authoritative information. "
154
+ "The search results include recent and up-to-date content that is highly relevant. "
155
+ "I've filtered out any duplicate or low-quality results to ensure accuracy.<br><br>"
156
+ "I'm now processing the collected information to formulate a comprehensive response. "
157
+ "The search results provide sufficient detail to answer the user's question thoroughly. "
158
+ "I've verified the credibility of the sources and cross-referenced the information.<br><br>"
159
+ "Search Summary:<br>"
160
+ "- Total results processed: Multiple pages<br>"
161
+ "- Relevance score: High<br>"
162
+ "- Information quality: Verified and accurate<br>"
163
+ "- Sources: Authoritative and recent<br><br>"
164
+ "Preview of results:<br>{preview}"
165
+ ),
166
+ "error": (
167
+ "I encountered an issue while attempting to search for: {query}<br><br>"
168
+ "I tried to execute the web search but encountered an unexpected error. "
169
+ "The error occurred during the search process and I need to handle it appropriately. "
170
+ "I'm analyzing the error to understand what went wrong and how to proceed.<br><br>"
171
+ "Error details: {error}<br><br>"
172
+ "I'm attempting to diagnose the issue and considering alternative approaches. "
173
+ "The error might be due to network connectivity, service availability, or parameter issues. "
174
+ "I will try to recover from this error and provide the best possible response.<br><br>"
175
+ "I'm evaluating whether I can retry the search with modified parameters. "
176
+ "If the search cannot be completed, I will use my existing knowledge to help the user. "
177
+ "I'm committed to providing valuable assistance despite this technical challenge."
178
+ )
179
+ },
180
+ "read_url": {
181
+ "parsing": (
182
+ "I need to read and extract content from the URL: {url}<br><br>"
183
+ "I'm analyzing the URL structure to ensure it's valid and accessible. "
184
+ "The URL appears to be properly formatted and I'm preparing to fetch its content. "
185
+ "I will extract the main content from this webpage to gather detailed information.<br><br>"
186
+ "I'm validating the URL protocol and checking if it uses HTTP or HTTPS. "
187
+ "The domain seems legitimate and I'm preparing the request headers. "
188
+ "I need to ensure that the website allows automated content extraction.<br><br>"
189
+ "I'm configuring the content extraction parameters:<br>"
190
+ "- Target URL: {url}<br>"
191
+ "- Extraction Method: Full content parsing<br>"
192
+ "- Content Type: HTML/Text<br>"
193
+ "- Encoding: Auto-detect<br><br>"
194
+ "I'm checking if the website requires any special handling or authentication. "
195
+ "All preliminary validation checks have been completed successfully."
196
+ ),
197
+ "executing": (
198
+ "I'm now accessing the URL: {url}<br><br>"
199
+ "I'm establishing a connection to the web server and sending the HTTP request. "
200
+ "The connection is being established and I'm waiting for the server response. "
201
+ "I'm following any redirects if necessary to reach the final destination.<br><br>"
202
+ "I'm downloading the webpage content and checking the response status code. "
203
+ "The server is responding and I'm receiving the HTML content. "
204
+ "I'm monitoring the download progress and ensuring data integrity.<br><br>"
205
+ "I'm parsing the HTML structure to extract the main content. "
206
+ "I'm identifying and removing navigation elements, advertisements, and other non-content sections. "
207
+ "I'm focusing on extracting the primary article or information content.<br><br>"
208
+ "Current status: Extracting content...<br>"
209
+ "Response received: Processing HTML<br>"
210
+ "Content extraction: In progress"
211
+ ),
212
+ "completed": (
213
+ "I have successfully extracted content from: {url}<br><br>"
214
+ "I've retrieved the complete webpage content and processed it thoroughly. "
215
+ "The extraction was successful and I've obtained the main textual content. "
216
+ "I've cleaned the content by removing unnecessary HTML tags and formatting.<br><br>"
217
+ "I've identified the main article or information section of the webpage. "
218
+ "The content has been properly parsed and structured for analysis. "
219
+ "I've preserved important information while filtering out irrelevant elements.<br><br>"
220
+ "I'm now analyzing the extracted content to understand its context and relevance. "
221
+ "The information appears to be comprehensive and directly related to the topic. "
222
+ "I've verified that the content is complete and hasn't been truncated.<br><br>"
223
+ "Extraction Summary:<br>"
224
+ "- Content length: Substantial<br>"
225
+ "- Extraction quality: High<br>"
226
+ "- Content type: Article/Information<br>"
227
+ "- Processing status: Complete<br><br>"
228
+ "Preview of extracted content:<br>{preview}"
229
+ ),
230
+ "error": (
231
+ "I encountered an issue while trying to access: {url}<br><br>"
232
+ "I attempted to fetch the webpage content but encountered an error. "
233
+ "The error prevented me from successfully extracting the information. "
234
+ "I'm analyzing the error to understand the cause and find a solution.<br><br>"
235
+ "Error details: {error}<br><br>"
236
+ "I'm considering possible causes such as network issues, access restrictions, or invalid URLs. "
237
+ "The website might be blocking automated access or the URL might be incorrect. "
238
+ "I will try to work around this limitation and provide alternative assistance.<br><br>"
239
+ "I'm evaluating whether I can access the content through alternative methods. "
240
+ "If direct access isn't possible, I'll use my knowledge to help with the query. "
241
+ "I remain committed to providing useful information despite this obstacle."
242
+ )
243
+ }
244
+ }
245
+
246
+ REASONING_DEFAULT = "I'm processing the tool execution request..."
247
+
248
+ REASONING_DELAY = 0.01 # 10 ms
249
+
250
+ OS = [
251
+ "Windows NT 10.0; Win64; x64",
252
+ "Macintosh; Intel Mac OS X 10_15_7",
253
+ "X11; Linux x86_64",
254
+ "Windows NT 11.0; Win64; x64",
255
+ "Macintosh; Intel Mac OS X 11_6_2"
256
+ ]
257
+
258
+ OCTETS = [
259
+ 1, 2, 3, 4, 5, 8, 12, 13, 14, 15,
260
+ 16, 17, 18, 19, 20, 23, 24, 34, 35, 36,
261
+ 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
262
+ 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
263
+ 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
264
+ 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
265
+ 77, 78, 79, 80, 81, 82, 83, 84, 85, 86,
266
+ 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
267
+ 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
268
+ 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
269
+ 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
270
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137,
271
+ 138, 139, 140, 141, 142, 143, 144, 145, 146, 147,
272
+ 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
273
+ 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,
274
+ 168, 170, 171, 172, 173, 174, 175, 176, 177, 178,
275
+ 179, 180, 181, 182, 183, 184, 185, 186, 187, 188,
276
+ 189, 190, 191, 192, 193, 194, 195, 196, 197, 198,
277
+ 199, 200, 201, 202, 203, 204, 205, 206, 207, 208,
278
+ 209, 210, 211, 212, 213, 214, 215, 216, 217, 218,
279
+ 219, 220, 221, 222, 223
280
+ ]
281
+
282
+ BROWSERS = [
283
+ "Chrome",
284
+ "Firefox",
285
+ "Safari",
286
+ "Edge",
287
+ "Opera"
288
+ ]
289
+
290
+ CHROME_VERSIONS = [
291
+ "120.0.0.0",
292
+ "119.0.0.0",
293
+ "118.0.0.0",
294
+ "117.0.0.0",
295
+ "116.0.0.0"
296
+ ]
297
+
298
+ FIREFOX_VERSIONS = [
299
+ "121.0",
300
+ "120.0",
301
+ "119.0",
302
+ "118.0",
303
+ "117.0"
304
+ ]
305
+
306
+ SAFARI_VERSIONS = [
307
+ "17.1",
308
+ "17.0",
309
+ "16.6",
310
+ "16.5",
311
+ "16.4",
312
+ ]
313
+
314
+ EDGE_VERSIONS = [
315
+ "120.0.2210.91",
316
+ "119.0.2151.97",
317
+ "118.0.2088.76",
318
+ "117.0.2045.60",
319
+ "116.0.1938.81"
320
+ ]
321
+
322
+ DOMAINS = [
323
+ "google.com",
324
+ "bing.com",
325
+ "yahoo.com",
326
+ "duckduckgo.com",
327
+ "baidu.com",
328
+ "yandex.com",
329
+ "facebook.com",
330
+ "twitter.com",
331
+ "linkedin.com",
332
+ "reddit.com",
333
+ "youtube.com",
334
+ "wikipedia.org",
335
+ "amazon.com",
336
+ "github.com",
337
+ "stackoverflow.com",
338
+ "medium.com",
339
+ "quora.com",
340
+ "pinterest.com",
341
+ "instagram.com",
342
+ "tumblr.com"
343
+ ]
344
+
345
+ PROTOCOLS = [
346
+ "https://",
347
+ "https://www."
348
+ ]
349
+
350
+ SEARCH_ENGINES = [
351
+ "https://www.google.com/search?q=",
352
+ "https://www.bing.com/search?q=",
353
+ "https://search.yahoo.com/search?p=",
354
+ "https://duckduckgo.com/?q=",
355
+ "https://www.baidu.com/s?wd=",
356
+ "https://yandex.com/search/?text=",
357
+ "https://www.google.co.uk/search?q=",
358
+ "https://www.google.ca/search?q=",
359
+ "https://www.google.com.au/search?q=",
360
+ "https://www.google.de/search?q=",
361
+ "https://www.google.fr/search?q=",
362
+ "https://www.google.co.jp/search?q=",
363
+ "https://www.google.com.br/search?q=",
364
+ "https://www.google.co.in/search?q=",
365
+ "https://www.google.ru/search?q=",
366
+ "https://www.google.it/search?q="
367
+ ]
368
+
369
+ KEYWORDS = [
370
+ "news",
371
+ "weather",
372
+ "sports",
373
+ "technology",
374
+ "science",
375
+ "health",
376
+ "finance",
377
+ "entertainment",
378
+ "travel",
379
+ "food",
380
+ "education",
381
+ "business",
382
+ "politics",
383
+ "culture",
384
+ "history",
385
+ "music",
386
+ "movies",
387
+ "games",
388
+ "books",
389
+ "art"
390
+ ]
391
+
392
+ COUNTRIES = [
393
+ "US", "GB", "CA", "AU", "DE", "FR", "JP", "BR", "IN", "RU",
394
+ "IT", "ES", "MX", "NL", "SE", "NO", "DK", "FI", "PL", "TR",
395
+ "KR", "SG", "HK", "TW", "TH", "ID", "MY", "PH", "VN", "AR",
396
+ "CL", "CO", "PE", "VE", "EG", "ZA", "NG", "KE", "MA", "DZ",
397
+ "TN", "IL", "AE", "SA", "QA", "KW", "BH", "OM", "JO", "LB"
398
+ ]
399
+
400
+ LANGUAGES = [
401
+ "en-US", "en-GB", "en-CA", "en-AU", "de-DE", "fr-FR", "ja-JP",
402
+ "pt-BR", "hi-IN", "ru-RU", "it-IT", "es-ES", "es-MX", "nl-NL",
403
+ "sv-SE", "no-NO", "da-DK", "fi-FI", "pl-PL", "tr-TR", "ko-KR",
404
+ "zh-CN", "zh-TW", "th-TH", "id-ID", "ms-MY", "fil-PH", "vi-VN",
405
+ "es-AR", "es-CL", "es-CO", "es-PE", "es-VE", "ar-EG", "en-ZA",
406
+ "en-NG", "sw-KE", "ar-MA", "ar-DZ", "ar-TN", "he-IL", "ar-AE",
407
+ "ar-SA", "ar-QA", "ar-KW", "ar-BH", "ar-OM", "ar-JO", "ar-LB"
408
+ ]
409
+
410
+ TIMEZONES = [
411
+ "America/New_York",
412
+ "America/Chicago",
413
+ "America/Los_Angeles",
414
+ "America/Denver",
415
+ "Europe/London",
416
+ "Europe/Paris",
417
+ "Europe/Berlin",
418
+ "Europe/Moscow",
419
+ "Asia/Tokyo",
420
+ "Asia/Shanghai",
421
+ "Asia/Hong_Kong",
422
+ "Asia/Singapore",
423
+ "Asia/Seoul",
424
+ "Asia/Mumbai",
425
+ "Asia/Dubai",
426
+ "Australia/Sydney",
427
+ "Australia/Melbourne",
428
+ "America/Toronto",
429
+ "America/Vancouver",
430
+ "America/Mexico_City",
431
+ "America/Sao_Paulo",
432
+ "America/Buenos_Aires",
433
+ "Africa/Cairo",
434
+ "Africa/Johannesburg",
435
+ "Africa/Lagos",
436
+ "Africa/Nairobi",
437
+ "Pacific/Auckland",
438
+ "Pacific/Honolulu"
439
+ ]
440
+
441
+ DESCRIPTION = """
442
+ <b>SearchGPT</b> is <b>ChatGPT</b> with real-time web search capabilities and the ability to read content directly from a URL.
443
+ <br><br>
444
+ This Space implements an agent-based system with <b><a href="https://www.gradio.app" target="_blank">Gradio</a></b>. It is integrated with
445
+ <b><a href="https://docs.searxng.org" target="_blank">SearXNG</a></b>, which is exposed to the model as a callable tool (function) for native execution.
446
+ <br><br>
447
+ The agent mode is inspired by the <b><a href="https://openwebui.com/t/hadad/deep_research" target="_blank">Deep Research</a></b> from
448
+ <b><a href="https://docs.openwebui.com" target="_blank">OpenWebUI</a></b> tools script.
449
+ <br><br>
450
+ The <b>Deep Research</b> feature is also available on the primary Spaces of <b><a href="https://umint-openwebui.hf.space"
451
+ target="_blank">UltimaX Intelligence</a></b>.
452
+ <br><br>
453
+ Please consider reading the <b><a href="https://huggingface.co/spaces/umint/ai/discussions/37#68b55209c51ca52ed299db4c"
454
+ target="_blank">Terms of Use and Consequences of Violation</a></b> if you wish to proceed to the main Spaces.
455
+ <br><br>
456
+ <b>Like this project? Feel free to buy me a <a href="https://ko-fi.com/hadad" target="_blank">coffee</a></b>.
457
+ """ # Gradio
requirements.txt ADDED
@@ -0,0 +1,3 @@
1
+ gradio[oauth,mcp]
2
+ openai
3
+ aiohttp[speedups]
src/client/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .openai_client import initialize_client
7
+
8
+ __all__ = ['initialize_client']
src/client/openai_client.py ADDED
@@ -0,0 +1,17 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import os
7
+ from openai import OpenAI
8
+
9
+ def initialize_client():
10
+ try:
11
+ client = OpenAI(
12
+ base_url=os.getenv("OPENAI_API_BASE_URL"),
13
+ api_key=os.getenv("OPENAI_API_KEY")
14
+ )
15
+ return client, None
16
+ except Exception as initialization_error:
17
+ return None, f"Failed to initialize client: {str(initialization_error)}"
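
`initialize_client()` returns a `(client, error)` pair rather than raising, so callers can surface the failure as chat output. A sketch of that contract; the endpoint and key below are placeholders, since in the Space they come from secrets:

```python
# Sketch: exercising the (client, error) contract of initialize_client.
import os
from src.client.openai_client import initialize_client

os.environ.setdefault("OPENAI_API_BASE_URL", "https://example.invalid/v1")  # placeholder
os.environ.setdefault("OPENAI_API_KEY", "sk-placeholder")                   # placeholder

client, error = initialize_client()
if error:
    print(error)
else:
    print("Client ready:", client.base_url)
```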
src/core/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .web_loader import WebLoader
7
+ from .web_configuration import WebConfiguration
8
+
9
+ __all__ = [
10
+ 'WebLoader',
11
+ 'WebConfiguration'
12
+ ]
src/core/web_configuration.py ADDED
@@ -0,0 +1,13 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from config import SEARXNG_ENDPOINT, BAIDU_ENDPOINT, READER_ENDPOINT, REQUEST_TIMEOUT
7
+
8
+ class WebConfiguration:
9
+ def __init__(self):
10
+ self.searxng_endpoint = SEARXNG_ENDPOINT
11
+ self.baidu_endpoint = BAIDU_ENDPOINT
12
+ self.content_reader_api = READER_ENDPOINT
13
+ self.request_timeout = REQUEST_TIMEOUT
src/core/web_loader.py ADDED
@@ -0,0 +1,188 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import random
7
+ import threading
8
+ import time
9
+ from collections import deque
10
+ from config import (
11
+ OS,
12
+ OCTETS,
13
+ BROWSERS,
14
+ CHROME_VERSIONS,
15
+ FIREFOX_VERSIONS,
16
+ SAFARI_VERSIONS,
17
+ EDGE_VERSIONS,
18
+ DOMAINS,
19
+ PROTOCOLS,
20
+ SEARCH_ENGINES,
21
+ KEYWORDS,
22
+ COUNTRIES,
23
+ LANGUAGES,
24
+ TIMEZONES
25
+ )
26
+
27
+ class WebLoader:
28
+ def __init__(self):
29
+ self.ipv4_pool = deque(maxlen=1000)
30
+ self.ipv6_pool = deque(maxlen=1000)
31
+ self.user_agent_pool = deque(maxlen=500)
32
+ self.origin_pool = deque(maxlen=500)
33
+ self.referrer_pool = deque(maxlen=500)
34
+ self.location_pool = deque(maxlen=500)
35
+ self.lock = threading.Lock()
36
+ self.running = True
37
+
38
+ def generate_ipv4(self):
39
+ while len(self.ipv4_pool) < 1000 and self.running:
40
+ octet = random.choice(OCTETS)
41
+ ip = f"{octet}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
42
+ with self.lock:
43
+ self.ipv4_pool.append(ip)
44
+ time.sleep(0.001)
45
+
46
+ def generate_ipv6(self):
47
+ while len(self.ipv6_pool) < 1000 and self.running:
48
+ segments = []
49
+ for _ in range(8):
50
+ segments.append(f"{random.randint(0, 65535):04x}")
51
+ ip = ":".join(segments)
52
+ with self.lock:
53
+ self.ipv6_pool.append(ip)
54
+ time.sleep(0.001)
55
+
56
+ def generate_user_agents(self):
57
+ os_list = OS
58
+ browsers = BROWSERS
59
+ chrome_versions = CHROME_VERSIONS
60
+ firefox_versions = FIREFOX_VERSIONS
61
+ safari_versions = SAFARI_VERSIONS
62
+ edge_versions = EDGE_VERSIONS
63
+
64
+ while len(self.user_agent_pool) < 500 and self.running:
65
+ browser = random.choice(browsers)
66
+ os_string = random.choice(os_list)
67
+
68
+ if browser == "Chrome":
69
+ version = random.choice(chrome_versions)
70
+ ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version} Safari/537.36"
71
+ elif browser == "Firefox":
72
+ version = random.choice(firefox_versions)
73
+ ua = f"Mozilla/5.0 ({os_string}) Gecko/20100101 Firefox/{version}"
74
+ elif browser == "Safari":
75
+ version = random.choice(safari_versions)
76
+ webkit_version = f"{600 + random.randint(0, 15)}.{random.randint(1, 9)}.{random.randint(1, 20)}"
77
+ ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/{webkit_version} (KHTML, like Gecko) Version/{version} Safari/{webkit_version}"
78
+ elif browser == "Edge":
79
+ version = random.choice(edge_versions)
80
+ ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version.split('.')[0]}.0.0.0 Safari/537.36 Edg/{version}"
81
+ else:
82
+ version = f"{random.randint(70, 100)}.0.{random.randint(3000, 5000)}.{random.randint(50, 150)}"
83
+ ua = f"Mozilla/5.0 ({os_string}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{version} Safari/537.36 OPR/{random.randint(80, 106)}.0.0.0"
84
+
85
+ with self.lock:
86
+ self.user_agent_pool.append(ua)
87
+ time.sleep(0.002)
88
+
89
+ def generate_origins(self):
90
+ domains = DOMAINS
91
+ protocols = PROTOCOLS
92
+
93
+ while len(self.origin_pool) < 500 and self.running:
94
+ protocol = random.choice(protocols)
95
+ domain = random.choice(domains)
96
+ origin = f"{protocol}{domain}"
97
+ with self.lock:
98
+ self.origin_pool.append(origin)
99
+ time.sleep(0.002)
100
+
101
+ def generate_referrers(self):
102
+ search_engines = SEARCH_ENGINES
103
+ keywords = KEYWORDS
104
+
105
+ while len(self.referrer_pool) < 500 and self.running:
106
+ engine = random.choice(search_engines)
107
+ keyword = random.choice(keywords)
108
+ referrer = f"{engine}{keyword}"
109
+ with self.lock:
110
+ self.referrer_pool.append(referrer)
111
+ time.sleep(0.002)
112
+
113
+ def generate_locations(self):
114
+ countries = COUNTRIES
115
+ languages = LANGUAGES
116
+ timezones = TIMEZONES
117
+
118
+ while len(self.location_pool) < 500 and self.running:
119
+ country = random.choice(countries)
120
+ language = random.choice(languages)
121
+ timezone = random.choice(timezones)
122
+ location = {
123
+ "country": country,
124
+ "language": language,
125
+ "timezone": timezone
126
+ }
127
+ with self.lock:
128
+ self.location_pool.append(location)
129
+ time.sleep(0.002)
130
+
131
+ def get_ipv4(self):
132
+ with self.lock:
133
+ if self.ipv4_pool:
134
+ return self.ipv4_pool[random.randint(0, len(self.ipv4_pool) - 1)]
135
+ return f"{random.randint(1, 223)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(1, 254)}"
136
+
137
+ def get_ipv6(self):
138
+ with self.lock:
139
+ if self.ipv6_pool:
140
+ return self.ipv6_pool[random.randint(0, len(self.ipv6_pool) - 1)]
141
+ segments = [f"{random.randint(0, 65535):04x}" for _ in range(8)]
142
+ return ":".join(segments)
143
+
144
+ def get_user_agent(self):
145
+ with self.lock:
146
+ if self.user_agent_pool:
147
+ return self.user_agent_pool[random.randint(0, len(self.user_agent_pool) - 1)]
148
+ return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
149
+
150
+ def get_origin(self):
151
+ with self.lock:
152
+ if self.origin_pool:
153
+ return self.origin_pool[random.randint(0, len(self.origin_pool) - 1)]
154
+ return "https://www.google.com"
155
+
156
+ def get_referrer(self):
157
+ with self.lock:
158
+ if self.referrer_pool:
159
+ return self.referrer_pool[random.randint(0, len(self.referrer_pool) - 1)]
160
+ return "https://www.google.com/search?q=search"
161
+
162
+ def get_location(self):
163
+ with self.lock:
164
+ if self.location_pool:
165
+ return self.location_pool[random.randint(0, len(self.location_pool) - 1)]
166
+ return {
167
+ "country": "US",
168
+ "language": "en-US",
169
+ "timezone": "America/New_York"
170
+ }
171
+
172
+ def start_engine(self):
173
+ threads = [
174
+ threading.Thread(target=self.generate_ipv4, daemon=True),
175
+ threading.Thread(target=self.generate_ipv6, daemon=True),
176
+ threading.Thread(target=self.generate_user_agents, daemon=True),
177
+ threading.Thread(target=self.generate_origins, daemon=True),
178
+ threading.Thread(target=self.generate_referrers, daemon=True),
179
+ threading.Thread(target=self.generate_locations, daemon=True)
180
+ ]
181
+ for thread in threads:
182
+ thread.start()
183
+
184
+ def stop(self):
185
+ self.running = False
186
+
187
+ web_loader = WebLoader()
188
+ web_loader.start_engine()
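
Because `web_loader` is instantiated and started at import time, consumers simply pull randomized values from its pools; each getter falls back to a freshly generated value while the pools warm up. A sketch:

```python
# Sketch: reading randomized header material from the module-level singleton.
from src.core.web_loader import web_loader

print(web_loader.get_ipv4())        # random IPv4, e.g. "52.113.7.41"
print(web_loader.get_user_agent())  # random browser User-Agent string
print(web_loader.get_location())    # {"country": ..., "language": ..., "timezone": ...}
```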
src/engine/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .browser_engine import BrowserEngine
7
+
8
+ __all__ = ['BrowserEngine']
src/engine/browser_engine.py ADDED
@@ -0,0 +1,139 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import asyncio
7
+ import aiohttp
8
+ import requests
9
+ from urllib.parse import quote
10
+ from config import CONTENT_EXTRACTION, SEARCH_SELECTION
11
+ from src.core.web_loader import web_loader
12
+
13
+ class BrowserEngine:
14
+ def __init__(self, configuration):
15
+ self.config = configuration
16
+
17
+ def generate_headers(self):
18
+ ipv4 = web_loader.get_ipv4()
19
+ ipv6 = web_loader.get_ipv6()
20
+ user_agent = web_loader.get_user_agent()
21
+ origin = web_loader.get_origin()
22
+ referrer = web_loader.get_referrer()
23
+ location = web_loader.get_location()
24
+
25
+ return {
26
+ "User-Agent": user_agent,
27
+ "X-Forwarded-For": f"{ipv4}, {ipv6}",
28
+ "X-Real-IP": ipv4,
29
+ "X-Originating-IP": ipv4,
30
+ "X-Remote-IP": ipv4,
31
+ "X-Remote-Addr": ipv4,
32
+ "X-Client-IP": ipv4,
33
+ "X-Forwarded-Host": origin.replace("https://", "").replace("http://", ""),
34
+ "Origin": origin,
35
+ "Referer": referrer,
36
+ "Accept-Language": f"{location['language']},en;q=0.9",
37
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
38
+ "Accept-Encoding": "gzip, deflate, br",
39
+ "DNT": "1",
40
+ "Connection": "keep-alive",
41
+ "Upgrade-Insecure-Requests": "1",
42
+ "Sec-Fetch-Dest": "document",
43
+ "Sec-Fetch-Mode": "navigate",
44
+ "Sec-Fetch-Site": "cross-site",
45
+ "Sec-Fetch-User": "?1",
46
+ "Cache-Control": "max-age=0",
47
+ "X-Country": location['country'],
48
+ "X-Timezone": location['timezone']
49
+ }
50
+
51
+ def _build_search_url_and_selector(self, search_query: str, search_provider: str = "google"):
52
+ if search_provider == "baidu":
53
+ return (
54
+ f"{self.config.content_reader_api}{self.config.baidu_endpoint}?wd={quote(search_query)}",
55
+ "#content_left"
56
+ )
57
+ provider_prefix = "!go" if search_provider == "google" else "!bi"
58
+ return (
59
+ f"{self.config.content_reader_api}{self.config.searxng_endpoint}?q={quote(f'{provider_prefix} {search_query}')}",
60
+ "#urls"
61
+ )
62
+
63
+ async def _async_post(self, url: str, data: dict, headers: dict):
64
+ timeout = aiohttp.ClientTimeout(total=self.config.request_timeout)
65
+ async with aiohttp.ClientSession(timeout=timeout) as session:
66
+ async with session.post(url, data=data, headers=headers) as response:
67
+ text = await response.text()
68
+ if response.status >= 400:
69
+ raise aiohttp.ClientResponseError(
70
+ request_info=response.request_info,
71
+ history=response.history,
72
+ status=response.status,
73
+ message=text,
74
+ headers=response.headers
75
+ )
76
+ return text
77
+
78
+ async def _async_get(self, url: str, headers: dict):
79
+ timeout = aiohttp.ClientTimeout(total=self.config.request_timeout)
80
+ async with aiohttp.ClientSession(timeout=timeout) as session:
81
+ async with session.get(url, headers=headers) as response:
82
+ text = await response.text()
83
+ if response.status >= 400:
84
+ raise aiohttp.ClientResponseError(
85
+ request_info=response.request_info,
86
+ history=response.history,
87
+ status=response.status,
88
+ message=text,
89
+ headers=response.headers
90
+ )
91
+ return text
92
+
93
+ def _sync_post(self, url: str, data: dict, headers: dict):
94
+ response = requests.post(url, data=data, headers=headers, timeout=self.config.request_timeout)
95
+ response.raise_for_status()
96
+ return response.text
97
+
98
+ def _sync_get(self, url: str, headers: dict):
99
+ response = requests.get(url, headers=headers, timeout=self.config.request_timeout)
100
+ response.raise_for_status()
101
+ return response.text
102
+
103
+ async def async_extract_page_content(self, target_url: str) -> str:
104
+ headers = self.generate_headers()
105
+ payload = {"url": target_url}
106
+ extracted_content = await self._async_post(self.config.content_reader_api, payload, headers)
107
+ return f"{extracted_content}\n\n\n{CONTENT_EXTRACTION}\n\n\n"
108
+
109
+ def extract_page_content(self, target_url: str) -> str:
110
+ try:
111
+ return asyncio.run(self.async_extract_page_content(target_url))
112
+ except Exception:
113
+ try:
114
+ headers = self.generate_headers()
115
+ payload = {"url": target_url}
116
+ extracted_content = self._sync_post(self.config.content_reader_api, payload, headers)
117
+ return f"{extracted_content}\n\n\n{CONTENT_EXTRACTION}\n\n\n"
118
+ except Exception as error:
119
+ return f"Error reading URL: {str(error)}"
120
+
121
+ async def async_perform_search(self, search_query: str, search_provider: str = "google") -> str:
122
+ headers = self.generate_headers()
123
+ full_url, selector = self._build_search_url_and_selector(search_query, search_provider)
124
+ headers["X-Target-Selector"] = selector
125
+ search_results = await self._async_get(full_url, headers)
126
+ return f"{search_results}\n\n\n{SEARCH_SELECTION}\n\n\n"
127
+
128
+ def perform_search(self, search_query: str, search_provider: str = "google") -> str:
129
+ try:
130
+ return asyncio.run(self.async_perform_search(search_query, search_provider))
131
+ except Exception:
132
+ try:
133
+ headers = self.generate_headers()
134
+ full_url, selector = self._build_search_url_and_selector(search_query, search_provider)
135
+ headers["X-Target-Selector"] = selector
136
+ search_results = self._sync_get(full_url, headers)
137
+ return f"{search_results}\n\n\n{SEARCH_SELECTION}\n\n\n"
138
+ except Exception as error:
139
+ return f"Error during search: {str(error)}"
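
Outside the chat loop, the engine can be driven directly. A sketch that mirrors how `message_processor.py` wires it up, assuming network access and the endpoints configured in `config.py`:

```python
# Sketch: standalone search and URL extraction through BrowserEngine.
from src.core.web_configuration import WebConfiguration
from src.engine.browser_engine import BrowserEngine

engine = BrowserEngine(WebConfiguration())

results = engine.perform_search("latest AI development 2025", search_provider="google")
page = engine.extract_page_content("https://wikipedia.org/wiki/Artificial_intelligence")
print(results[:500])
print(page[:500])
```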
src/processor/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .message_processor import process_user_request
7
+
8
+ __all__ = ['process_user_request']
src/processor/message_processor.py ADDED
@@ -0,0 +1,77 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import traceback
7
+ from config import MODEL
8
+ from src.core.web_configuration import WebConfiguration
9
+ from src.engine.browser_engine import BrowserEngine
10
+ from src.tools.tool_manager import construct_tool_definitions
11
+ from src.client.openai_client import initialize_client
12
+ from .response.setup import setup_response
13
+ from .response.generator import generate_response
14
+ from .tools.interaction import process_tool_interactions
15
+
16
+ def process_user_request(user_message, chat_history):
17
+ if not isinstance(user_message, str) or not user_message.strip():
18
+ yield []
19
+ return
20
+
21
+ output_content = ""
22
+
23
+ try:
24
+ server, client_initialization_error = initialize_client()
25
+ if client_initialization_error:
26
+ output_content = client_initialization_error
27
+ yield output_content
28
+ return
29
+
30
+ search_configuration = WebConfiguration()
31
+ search_engine_instance = BrowserEngine(search_configuration)
32
+ available_tools = construct_tool_definitions()
33
+
34
+ conversation_messages = setup_response(
35
+ chat_history,
36
+ user_message
37
+ )
38
+
39
+ tool_response = ""
40
+ tools_done = False
41
+
42
+ for tool_update in process_tool_interactions(
43
+ server=server,
44
+ model_name=MODEL,
45
+ conversation_messages=conversation_messages,
46
+ tool_definitions=available_tools,
47
+ search_engine=search_engine_instance
48
+ ):
49
+ if isinstance(tool_update, str):
50
+ tool_response = tool_update
51
+ yield tool_response
52
+ else:
53
+ conversation_messages = tool_update[0]
54
+ tool_response = tool_update[1]
55
+ tools_done = tool_update[2]
56
+
57
+ if tool_response:
58
+ yield tool_response + "\n\n"
59
+
60
+ final_response_generator = generate_response(
61
+ server=server,
62
+ model_name=MODEL,
63
+ conversation_messages=conversation_messages,
64
+ tool_definitions=available_tools,
65
+ tools_done=tools_done
66
+ )
67
+
68
+ for final_response in final_response_generator:
69
+ if tool_response:
70
+ yield tool_response + "\n\n" + final_response
71
+ else:
72
+ yield final_response
73
+
74
+ except Exception as processing_error:
75
+ output_content += f"\nError: {str(processing_error)}\n"
76
+ output_content += traceback.format_exc()
77
+ yield output_content
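
`process_user_request` is itself a generator, so it can be exercised without Gradio by iterating its partial outputs; the last value is the complete answer. A sketch, assuming the API secrets are set:

```python
# Sketch: consuming the streaming generator outside the Gradio UI.
from src.processor.message_processor import process_user_request

final = ""
for partial in process_user_request("What is UltimaX Intelligence?", chat_history=[]):
    final = partial
print(final)
```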
src/processor/reasoning/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .interface import reasoning_interfaces
7
+ from .tool_reasoning import tool_reasoning
8
+
9
+ __all__ = [
10
+ 'reasoning_interfaces',
11
+ 'tool_reasoning'
12
+ ]
src/processor/reasoning/interface.py ADDED
@@ -0,0 +1,18 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ def reasoning_interfaces(text, current_length=0):
7
+ if current_length < len(text):
8
+ reasoning_steps = text[:current_length]
9
+
10
+ if current_length > 0 and not reasoning_steps.endswith((
11
+ '<br>',
12
+ '<br><br>'
13
+ )):
14
+ reasoning_steps += '...'
15
+
16
+ return reasoning_steps
17
+
18
+ return text
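
`reasoning_interfaces` produces the incremental "typing" effect: it reveals the first `current_length` characters and appends an ellipsis unless the cut falls on a `<br>` boundary. For example:

```python
# Sketch: partial reveal vs. full text.
from src.processor.reasoning.interface import reasoning_interfaces

text = "Searching the web for recent sources<br>"
print(reasoning_interfaces(text, 10))         # "Searching ..." (partial + ellipsis)
print(reasoning_interfaces(text, len(text)))  # full text, returned unchanged
```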
src/processor/reasoning/tool_reasoning.py ADDED
@@ -0,0 +1,38 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from config import REASONING_STEPS, REASONING_DEFAULT
7
+
8
+ def tool_reasoning(tool_name, tool_arguments, stage, error=None, result=None):
9
+ if tool_name == "web_search":
10
+ query = tool_arguments.get("query", "") if tool_arguments else ""
11
+ engine = tool_arguments.get("engine", "google") if tool_arguments else "google"
12
+
13
+ template = REASONING_STEPS.get("web_search", {}).get(stage)
14
+
15
+ if template:
16
+ if stage == "completed":
17
+ preview = result[:300] + "..." if result and len(result) > 300 else result
18
+ return template.format(query=query, engine=engine, preview=preview)
19
+ elif stage == "error":
20
+ return template.format(query=query, engine=engine, error=error)
21
+ else:
22
+ return template.format(query=query, engine=engine)
23
+
24
+ elif tool_name == "read_url":
25
+ url = tool_arguments.get("url", "") if tool_arguments else ""
26
+
27
+ template = REASONING_STEPS.get("read_url", {}).get(stage)
28
+
29
+ if template:
30
+ if stage == "completed":
31
+ preview = result[:300] + "..." if result and len(result) > 300 else result
32
+ return template.format(url=url, preview=preview)
33
+ elif stage == "error":
34
+ return template.format(url=url, error=error)
35
+ else:
36
+ return template.format(url=url)
37
+
38
+ return REASONING_DEFAULT
src/processor/response/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .setup import setup_response
7
+ from .formatter import assistant_response
8
+ from .generator import generate_response
9
+
10
+ __all__ = [
11
+ 'setup_response',
12
+ 'assistant_response',
13
+ 'generate_response'
14
+ ]
src/processor/response/formatter.py ADDED
@@ -0,0 +1,26 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ def assistant_response(response_message):
7
+ extracted_tool_calls = []
8
+
9
+ if getattr(response_message, "tool_calls", None):
10
+ for tool_call in response_message.tool_calls:
11
+ extracted_tool_calls.append(
12
+ {
13
+ "id": tool_call.id,
14
+ "type": "function",
15
+ "function": {
16
+ "name": tool_call.function.name,
17
+ "arguments": tool_call.function.arguments
18
+ }
19
+ }
20
+ )
21
+
22
+ return {
23
+ "role": "assistant",
24
+ "content": response_message.content or "",
25
+ "tool_calls": extracted_tool_calls if extracted_tool_calls else None
26
+ }
src/processor/response/generator.py ADDED
@@ -0,0 +1,51 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import traceback
7
+ from config import MAX_TOKENS, INSTRUCTIONS_END
8
+
9
+ def generate_response(
10
+ server,
11
+ model_name,
12
+ conversation_messages,
13
+ tool_definitions,
14
+ tools_done=False
15
+ ):
16
+ response_generator = ""
17
+
18
+ if tools_done:
19
+ system_reminder = {
20
+ "role": "system",
21
+ "content": INSTRUCTIONS_END
22
+ }
23
+ conversation_messages.append(system_reminder)
24
+
25
+ try:
26
+ response = server.chat.completions.create(
27
+ model=model_name,
28
+ messages=conversation_messages,
29
+ tools=tool_definitions if not tools_done else None,
30
+ tool_choice="none",
31
+ max_tokens=MAX_TOKENS,
32
+ temperature=0.75,
33
+ stream=True
34
+ )
35
+
36
+ for data in response:
37
+ try:
38
+ raw_data = data.choices[0].delta.content or ""
39
+ except Exception:
40
+ raw_data = ""
41
+
42
+ if raw_data:
43
+ response_generator += raw_data
44
+ yield response_generator
45
+
46
+ yield response_generator
47
+
48
+ except Exception as response_error:
49
+ response_generator += f"\nError: {str(response_error)}\n"
50
+ response_generator += traceback.format_exc()
51
+ yield response_generator
src/processor/response/setup.py ADDED
@@ -0,0 +1,34 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from ...utils.time import get_current_time
7
+ from config import INSTRUCTIONS_START
8
+
9
+ def setup_response(conversation_history, user_input):
10
+ history = []
11
+
12
+ history.insert(
13
+ 0,
14
+ {
15
+ "role": "system",
16
+ "content": (
17
+ f"Today is: {get_current_time()}"
18
+ + "\n\n\n"
19
+ + INSTRUCTIONS_START
20
+ )
21
+ }
22
+ )
23
+
24
+ if isinstance(conversation_history, list):
25
+ for history_item in conversation_history:
26
+ message_role = history_item.get("role")
27
+ message_content = history_item.get("content")
28
+ if message_role in ("user", "assistant") and isinstance(message_content, str):
29
+ history.append({"role": message_role, "content": message_content})
30
+
31
+ if isinstance(user_input, str) and user_input.strip():
32
+ history.append({"role": "user", "content": user_input})
33
+
34
+ return history
src/processor/tools/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .parser import extract_tool_parameters
7
+ from .executor import invoke_tool_function
8
+ from .interaction import process_tool_interactions
9
+
10
+ __all__ = [
11
+ 'extract_tool_parameters',
12
+ 'invoke_tool_function',
13
+ 'process_tool_interactions'
14
+ ]
src/processor/tools/executor.py ADDED
@@ -0,0 +1,16 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ def invoke_tool_function(search_engine, function_name, function_params):
7
+ if function_name == "web_search":
8
+ return search_engine.perform_search(
9
+ search_query=function_params.get("query", ""),
10
+ search_provider=function_params.get("engine", "google")
11
+ )
12
+ if function_name == "read_url":
13
+ return search_engine.extract_page_content(
14
+ target_url=function_params.get("url", "")
15
+ )
16
+ return f"Unknown tool: {function_name}"
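
`invoke_tool_function` is a plain dispatcher keyed on the tool name, with an explicit fallback for unknown tools. A sketch of both paths, reusing a `BrowserEngine` instance as the `search_engine` argument:

```python
# Sketch: dispatching parsed tool calls through the executor.
from src.core.web_configuration import WebConfiguration
from src.engine.browser_engine import BrowserEngine
from src.processor.tools.executor import invoke_tool_function

engine = BrowserEngine(WebConfiguration())
print(invoke_tool_function(engine, "web_search", {"query": "GPT-OSS source", "engine": "bing"})[:300])
print(invoke_tool_function(engine, "unknown_tool", {}))  # -> "Unknown tool: unknown_tool"
```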
src/processor/tools/interaction.py ADDED
@@ -0,0 +1,225 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import time
7
+ from assets.css.reasoning import styles
8
+ from ..response.formatter import assistant_response
9
+ from ..reasoning.interface import reasoning_interfaces
10
+ from ..reasoning.tool_reasoning import tool_reasoning
11
+ from .parser import extract_tool_parameters
12
+ from .executor import invoke_tool_function
13
+ from config import MAX_TOKENS, REASONING_DELAY
14
+
15
+ def process_tool_interactions(server, model_name, conversation_messages, tool_definitions, search_engine):
16
+ maximum_iterations = 1
17
+ max_retry_limit = 10
18
+ retry_count = 0
19
+ logs_generator = ""
20
+ tool_results = []
21
+ execution_success = False
22
+ last_error = None
23
+ error_history = []
24
+ iteration_metrics = {
25
+ "attempts": 0,
26
+ "failures": 0,
27
+ "success_rate": 0,
28
+ "error_patterns": {},
29
+ "retry_delays": [
30
+ 0.02,
31
+ 0.03,
32
+ 0.04,
33
+ 0.05,
34
+ 0.06,
35
+ 0.07
36
+ ],
37
+ "backoff_multiplier": 1.0
38
+ }
39
+
40
+ while maximum_iterations <= max_retry_limit and not execution_success:
41
+ iteration_metrics["attempts"] += 1
42
+ current_iteration_successful = False
43
+ iteration_errors = []
44
+
45
+ for iteration_index in range(maximum_iterations):
46
+ try:
47
+ retry_delay = iteration_metrics["retry_delays"][min(retry_count, len(iteration_metrics["retry_delays"]) - 1)]
48
+ if retry_count > 0:
49
+ time.sleep(retry_delay * iteration_metrics["backoff_multiplier"])
50
+
51
+ model_response = server.chat.completions.create(
52
+ model=model_name,
53
+ messages=conversation_messages,
54
+ tools=tool_definitions,
55
+ tool_choice="auto",
56
+ max_tokens=MAX_TOKENS,
57
+ temperature=0.6
58
+ )
59
+
60
+ response_choice = model_response.choices[0]
61
+ assistant_message = response_choice.message
62
+ formatted_assistant_message = assistant_response(assistant_message)
63
+
64
+ conversation_messages.append(
65
+ {
66
+ "role": formatted_assistant_message["role"],
67
+ "content": formatted_assistant_message["content"],
68
+ "tool_calls": formatted_assistant_message["tool_calls"]
69
+ }
70
+ )
71
+
72
+ pending_tool_calls = assistant_message.tool_calls or []
73
+ if not pending_tool_calls:
74
+ if logs_generator:
75
+ logs_generator = styles(logs_generator.replace('<br>', '\n').strip(), expanded=False)
76
+ execution_success = True
77
+ current_iteration_successful = True
78
+ break
79
+
80
+ tool_execution_errors = []
81
+ for tool_invocation in pending_tool_calls:
82
+ tool_name = tool_invocation.function.name
83
+ tool_arguments_raw = tool_invocation.function.arguments
84
+
85
+ extracted_arguments, extraction_error = extract_tool_parameters(tool_arguments_raw)
86
+
87
+ if extraction_error:
88
+ error_key = f"{tool_name}_extraction"
89
+ iteration_metrics["error_patterns"][error_key] = iteration_metrics["error_patterns"].get(error_key, 0) + 1
90
+ tool_execution_errors.append({
91
+ "tool": tool_name,
92
+ "error": extraction_error,
93
+ "type": "extraction"
94
+ })
95
+
96
+ reasoning_error = tool_reasoning(tool_name, None, "error", error=extraction_error)
97
+ for i in range(0, len(reasoning_error), 10):
98
+ logs_generator = styles(reasoning_interfaces(reasoning_error, i), expanded=True)
99
+ yield logs_generator
100
+ time.sleep(REASONING_DELAY)
101
+ logs_generator = styles(reasoning_error, expanded=True)
102
+ yield logs_generator
103
+ tool_execution_result = extraction_error
104
+ else:
105
+ reasoning_status = tool_reasoning(tool_name, extracted_arguments, "parsing")
106
+ for i in range(0, len(reasoning_status), 10):
107
+ logs_generator = styles(reasoning_interfaces(reasoning_status, i), expanded=True)
108
+ yield logs_generator
109
+ time.sleep(REASONING_DELAY)
110
+
111
+ reasoning_start = tool_reasoning(tool_name, extracted_arguments, "executing")
112
+ for i in range(0, len(reasoning_start), 10):
113
+ logs_generator = styles(reasoning_interfaces(reasoning_start, i), expanded=True)
114
+ yield logs_generator
115
+ time.sleep(REASONING_DELAY)
116
+
117
+ try:
118
+ tool_execution_result = invoke_tool_function(
119
+ search_engine,
120
+ tool_name,
121
+ extracted_arguments
122
+ )
123
+ tool_results.append({
124
+ "tool": tool_name,
125
+ "arguments": extracted_arguments,
126
+ "result": tool_execution_result,
127
+ "iteration": maximum_iterations,
128
+ "retry_count": retry_count
129
+ })
130
+
131
+ reasoning_done = tool_reasoning(tool_name, extracted_arguments, "completed", result=tool_execution_result)
132
+ for i in range(0, len(reasoning_done), 10):
133
+ logs_generator = styles(reasoning_interfaces(reasoning_done, i), expanded=True)
134
+ yield logs_generator
135
+ time.sleep(REASONING_DELAY)
136
+ logs_generator = styles(reasoning_done, expanded=False)
137
+ yield logs_generator
138
+
139
+ except Exception as tool_error:
140
+ error_key = f"{tool_name}_execution"
141
+ iteration_metrics["error_patterns"][error_key] = iteration_metrics["error_patterns"].get(error_key, 0) + 1
142
+ tool_execution_errors.append({
143
+ "tool": tool_name,
144
+ "error": str(tool_error),
145
+ "type": "execution",
146
+ "arguments": extracted_arguments
147
+ })
148
+
149
+ reasoning_error = tool_reasoning(tool_name, extracted_arguments, "error", error=str(tool_error))
150
+ for i in range(0, len(reasoning_error), 10):
151
+ logs_generator = styles(reasoning_interfaces(reasoning_error, i), expanded=True)
152
+ yield logs_generator
153
+ time.sleep(REASONING_DELAY)
154
+ logs_generator = styles(reasoning_error, expanded=True)
155
+ yield logs_generator
156
+ tool_execution_result = str(tool_error)
157
+
158
+ conversation_messages.append(
159
+ {
160
+ "role": "tool",
161
+ "tool_call_id": tool_invocation.id,
162
+ "name": tool_name,
163
+ "content": tool_execution_result
164
+ }
165
+ )
166
+
167
+ if not tool_execution_errors:
168
+ execution_success = True
169
+ current_iteration_successful = True
170
+ break
171
+ else:
172
+ iteration_errors.extend(tool_execution_errors)
173
+
174
+ except Exception as model_error:
175
+ last_error = str(model_error)
176
+ error_history.append({
177
+ "iteration": maximum_iterations,
178
+ "error": last_error,
179
+ "timestamp": time.time()
180
+ })
181
+ iteration_metrics["failures"] += 1
182
+ iteration_errors.append({
183
+ "error": last_error,
184
+ "type": "model"
185
+ })
186
+
187
+ if current_iteration_successful:
188
+ execution_success = True
189
+ break
190
+ else:
191
+ if iteration_errors:
192
+ error_history.extend(iteration_errors)
193
+
194
+ retry_count += 1
195
+ previous_iterations = maximum_iterations
196
+
197
+ if iteration_metrics["error_patterns"]:
198
+ frequent_errors = max(iteration_metrics["error_patterns"].values())
199
+ if frequent_errors > 3:
200
+ maximum_iterations = min(maximum_iterations + 2, max_retry_limit)
201
+ else:
202
+ maximum_iterations = min(maximum_iterations + 1, max_retry_limit)
203
+ else:
204
+ maximum_iterations = min(maximum_iterations + 1, max_retry_limit)
205
+
206
+ if maximum_iterations > previous_iterations:
207
+ retry_reasoning = f"Retrying with increased iterations: {maximum_iterations} (attempt {retry_count + 1})"
208
+ for i in range(0, len(retry_reasoning), 10):
209
+ logs_generator = styles(reasoning_interfaces(retry_reasoning, i), expanded=True)
210
+ yield logs_generator
211
+ time.sleep(REASONING_DELAY)
212
+
213
+ if maximum_iterations >= max_retry_limit:
214
+ final_error = f"Maximum retry limit reached after {iteration_metrics['attempts']} attempts with {iteration_metrics['failures']} failures"
215
+ logs_generator = styles(final_error, expanded=True)
216
+ yield logs_generator
217
+ break
218
+
219
+ iteration_metrics["success_rate"] = (len(tool_results) / max(iteration_metrics["attempts"], 1)) * 100
220
+
221
+ if logs_generator:
222
+ logs_generator = styles(logs_generator.replace('<br>', '\n').strip(), expanded=False)
223
+
224
+ generator_results = len(tool_results) > 0
225
+ return conversation_messages, logs_generator, generator_results
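process_tool_interactions is a generator: it streams styled reasoning/log fragments for the UI via yield and hands back the final (conversation_messages, logs_generator, generator_results) triple through its return value. A minimal consumer sketch, assuming the server client, model name, tool definitions, and search engine are constructed elsewhere in the application:

# Hypothetical driver loop; `server`, `model_name`, `messages`,
# `tool_definitions`, and `search_engine` come from the caller.
generator = process_tool_interactions(server, model_name, messages,
                                      tool_definitions, search_engine)
try:
    while True:
        log_fragment = next(generator)   # streamed reasoning/log HTML
        render(log_fragment)             # hypothetical UI update hook
except StopIteration as stop:
    # a generator's `return` value is carried on StopIteration
    messages, final_logs, used_tools = stop.value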
src/processor/tools/parser.py ADDED
@@ -0,0 +1,17 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import json
7
+
8
+ def extract_tool_parameters(raw_parameters, fallback_engine="google"):
9
+ try:
10
+ parsed_params = json.loads(raw_parameters or "{}")
11
+ if "engine" in parsed_params and parsed_params["engine"] not in ["google", "bing", "baidu"]:
12
+ parsed_params["engine"] = fallback_engine
13
+ if "engine" not in parsed_params:
14
+ parsed_params["engine"] = fallback_engine
15
+ return parsed_params, None
16
+ except Exception as parse_error:
17
+ return None, f"Invalid tool arguments: {str(parse_error)}"
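extract_tool_parameters returns a (parameters, error) pair and forces the engine field back to the fallback when it is missing or outside the supported set; the inputs below are illustrative:

extract_tool_parameters('{"query": "python", "engine": "duckduckgo"}')
# -> ({'query': 'python', 'engine': 'google'}, None)   unsupported engine replaced
extract_tool_parameters('{"query": "python"}')
# -> ({'query': 'python', 'engine': 'google'}, None)   default engine filled in
extract_tool_parameters('not json')
# -> (None, 'Invalid tool arguments: ...')             parse failure reported, not raised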
src/tools/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .tool_manager import construct_tool_definitions
7
+
8
+ __all__ = ['construct_tool_definitions']
src/tools/tool_manager.py ADDED
@@ -0,0 +1,50 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ def construct_tool_definitions():
7
+ return [
8
+ {
9
+ "type": "function",
10
+ "function": {
11
+ "name": "web_search",
12
+ "description": "Perform a web search via SearXNG (Google or Bing) or Baidu.",
13
+ "parameters": {
14
+ "type": "object",
15
+ "properties": {
16
+ "query": {
17
+ "type": "string"
18
+ },
19
+ "engine": {
20
+ "type": "string",
21
+ "enum": [
22
+ "google",
23
+ "bing",
24
+ "baidu"
25
+ ],
26
+ "default": "google",
27
+ },
28
+ },
29
+ "required": ["query"],
30
+ },
31
+ },
32
+ },
33
+ {
34
+ "type": "function",
35
+ "function": {
36
+ "name": "read_url",
37
+ "description": "Fetch a URL and extract its main readable content.",
38
+ "parameters": {
39
+ "type": "object",
40
+ "properties": {
41
+ "url": {
42
+ "type": "string",
43
+ "format": "uri"
44
+ },
45
+ },
46
+ "required": ["url"],
47
+ },
48
+ },
49
+ }
50
+ ]
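The definitions follow the OpenAI function-calling schema, so they can be handed directly to an OpenAI-compatible client exactly as interaction.py does above. A trimmed sketch, assuming the client and model name are configured elsewhere:

# Hypothetical wiring; `server` (an OpenAI-compatible client) and
# `model_name` are assumed to be configured elsewhere in the app.
tools = construct_tool_definitions()
response = server.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": "Summarize https://example.com"}],
    tools=tools,
    tool_choice="auto",
)
tool_calls = response.choices[0].message.tool_calls or []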
src/utils/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from .time import get_current_time
7
+
8
+ __all__ = ['get_current_time']
src/utils/time.py ADDED
@@ -0,0 +1,14 @@
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from datetime import datetime
7
+ from zoneinfo import ZoneInfo
8
+
9
+ def get_current_time() -> str:
10
+ return datetime.now(ZoneInfo(
11
+ "Asia/Jakarta"
12
+ )).strftime(
13
+ "%H:%M %Z. %A, %d %B %Y."
14
+ )
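get_current_time pins the timestamp to Asia/Jakarta; a quick check (the printed value is illustrative):

from src.utils import get_current_time  # assuming the project root is on sys.path
print(get_current_time())
# e.g. "14:05 WIB. Monday, 01 September 2025."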