File size: 26,739 Bytes
408c946
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bcca9d9
9ecb850
 
ed54077
ab7a901
9ecb850
 
 
 
1fa94c9
9ecb850
 
 
 
 
 
ed54077
 
9ecb850
 
 
 
 
 
 
ed54077
 
 
9ecb850
 
1fa94c9
ed54077
 
7abe91a
9ecb850
7abe91a
 
ed54077
 
 
9ecb850
 
7abe91a
ed54077
 
7abe91a
 
 
9ecb850
ed54077
 
9ecb850
7abe91a
9ecb850
 
ed54077
 
9ecb850
 
7abe91a
9ecb850
7abe91a
9ecb850
ed54077
9ecb850
7abe91a
 
 
ed54077
9ecb850
 
7abe91a
 
ed54077
9ecb850
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed54077
 
7abe91a
1fa94c9
 
ed54077
9ecb850
 
 
ed54077
9ecb850
bcca9d9
 
 
 
 
 
 
 
 
 
 
9ecb850
bcca9d9
9ecb850
 
 
 
 
 
 
 
 
 
 
 
 
bcca9d9
 
 
 
 
 
1fa94c9
bcca9d9
 
9ecb850
 
bcca9d9
 
9ecb850
 
 
 
 
 
 
 
bcca9d9
 
 
408c946
bcca9d9
9ecb850
bcca9d9
9ecb850
 
 
 
 
 
 
 
ab7a901
408c946
 
ceb70c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a85ae5
ceb70c7
408c946
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ceb70c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#

#OPENAI_API_BASE_URL  # Endpoint. Not here -> Hugging Face Spaces secrets
#OPENAI_API_KEY       # API Key.  Not here -> Hugging Face Spaces secrets

# Name of the chat model requested from the API.
# NOTE(review): presumably sent to the OpenAI-compatible endpoint configured
# through the OPENAI_API_* secrets mentioned above — confirm against the caller.
MODEL = "gpt-4.1-nano"

# Search backends and the page-reader service.
# Public SearXNG instances are listed at https://searx.space
SEARXNG_ENDPOINT = "https://searx.stream/search"
BAIDU_ENDPOINT = "https://www.baidu.com/s"
READER_ENDPOINT = "https://r.jina.ai/"

# 5 minutes
REQUEST_TIMEOUT = 300

# Opening system prompt for the assistant.
# It obliges the model to call `web_search` / `read_url` for every request,
# to cross-check at least three sources, to cite sources as Markdown links, and
# to render an image only when its URL ends in a known image extension.
# The literal is not a raw string, so the final `\n\n\n` line becomes three real
# newline characters (in addition to the line breaks already in the text).
# NOTE(review): several lines inside the literal end in trailing spaces; they
# are part of the prompt, so whitespace-stripping editors will alter it.
# NOTE(review): the name suggests this is placed before tool output and paired
# with INSTRUCTIONS_END — confirm against the caller.
INSTRUCTIONS_START = """
You are ChatGPT, an AI assistant with mandatory real-time web search, URL content extraction, knowledge validation, and professional summarization capabilities.


Your absolute rules:
- You must always execute the provided tools (`web_search`, `read_url`) for every single user query or user request, without exception.
- You are never allowed to answer directly from your internal knowledge, memory, or training data. Outdated or tool-bypassed answers are strictly forbidden.
- You must display all images found in sources using markdown format throughout your response. To obtain images from each source:  
  - If using only `web_search`:  
    - After executing or after calling `web_search` > Extract all URLs > Execute and call `read_url` > Collect all image links after executing `read_url`.

  - If using read_url directly:  
    - You only need to execute `read_url`.  

- Note: This applies to all queries and all requests.


Core Principles:
- Mandatory Tool Invocation: Every query or request, no matter how simple, factual, or complex, must trigger at least one `web_search` or `read_url`.
- No Memory Reliance: Do not use prior conversation history, cached context, or built-in knowledge to generate answers. Always re-verify with tools.
- Up-to-Date Grounding: All responses must be based only on real-time, verifiable data retrieved through tools.
- Cross-Validation: Always compare findings across at least 3 independent, credible sources before producing a final answer.
- Professional Output: Responses must be clear, structured, evidence-based, and neutral.
- Image Integration: Display all relevant images found in sources within appropriate paragraphs using markdown format.


Execution Workflow:
1. Initial Web Search
   - Immediately call `web_search` or `read_url` when a query or request arrives.
   - Use multiple query or request variations and search engines (`google`, `bing`, `baidu`) for broader coverage.
   - Then execute and call `read_url` for each retrieved URLs or links to obtain images.

2. Result Selection
   - Select up to 10 of the most relevant, credible, and content-rich results.
   - Prioritize authoritative sources including academic publications, institutional reports, official documents, and expert commentary.
   - Deprioritize low-credibility, promotional, or unverified sources.
   - Avoid over-reliance on any single source.

3. Content Retrieval
   - For each selected URL, use `read_url`.
   - Extract key elements including facts, statistics, data points, expert opinions, and relevant arguments.
   - Capture all image URLs present in the content, including those in HTML img tags, image galleries, and embedded media.
   - Normalize terminology, refine phrasing, and remove redundancies for clarity and consistency.

4. Cross-Validation
   - Compare extracted information across at least 3 distinct sources.
   - Identify convergences (agreement), divergences (contradictions), and gaps (missing data).
   - Validate all numerical values, temporal references, and factual claims through multiple corroborations.
   - Collect and verify all images from different sources for comprehensive visual documentation.

5. Knowledge Integration
   - Synthesize findings into a structured hierarchy from overview to key details to supporting evidence to citations.
   - Emphasize the latest developments, trends, and their implications.
   - Balance depth for experts with clarity for general readers.
   - Integrate relevant images within each section where they add value or illustrate points.

6. Response Construction
   - Always cite sources inline using `[Source Name/Title/Article/Tags/Domain](source_url_or_source_link)`.
   - Display images inline within relevant paragraphs using `![image_name](image_url_or_image_link)`.
   - Maintain a professional, precise, and neutral tone.
   - Use clear formatting with headings, numbered lists, and bullet points.
   - Ensure readability, logical progression, and accessibility.
   - Place images contextually near related text for maximum comprehension.

7. Ambiguity and Uncertainty Handling
   - Explicitly flag incomplete, ambiguous, or conflicting data.
   - Provide possible interpretations with transparent reasoning.
   - Clearly note limitations where evidence is insufficient or weak.

8. Quality and Consistency Assurance
   - Always base answers strictly on tool-derived evidence.
   - Guarantee logical flow, factual accuracy, and consistency in terminology.
   - Maintain neutrality and avoid speculative claims.
   - Never bypass tool execution for any query or request.
   - Verify all image links are properly formatted and functional.


Image Display Requirements:
- You must detect and display all images found in source content.
- You must automatically identify valid image links.
- You must extract image URLs from both HTML and Markdown sources:
  - For HTML, extract from `<img>`, `<picture>`, `<source>`, and data attributes.
  - For Markdown, extract from image syntax such as `![alt text](image_url "optional title")` or `![alt text](image_url)`. 
  - The extracted URLs may be absolute or relative, and you must capture them accurately.

- You must display each image using markdown format `![image_name](image_url_or_image_link)`.
- You must place images within relevant paragraphs where they provide context or illustration.
- You must include image captions or descriptions when available from the source.
- You must group related images together when they form a sequence or collection.
- You must ensure images are displayed throughout the response, not just at the end.
- Image format must:
  - `.jpg`
  - `.jpeg`
  - `.png`
  - `.webp`
  - `.svg`
  - `.ico`
  - `.gif`
  - `.bmp`

- If the sources do not contain a valid image link/URL, do not render and do not display them using markdown.


Critical Image Validation Instructions:
- Step 1: Check if URL ends with image extension
  - Before displaying any URL as an image, look at the very end of the URL string.
  - The URL must end with one of these exact patterns:
    - ends with `.jpg`
    - ends with `.jpeg`  
    - ends with `.png`
    - ends with `.gif`
    - ends with `.webp`
    - ends with `.svg`
    - ends with `.bmp`
    - ends with `.ico`

- Step 2: Examples of valid image URLs (do not render these):
  - These are valid because they end with image extensions:
    - `https://domain.com/photo.jpg`
    - `https://cdn.site.com/image.png`
    - `https://example.org/graphic.webp`
    - `https://site.net/icon.svg`

- Step 3: Examples of invalid URLs (never display as images):
  - These are not images because they don't end with image extensions:
    - `https://domain.com/page`
    - `https://site.com/article/123`
    - `https://example.com/view?id=456`
    - `https://cdn.com/image` (no extension)
    - `https://site.org/gallery`
    - `https://example.net/photo/view`

- Step 4: How to extract from raw HTML
  - When you see raw HTML like:
    - `<img src="https://example.com/photo.jpg">`
    - Extract: `https://example.com/photo.jpg`
    - Check: does it end with .jpg? Yes, so display it.

  - When you see:
    - `<img src="https://example.net/images/photo">`
    - Extract: `https://example.net/images/photo`
    - Check: does it end with an image extension? No, so don't display it.

- Step 5: Final validation before display
  - Ask yourself:
    - Does this URL end with `.jpg` or `.jpeg` or `.png` or `.gif` or `.webp` or `.svg` or `.bmp` or `.ico`?
    - If yes: display as `![image_name](image_url_or_image_link)`
    - If no: do not display as image

- Important:
  - Never display example URLs in your actual response
  - The examples above are only for your understanding


Critical Instruction:
- Every new query or request must trigger a `web_search` or `read_url`.
- For web search, you must always execute and call `web_search` > `read_url` applies to all queries and all requests to get image links.  
- Only execute and call `read_url` for new queries or new requests that contain URLs or links.
- You must not generate answers from prior knowledge, conversation history, or cached data.
- Always use Markdown format for URL sources with `[Source Name/Title/Article/Tags/Domain](source_url_or_source_link)`.
- Always use Markdown format for images with `![image_name](image_url_or_image_link)`.
- Images should be placed within relevant paragraphs to provide visual context and enhance understanding.
- If tools fail, you must state explicitly that no valid data could be retrieved.
- Never render example image URLs provided in instructions.
\n\n\n
"""

# Prompt fragment, wrapped in <system> tags, telling the model how to digest
# retrieved page content: summarise facts, keep every URL as a Markdown link,
# and treat a URL as an image only if it ends in a listed image extension.
# The literal is not raw, so the trailing `\n\n\n` line becomes real newlines.
# NOTE(review): presumably appended to the output of `read_url` — confirm
# against the caller.
CONTENT_EXTRACTION = """
<system>
- Analyze the retrieved content in detail
- Identify all critical facts, arguments, statistics, and relevant data
- Collect all URLs, hyperlinks, references, and citations mentioned in the content
- Evaluate credibility of sources, highlight potential biases or conflicts
- Produce a structured, professional, and comprehensive summary
- Emphasize clarity, accuracy, and logical flow
- Include all discovered URLs in the final summary as `[Source Name/Title/Article/Tags/Domain](source_url_or_source_link)`
- Mark any uncertainties, contradictions, or missing information clearly


Image extraction from raw HTML:
- When you see HTML tags like <img src="URL">, extract the URL
- Check if the URL ends with: `.jpg` or `.jpeg` or `.png` or `.gif` or `.webp` or `.svg` or `.bmp` or `.ico`
- Only mark as image if it has valid extension at the end
- Look for these HTML patterns:
  - `<img src="..." />`
  - `<img data-src="..." />`
  - `<img srcset="..." />`
  - `<source srcset="..." />`

- Remember: URL must end with image extension to be valid
</system>
\n\n\n
"""

# Prompt fragment, wrapped in <system> tags, telling the model what to do with
# search results: fetch each one with `read_url`, summarise it, list sources as
# Markdown links, and count only URLs with a listed extension as images.
# The literal is not raw, so the trailing `\n\n\n` line becomes real newlines.
# NOTE(review): presumably appended to the output of `web_search` — confirm
# against the caller.
SEARCH_SELECTION = """
<system>
- For each search result, fetch the full content using `read_url`
- Extract key information, main arguments, data points, and statistics
- Capture every URL present in the content or references
- Create a professional structured summary
- List each source at the end of the summary in the format `[Source Name/Title/Article/Tags/Domain](source_url_or_source_link)`
- Identify ambiguities or gaps in information
- Ensure clarity, completeness, and high information density


Image identification in raw content:
- The raw HTML will contain many URLs
- Only URLs ending with image extensions are actual images
- Valid image extensions: `.jpg` or `.jpeg` or `.png` or `.gif` or `.webp` or `.svg` or `.bmp` or `.ico`
- If URL doesn't end with these extensions, it's not an image
- Don't guess or assume - only exact extension matches count
</system>
\n\n\n
"""

# Closing prompt: reminds the model to answer only from tool results and to
# re-check every image URL's extension before rendering it.
# The literal is not raw, so both `\n\n\n` lines (first and last) become real
# newline characters surrounding the text.
# NOTE(review): the name suggests it is sent after tool execution — confirm
# against the caller.
INSTRUCTIONS_END = """
\n\n\n
You have just executed tools and obtained results. You MUST now provide a comprehensive answer based ONLY on the tool results.


Final image display checklist:
- For each image URL you want to display, verify it ends with: `.jpg` or `.jpeg` or `.png` or `.gif` or `.webp` or `.svg` or `.bmp` or `.ico`
- If it doesn't end with these extensions, do not display it as an image
- Never display URLs without image extensions as images
- Never render example or demonstration image URLs from instructions
- State clearly if no valid images were found in the sources
\n\n\n
"""

# Canned "reasoning" narration, keyed first by tool name ("web_search",
# "read_url") and then by stage ("parsing", "executing", "completed", "error").
# Each value is one string built from adjacent literals; `<br>` marks line
# breaks and `{query}`, `{engine}`, `{url}`, `{preview}`, `{error}` are
# placeholders (presumably filled with str.format by the caller — confirm).
# The "error" templates state that no valid data could be retrieved instead of
# promising an answer from built-in knowledge, so that they agree with the
# tool-only rules in INSTRUCTIONS_START.
REASONING_STEPS = {
    "web_search": {
        "parsing": (
            "I need to search for information about: {query}<br><br>"
            "I'm analyzing the user's request and preparing to execute a web search. "
            "The query I've identified is comprehensive and should yield relevant results. "
            "I will use the {engine} search engine for this task as it provides reliable and up-to-date information.<br><br>"
            "I'm now parsing the search parameters to ensure they are correctly formatted. "
            "The search query has been validated and I'm checking that all required fields are present. "
            "I need to make sure the search engine parameter is valid and supported by our system.<br><br>"
            "I'm preparing the search request with the following configuration:<br>"
            "- Search Query: {query}<br>"
            "- Search Engine: {engine}<br><br>"
            "I'm verifying that the network connection is stable and that the search service is accessible. "
            "All preliminary checks have been completed successfully."
        ),
        "executing": (
            "I'm now executing the web search for: {query}<br><br>"
            "I'm connecting to the {engine} search service and sending the search request. "
            "The connection has been established successfully and I'm waiting for the search results. "
            "I'm processing multiple search result pages to gather comprehensive information.<br><br>"
            "I'm analyzing the search results to identify the most relevant and authoritative sources. "
            "The search engine is returning results and I'm filtering them based on relevance scores. "
            "I'm extracting key information from each search result including titles, snippets, and URLs.<br><br>"
            "I'm organizing the search results in order of relevance and checking for duplicate content. "
            "The search process is progressing smoothly and I'm collecting valuable information. "
            "I'm also verifying the credibility of the sources to ensure high-quality information.<br><br>"
            "Current status: Processing search results...<br>"
            "Results found: Multiple relevant sources identified<br>"
            "Quality assessment: High relevance detected"
        ),
        "completed": (
            "I have successfully completed the web search for: {query}<br><br>"
            "I've retrieved comprehensive search results from {engine} and analyzed all the information. "
            "The search yielded multiple relevant results that directly address the user's query. "
            "I've extracted the most important information and organized it for processing.<br><br>"
            "I've identified several high-quality sources with authoritative information. "
            "The search results include recent and up-to-date content that is highly relevant. "
            "I've filtered out any duplicate or low-quality results to ensure accuracy.<br><br>"
            "I'm now processing the collected information to formulate a comprehensive response. "
            "The search results provide sufficient detail to answer the user's question thoroughly. "
            "I've verified the credibility of the sources and cross-referenced the information.<br><br>"
            "Search Summary:<br>"
            "- Total results processed: Multiple pages<br>"
            "- Relevance score: High<br>"
            "- Information quality: Verified and accurate<br>"
            "- Sources: Authoritative and recent<br><br>"
            "Preview of results:<br>{preview}"
        ),
        "error": (
            "I encountered an issue while attempting to search for: {query}<br><br>"
            "I tried to execute the web search but encountered an unexpected error. "
            "The error occurred during the search process and I need to handle it appropriately. "
            "I'm analyzing the error to understand what went wrong and how to proceed.<br><br>"
            "Error details: {error}<br><br>"
            "I'm attempting to diagnose the issue and considering alternative approaches. "
            "The error might be due to network connectivity, service availability, or parameter issues. "
            "I will try to recover from this error and provide the best possible response.<br><br>"
            "I'm evaluating whether I can retry the search with modified parameters. "
            # Must not promise an answer from built-in knowledge: the system
            # prompt forbids it and requires an explicit "no data" statement.
            "If the search cannot be completed, I will state clearly that no valid data could be retrieved. "
            "I'm committed to providing valuable assistance despite this technical challenge."
        )
    },
    "read_url": {
        "parsing": (
            "I need to read and extract content from the URL: {url}<br><br>"
            "I'm analyzing the URL structure to ensure it's valid and accessible. "
            "The URL appears to be properly formatted and I'm preparing to fetch its content. "
            "I will extract the main content from this webpage to gather detailed information.<br><br>"
            "I'm validating the URL protocol and checking if it uses HTTP or HTTPS. "
            "The domain seems legitimate and I'm preparing the request headers. "
            "I need to ensure that the website allows automated content extraction.<br><br>"
            "I'm configuring the content extraction parameters:<br>"
            "- Target URL: {url}<br>"
            "- Extraction Method: Full content parsing<br>"
            "- Content Type: HTML/Text<br>"
            "- Encoding: Auto-detect<br><br>"
            "I'm checking if the website requires any special handling or authentication. "
            "All preliminary validation checks have been completed successfully."
        ),
        "executing": (
            "I'm now accessing the URL: {url}<br><br>"
            "I'm establishing a connection to the web server and sending the HTTP request. "
            "The connection is being established and I'm waiting for the server response. "
            "I'm following any redirects if necessary to reach the final destination.<br><br>"
            "I'm downloading the webpage content and checking the response status code. "
            "The server is responding and I'm receiving the HTML content. "
            "I'm monitoring the download progress and ensuring data integrity.<br><br>"
            "I'm parsing the HTML structure to extract the main content. "
            "I'm identifying and removing navigation elements, advertisements, and other non-content sections. "
            "I'm focusing on extracting the primary article or information content.<br><br>"
            "Current status: Extracting content...<br>"
            "Response received: Processing HTML<br>"
            "Content extraction: In progress"
        ),
        "completed": (
            "I have successfully extracted content from: {url}<br><br>"
            "I've retrieved the complete webpage content and processed it thoroughly. "
            "The extraction was successful and I've obtained the main textual content. "
            "I've cleaned the content by removing unnecessary HTML tags and formatting.<br><br>"
            "I've identified the main article or information section of the webpage. "
            "The content has been properly parsed and structured for analysis. "
            "I've preserved important information while filtering out irrelevant elements.<br><br>"
            "I'm now analyzing the extracted content to understand its context and relevance. "
            "The information appears to be comprehensive and directly related to the topic. "
            "I've verified that the content is complete and hasn't been truncated.<br><br>"
            "Extraction Summary:<br>"
            "- Content length: Substantial<br>"
            "- Extraction quality: High<br>"
            "- Content type: Article/Information<br>"
            "- Processing status: Complete<br><br>"
            "Preview of extracted content:<br>{preview}"
        ),
        "error": (
            "I encountered an issue while trying to access: {url}<br><br>"
            "I attempted to fetch the webpage content but encountered an error. "
            "The error prevented me from successfully extracting the information. "
            "I'm analyzing the error to understand the cause and find a solution.<br><br>"
            "Error details: {error}<br><br>"
            "I'm considering possible causes such as network issues, access restrictions, or invalid URLs. "
            "The website might be blocking automated access or the URL might be incorrect. "
            "I will try to work around this limitation and provide alternative assistance.<br><br>"
            "I'm evaluating whether I can access the content through alternative methods. "
            # Same rule as the web_search error text: no fallback to
            # built-in knowledge.
            "If direct access isn't possible, I'll state clearly that no valid data could be retrieved from this URL. "
            "I remain committed to providing useful information despite this obstacle."
        )
    }
}

# Generic narration text.
# NOTE(review): presumably the fallback when REASONING_STEPS has no entry for a
# tool or stage — confirm against the caller.
REASONING_DEFAULT = "I'm processing the tool execution request..."

# 10 ms
REASONING_DELAY = 0.01

# Operating-system/platform tokens.
# NOTE(review): presumably spliced into a generated User-Agent header — confirm
# against the caller.
# There is deliberately no "Windows NT 11.0" entry: Windows 11 browsers keep
# reporting "Windows NT 10.0", so an 11.0 token never occurs in real traffic.
OS = [
    "Windows NT 10.0; Win64; x64",
    "Macintosh; Intel Mac OS X 10_15_7",
    "X11; Linux x86_64",
    "Macintosh; Intel Mac OS X 11_6_2"
]

# Allowed octet values, ascending, all within 1..223.
# Skipped on purpose: 6, 7, 9-11, 21, 22, 25-33, 127 and 169.
# NOTE(review): presumably the pool for the first octet of fabricated IPv4
# addresses — confirm against the caller.
OCTETS = [
    *range(1, 6),
    8,
    *range(12, 21),
    23,
    24,
    *range(34, 127),
    *range(128, 169),
    *range(170, 224),
]

# Browser family names.
# NOTE(review): no OPERA_VERSIONS list is visible next to the other
# *_VERSIONS tables — confirm how the caller versions "Opera".
BROWSERS = ["Chrome", "Firefox", "Safari", "Edge", "Opera"]

# Chrome version strings, newest first.
CHROME_VERSIONS = ["120.0.0.0", "119.0.0.0", "118.0.0.0", "117.0.0.0", "116.0.0.0"]

# Firefox version strings, newest first.
FIREFOX_VERSIONS = ["121.0", "120.0", "119.0", "118.0", "117.0"]

# Safari version strings, newest first.
SAFARI_VERSIONS = ["17.1", "17.0", "16.6", "16.5", "16.4"]

# Edge version strings, newest first.
EDGE_VERSIONS = [
    "120.0.2210.91", "119.0.2151.97", "118.0.2088.76",
    "117.0.2045.60", "116.0.1938.81",
]

# Well-known site domains (bare host names, no scheme).
# NOTE(review): presumably combined with PROTOCOLS to fabricate Referer/Origin
# values — confirm against the caller.
DOMAINS = [
    # search engines
    "google.com", "bing.com", "yahoo.com", "duckduckgo.com", "baidu.com", "yandex.com",
    # social and video
    "facebook.com", "twitter.com", "linkedin.com", "reddit.com", "youtube.com",
    # reference, commerce and developer sites
    "wikipedia.org", "amazon.com", "github.com", "stackoverflow.com",
    # publishing and sharing
    "medium.com", "quora.com", "pinterest.com", "instagram.com", "tumblr.com",
]

# URL prefixes: HTTPS only, without and with the "www." host label.
PROTOCOLS = ["https://", "https://www."]

# Search-result URL prefixes; each ends at the query parameter so the search
# terms can be appended directly.
# The six global engines come first, followed by regional Google hosts built
# from their domain suffixes (order is significant and preserved).
SEARCH_ENGINES = [
    "https://www.google.com/search?q=",
    "https://www.bing.com/search?q=",
    "https://search.yahoo.com/search?p=",
    "https://duckduckgo.com/?q=",
    "https://www.baidu.com/s?wd=",
    "https://yandex.com/search/?text=",
    *(
        f"https://www.google.{suffix}/search?q="
        for suffix in (
            "co.uk", "ca", "com.au", "de", "fr",
            "co.jp", "com.br", "co.in", "ru", "it",
        )
    ),
]

# Generic topic words.
# NOTE(review): presumably appended to SEARCH_ENGINES prefixes to fabricate
# plausible referrer URLs — confirm against the caller.
KEYWORDS = [
    "news", "weather", "sports", "technology", "science",
    "health", "finance", "entertainment", "travel", "food",
    "education", "business", "politics", "culture", "history",
    "music", "movies", "games", "books", "art",
]

# Two-letter upper-case country codes (50 entries), written as one
# whitespace-separated table and split into a list at import time.
COUNTRIES = """
    US GB CA AU DE FR JP BR IN RU
    IT ES MX NL SE NO DK FI PL TR
    KR SG HK TW TH ID MY PH VN AR
    CL CO PE VE EG ZA NG KE MA DZ
    TN IL AE SA QA KW BH OM JO LB
""".split()

# Locale tags in language-REGION form (49 entries), written as one
# whitespace-separated table and split into a list at import time.
# NOTE(review): COUNTRIES has 50 entries and the two lists are not
# index-aligned (e.g. "zh-CN" has no "CN" counterpart) — confirm that no caller
# zips them together.
LANGUAGES = """
    en-US en-GB en-CA en-AU de-DE fr-FR ja-JP
    pt-BR hi-IN ru-RU it-IT es-ES es-MX nl-NL
    sv-SE no-NO da-DK fi-FI pl-PL tr-TR ko-KR
    zh-CN zh-TW th-TH id-ID ms-MY fil-PH vi-VN
    es-AR es-CL es-CO es-PE es-VE ar-EG en-ZA
    en-NG sw-KE ar-MA ar-DZ ar-TN he-IL ar-AE
    ar-SA ar-QA ar-KW ar-BH ar-OM ar-JO ar-LB
""".split()

# Time zone names in tz database "Area/Location" form.
# NOTE(review): presumably sent as a fabricated client time zone — confirm
# against the caller.
TIMEZONES = [
    "America/New_York",
    "America/Chicago",
    "America/Los_Angeles",
    "America/Denver",
    "Europe/London",
    "Europe/Paris",
    "Europe/Berlin",
    "Europe/Moscow",
    "Asia/Tokyo",
    "Asia/Shanghai",
    "Asia/Hong_Kong",
    "Asia/Singapore",
    "Asia/Seoul",
    # India: the tz database has no "Asia/Mumbai" zone; the identifier for
    # Indian time is "Asia/Kolkata".
    "Asia/Kolkata",
    "Asia/Dubai",
    "Australia/Sydney",
    "Australia/Melbourne",
    "America/Toronto",
    "America/Vancouver",
    "America/Mexico_City",
    "America/Sao_Paulo",
    # Legacy alias of "America/Argentina/Buenos_Aires"; kept as-is.
    "America/Buenos_Aires",
    "Africa/Cairo",
    "Africa/Johannesburg",
    "Africa/Lagos",
    "Africa/Nairobi",
    "Pacific/Auckland",
    "Pacific/Honolulu"
]

# HTML blurb describing the app, with outbound links (Gradio, SearXNG,
# OpenWebUI, the author's other Spaces, terms of use, donation page).
# The trailing comment marks it as Gradio-related; presumably it is rendered in
# the Gradio interface — confirm against the caller.
# NOTE(review): some lines inside the literal end in a trailing space that
# separates words or attributes across the line break; keep it when editing.
DESCRIPTION = """
<b>SearchGPT</b> is <b>ChatGPT</b> with real-time web search capabilities and the ability to read content directly from a URL.
<br><br>
This Space implements an agent-based system with <b><a href="https://www.gradio.app" target="_blank">Gradio</a></b>. It is integrated with 
<b><a href="https://docs.searxng.org" target="_blank">SearXNG</a></b>, which is then converted into a script tool or function for native execution.
<br><br>
The agent mode is inspired by the <b><a href="https://openwebui.com/t/hadad/deep_research" target="_blank">Deep Research</a></b> from 
<b><a href="https://docs.openwebui.com" target="_blank">OpenWebUI</a></b> tools script.
<br><br>
The <b>Deep Research</b> feature is also available on the primary Spaces of <b><a href="https://umint-openwebui.hf.space" 
target="_blank">UltimaX Intelligence</a></b>.
<br><br>
Please consider reading the <b><a href="https://huggingface.co/spaces/umint/ai/discussions/37#68b55209c51ca52ed299db4c" 
target="_blank">Terms of Use and Consequences of Violation</a></b> if you wish to proceed to the main Spaces.
<br><br>
<b>Like this project? Feel free to buy me a <a href="https://ko-fi.com/hadad" target="_blank">coffee</a></b>.
"""  # Gradio