import json
import random
import time
import traceback
import html as html_lib
from typing import Optional
from urllib.parse import quote

import requests
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, PlainTextResponse

app = FastAPI()
POLLINATIONS_URL = "https://text.pollinations.ai/prompt/"
# ---- Prompt templates ----
# STEP 1: Get the article structure
HEADLINES_PROMPT = """
You are an AI that produces a table of contents for a neutral, encyclopedic Wikipedia-style article.
Write about the topic: "{topic}".
Output ONLY valid JSON and NOTHING else. Do not add explanatory text, headers, markdown, or code fences.
Format exactly:
{{
  "title": "string",
  "lead": "string",
  "sections": [
    {{
      "heading": "string",
      "subsections": [
        {{
          "subheading": "string"
        }}
      ]
    }}
  ],
  "last_edited": "string" /* optional */
}}
"""
# STEP 2: Get all content for the structure in a single call
ARTICLE_PROMPT = """
You are an AI that writes a complete, neutral, and detailed encyclopedic Wikipedia-style article.
The topic is "{topic}".
You have been given a JSON structure containing headings and subheadings. Your task is to write the content for this structure.
Instructions:
1. **Content Depth:** Write a detailed paragraph for each heading and subheading. Paragraphs for the main headings should be especially comprehensive, consisting of several sentences to provide a thorough overview of the section's topic.
2. **Structure:** Do not invent new sections. Stick strictly to the provided input structure.
3. **Output Format:** Output ONLY a valid JSON object and NOTHING else. The output JSON must have the exact same structure as the input, but with a "content" field added to each section and subsection.
Input Structure:
{structure_json}
Output Format Example:
{{
  "sections": [
    {{
      "heading": "History",
      "content": "The history of the topic is long and varied, with early concepts dating back to ancient philosophy. Key developments in the 20th century, particularly the work on [[Turing Machines]], laid the groundwork for the modern field.",
      "subsections": [
        {{
          "subheading": "Early developments",
          "content": "In the early days, developments were slow and often theoretical..."
        }}
      ]
    }}
  ]
}}
"""
# ---- In-memory raw log (topic -> list of (prompt, response)) ----
RAW_LOG = {}
# ---- Utility functions ----
def call_pollinations(prompt: str) -> str:
    """Call Pollinations and return the raw text response (no stripping)."""
    # quote(..., safe="") also percent-encodes reserved characters such as '&'
    # and '#', which requote_uri leaves intact and which would otherwise
    # corrupt the query string.
    uri = (
        POLLINATIONS_URL
        + quote(prompt, safe="")
        + "?token=ZJyDM8G0LiZnNxFf&model=gemini&json=true"
        + f"&seed={random.randint(0, 999999)}"
    )
    r = requests.get(uri, timeout=60)  # generous timeout for larger generations
    r.raise_for_status()
    return r.text  # preserve the raw body for logging
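# The upstream endpoint can fail transiently, so callers may want a thin retry
# layer. This is a minimal sketch, not part of the original request flow; the
# name call_pollinations_with_retry and the backoff values are assumptions.
def call_pollinations_with_retry(prompt: str, attempts: int = 3, backoff: float = 2.0) -> str:
    """Retry call_pollinations with exponential backoff on network errors."""
    for attempt in range(attempts):
        try:
            return call_pollinations(prompt)
        except requests.RequestException:
            if attempt == attempts - 1:
                raise  # out of retries; surface the last error
            time.sleep(backoff * (2 ** attempt))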
def extract_json(text: str) -> dict:
    """Extract and parse the first JSON object found between the first '{' and the last '}'."""
    start = text.find("{")
    end = text.rfind("}") + 1
    # rfind returns -1 when no '}' exists, so end is 0 (not -1) in that case.
    if start == -1 or end == 0 or start >= end:
        raise ValueError("No JSON object found in AI response.\n\nRaw (truncated):\n" + text[:2000])
    json_str = text[start:end]
    try:
        return json.loads(json_str)
    except Exception as e:
        raise ValueError(
            f"Failed to parse JSON: {e}\n\n"
            f"Extracted (truncated):\n{json_str[:2000]}\n\n"
            f"Raw (truncated):\n{text[:2000]}"
        )
def log_raw(topic: str, prompt: str, response: str):
    """Append a prompt/response pair to RAW_LOG for the topic."""
    RAW_LOG.setdefault(topic, []).append((prompt, response))
# ---- Two-step generation functions ----
def generate_headlines(topic: str) -> dict:
    """Step 1: Get the article structure (TOC)."""
    prompt = HEADLINES_PROMPT.format(topic=topic)
    resp = call_pollinations(prompt)
    log_raw(topic, prompt, resp)
    data = extract_json(resp)
    # Normalize the structure so the renderer can rely on these keys.
    data.setdefault("title", topic.replace("_", " "))
    data.setdefault("lead", "")
    data.setdefault("sections", [])
    return data
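# Optional defensive check (a sketch, not wired into the original flow): the
# model can return "sections" entries that are not objects, which would crash
# the renderer. A hypothetical helper could filter those out:
def validate_sections(sections: list) -> list:
    """Keep only dict entries that carry a string 'heading'."""
    return [s for s in sections if isinstance(s, dict) and isinstance(s.get("heading"), str)]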
def generate_article_content(topic: str, toc_structure: dict) -> dict:
    """Step 2: Generate all content for the given structure in one call."""
    # Create a clean version of the structure for the prompt.
    structure_for_prompt = {
        "sections": [
            {
                "heading": s.get("heading"),
                "subsections": s.get("subsections", []),
            }
            for s in toc_structure.get("sections", [])
        ]
    }
    structure_json = json.dumps(structure_for_prompt, indent=2)
    prompt = ARTICLE_PROMPT.format(topic=topic, structure_json=structure_json)
    resp = call_pollinations(prompt)
    log_raw(topic, prompt, resp)
    data = extract_json(resp)
    return data
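# End-to-end sketch of the two-step pipeline, e.g. from a local script
# (the topic value is illustrative):
#   toc = generate_headlines("Quantum_computing")
#   article = generate_article_content("Quantum_computing", toc)
#   # `article` mirrors `toc`, with a "content" field added per (sub)section.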
# ---- Renderer ----
def esc(s):
    """HTML-escape strings; anything non-string renders as an empty string."""
    return html_lib.escape(s) if isinstance(s, str) else ""
def render_page(article: dict, execution_time: Optional[float] = None) -> str:
    """Render the final HTML page from the fully populated article JSON."""
    title = esc(article.get("title", "Untitled"))
    lead = esc(article.get("lead", ""))
    css = """body{font-family:sans-serif;margin:0;background:#f6f6f7;color:#202122}#container{display:flex;min-height:100vh}#left-sidebar{width:18%;padding:1.2em;background:#f6f6f7;border-right:1px solid #a7d7f9;box-sizing:border-box}#main-content{width:82%;padding:1.6em;background:#fff;box-sizing:border-box}header{display:flex;justify-content:space-between;align-items:center;border-bottom:1px solid #a7d7f9;padding-bottom:.6em;margin-bottom:1em}#main-title{font-family:Georgia,serif;font-size:2em;margin:0 0 .2em 0;font-weight:normal}.site-sub{color:#54595d;margin-top:0;font-size:.95em}h2{font-size:1.3em;margin-top:1.2em;border-bottom:1px solid #a2a9b1;padding-bottom:.2em;font-weight:normal}h3{font-size:1.05em;margin-top:.8em}p{line-height:1.6}#toc{background:#f8f9fa;border:1px solid #a2a9b1;padding:1em;margin-bottom:1em;display:inline-block}footer{margin-top:2em;border-top:1px solid #a2a9b1;padding-top:1em;color:#54595d;font-size:.85em}.references ol{padding-left:1.2em}"""
    parts = [
        "<!doctype html><html lang='en'><head><meta charset='utf-8'>",
        f"<title>{title} - Wikipedai</title>",
        "<link rel='icon' href='https://huggingface.co/spaces/NihalGazi/Wikipedai/resolve/main/wikipedai.png'>",
        f"<style>{css}</style></head><body><div id='container'><div id='left-sidebar'>",
        "<div style='text-align:center;margin-bottom:1em;'><a href='/'><img src='https://huggingface.co/spaces/NihalGazi/Wikipedai/resolve/main/wikipedai_logo.png' alt='logo' style='width:90px'></a></div>",
        "<div style='margin-bottom:1em;'><strong>Main menu</strong><ul style='padding-left:1em;'><li><a href='#'>Main page</a></li><li><a href='#'>Contents</a></li><li><a href='#'>Random article</a></li></ul></div></div>",
        "<div id='main-content'><header><div><a href='#'>Article</a> • <a href='#'>Talk</a></div><div><input placeholder='Search' id='search_bar' style='padding:.4em;border:1px solid #a2a9b1'></div></header>",
        f"<main><h1 id='main-title'>{title}</h1><p class='site-sub'>From Wikipedai, the free encyclopedai</p>",
    ]
    if lead:
        parts.append(f"<p><strong>{lead}</strong></p>")
    if article.get("sections"):
        parts.append("<div id='toc'><h2>Contents</h2><ul>")
        for i, sec in enumerate(article.get("sections", []), 1):
            parts.append(f"<li><a href='#sec{i}'>{i}. {esc(sec.get('heading',''))}</a></li>")
            if sec.get("subsections"):
                parts.append("<ul>")
                for j, sub in enumerate(sec.get("subsections", []), 1):
                    parts.append(f"<li><a href='#sec{i}_sub{j}'>{i}.{j} {esc(sub.get('subheading',''))}</a></li>")
                parts.append("</ul>")
        parts.append("</ul></div>")
    for i, sec in enumerate(article.get("sections", []), 1):
        parts.append(f"<h2 id='sec{i}'><span class='mw-headline'>{esc(sec.get('heading',''))}</span></h2>")
        if sec.get("content"):
            parts.append(f"<p>{esc(sec.get('content',''))}</p>")
        for j, sub in enumerate(sec.get("subsections", []) or [], 1):
            parts.append(f"<h3 id='sec{i}_sub{j}'><span class='mw-headline'>{esc(sub.get('subheading',''))}</span></h3>")
            if sub.get("content"):
                parts.append(f"<p>{esc(sub.get('content',''))}</p>")
    footer_parts = []
    if article.get("last_edited"):
        footer_parts.append(f"This page was last edited on {esc(article.get('last_edited', ''))}")
    if execution_time is not None:
        footer_parts.append(f"Page generated in {execution_time:.2f} seconds")
    footer_content = " • ".join(footer_parts)
    parts.append(f"</main><footer>{footer_content}</footer>")
    js = """
<script>
document.getElementById('search_bar').addEventListener('keydown', function(event) {
    // Check if the key pressed was 'Enter'
    if (event.key === 'Enter') {
        // Prevent any default action
        event.preventDefault();
        // Get the user's query from the input field
        const query = document.getElementById('search_bar').value;
        // If the query is empty, do nothing
        if (!query) {
            return;
        }
        // URI-encode the query to handle special characters safely
        const encodedQuery = encodeURIComponent(query);
        // Construct the final URL for the API
        const apiUrl = `https://nihalgazi-wikipedai.hf.space/wikipedai/${encodedQuery}`;
        // Redirect the browser to the API URL
        window.location.href = apiUrl;
    }
});
</script>
"""
    # Append the script before the closing tags so it is parsed inside <body>
    # rather than landing after </html>.
    parts.append(js)
    parts.append("</div></div></body></html>")
    return "\n".join(parts)
# ---- API Routes ----
@app.get("/wikipedai/{topic}", response_class=HTMLResponse)
def wikipedai(topic: str):
start_time = time.time()
RAW_LOG[topic] = []
try:
# Step 1: Get the article structure (title, lead, headings)
article_structure = generate_headlines(topic)
# Step 2: Get all content for that structure in a single API call
article_content = generate_article_content(topic, article_structure)
# Step 3: Merge the content back into the original structure
# This assumes the AI returned the sections in the same order, which it should.
content_sections = article_content.get("sections", [])
for i, section_structure in enumerate(article_structure.get("sections", [])):
if i < len(content_sections):
# Add content to the main section
section_structure["content"] = content_sections[i].get("content", "[Content not generated]")
# Add content to subsections
content_subsections = content_sections[i].get("subsections", [])
for j, sub_structure in enumerate(section_structure.get("subsections", [])):
if j < len(content_subsections):
sub_structure["content"] = content_subsections[j].get("content", "[Content not generated]")
# Final render
elapsed_time = time.time() - start_time
html = render_page(article_structure, execution_time=elapsed_time)
return HTMLResponse(content=html, status_code=200)
except Exception as e:
# Capture the full traceback for better debugging
import traceback
error_details = f"Error: {e}\n\nTraceback:\n{traceback.format_exc()}"
return HTMLResponse(content=f"<h1>Error</h1><pre>{html_lib.escape(error_details)}</pre>", status_code=500)
@app.get("/raw/{topic}", response_class=PlainTextResponse)
def raw(topic: str):
entries = RAW_LOG.get(topic, [])
if not entries:
return PlainTextResponse(f"No raw log found for topic '{topic}'. Try calling /wikipedai/{topic} first.", status_code=404)
out_lines = []
for idx, (prompt, resp) in enumerate(entries, start=1):
out_lines.append(f"--- Input [{idx}] ---\n{prompt}\n\n--- AI response [{idx}] ---\n{resp}\n")
return PlainTextResponse("\n".join(out_lines), status_code=200) |