import json
import random
import time
import traceback
import html as html_lib
from typing import Optional
from urllib.parse import quote

import requests
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, PlainTextResponse

app = FastAPI()
POLLINATIONS_URL = "https://text.pollinations.ai/prompt/"
# ---- Prompt templates ----
# STEP 1: Get the article structure
HEADLINES_PROMPT = """
You are an AI that produces a table of contents for a neutral, encyclopedic Wikipedia-style article.
Write about the topic: "{topic}".
Output ONLY valid JSON and NOTHING else. Do not add explanatory text, headers, markdown, or code fences.
Format exactly:
{{
  "title": "string",
  "lead": "string",
  "sections": [
    {{
      "heading": "string",
      "subsections": [
        {{
          "subheading": "string"
        }}
      ]
    }}
  ],
  "last_edited": "string" /* optional */
}}
"""
# STEP 2: Get all content for the structure in a single call
ARTICLE_PROMPT = """
You are an AI that writes a complete, neutral, and detailed encyclopedic Wikipedia-style article.
The topic is "{topic}".
You have been given a JSON structure containing headings and subheadings. Your task is to write the content for this structure.
Instructions:
1. **Content Depth:** Write a detailed paragraph for each heading and subheading. Paragraphs for the main headings should be especially comprehensive, consisting of several sentences to provide a thorough overview of the section's topic.
2. **Structure:** Do not invent new sections. Stick strictly to the provided input structure.
3. **Output Format:** Output ONLY a valid JSON object and NOTHING else. The output JSON must have the exact same structure as the input, but with a "content" field added to each section and subsection.
Input Structure:
{structure_json}
Output Format Example:
{{
  "sections": [
    {{
      "heading": "History",
      "content": "The history of the topic is long and varied, with early concepts dating back to ancient philosophy. Key developments in the 20th century, particularly the work on [[Turing Machines]], laid the groundwork for the modern field.",
      "subsections": [
        {{
          "subheading": "Early developments",
          "content": "In the early days, developments were slow and often theoretical..."
        }}
      ]
    }}
  ]
}}
"""
# ---- In-memory raw log (topic -> list of (prompt, response)) ----
RAW_LOG = {}
# ---- Utility functions ----
def call_pollinations(prompt: str) -> str:
    """Call Pollinations and return the raw text response (no stripping)."""
    # quote(..., safe="") also percent-encodes reserved characters such as '&'
    # and '#', which requote_uri leaves intact and which would otherwise
    # corrupt the query string.
    uri = (
        POLLINATIONS_URL
        + quote(prompt, safe="")
        + "?token=ZJyDM8G0LiZnNxFf&model=gemini&json=true"
        + f"&seed={random.randint(0, 999999)}"
    )
    r = requests.get(uri, timeout=60)  # generous timeout for larger generations
    r.raise_for_status()
    return r.text  # preserve the raw body for logging
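# The upstream endpoint can fail transiently, so callers may want a thin retry
# layer. This is a minimal sketch, not part of the original request flow; the
# name call_pollinations_with_retry and the backoff values are assumptions.
def call_pollinations_with_retry(prompt: str, attempts: int = 3, backoff: float = 2.0) -> str:
    """Retry call_pollinations with exponential backoff on network errors."""
    for attempt in range(attempts):
        try:
            return call_pollinations(prompt)
        except requests.RequestException:
            if attempt == attempts - 1:
                raise  # out of retries; surface the last error
            time.sleep(backoff * (2 ** attempt))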
def extract_json(text: str) -> dict:
    """Extract and parse the first JSON object found between the first '{' and the last '}'."""
    start = text.find("{")
    end = text.rfind("}") + 1
    # rfind returns -1 when no '}' exists, so end is 0 (not -1) in that case.
    if start == -1 or end == 0 or start >= end:
        raise ValueError("No JSON object found in AI response.\n\nRaw (truncated):\n" + text[:2000])
    json_str = text[start:end]
    try:
        return json.loads(json_str)
    except Exception as e:
        raise ValueError(
            f"Failed to parse JSON: {e}\n\n"
            f"Extracted (truncated):\n{json_str[:2000]}\n\n"
            f"Raw (truncated):\n{text[:2000]}"
        )
def log_raw(topic: str, prompt: str, response: str):
    """Append a prompt/response pair to RAW_LOG for the topic."""
    RAW_LOG.setdefault(topic, []).append((prompt, response))
# ---- Two-step generation functions ----
def generate_headlines(topic: str) -> dict:
    """Step 1: Get the article structure (TOC)."""
    prompt = HEADLINES_PROMPT.format(topic=topic)
    resp = call_pollinations(prompt)
    log_raw(topic, prompt, resp)
    data = extract_json(resp)
    # Normalize the structure so the renderer can rely on these keys.
    data.setdefault("title", topic.replace("_", " "))
    data.setdefault("lead", "")
    data.setdefault("sections", [])
    return data
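# Optional defensive check (a sketch, not wired into the original flow): the
# model can return "sections" entries that are not objects, which would crash
# the renderer. A hypothetical helper could filter those out:
def validate_sections(sections: list) -> list:
    """Keep only dict entries that carry a string 'heading'."""
    return [s for s in sections if isinstance(s, dict) and isinstance(s.get("heading"), str)]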
def generate_article_content(topic: str, toc_structure: dict) -> dict:
    """Step 2: Generate all content for the given structure in one call."""
    # Create a clean version of the structure for the prompt.
    structure_for_prompt = {
        "sections": [
            {
                "heading": s.get("heading"),
                "subsections": s.get("subsections", []),
            }
            for s in toc_structure.get("sections", [])
        ]
    }
    structure_json = json.dumps(structure_for_prompt, indent=2)
    prompt = ARTICLE_PROMPT.format(topic=topic, structure_json=structure_json)
    resp = call_pollinations(prompt)
    log_raw(topic, prompt, resp)
    data = extract_json(resp)
    return data
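# End-to-end sketch of the two-step pipeline, e.g. from a local script
# (the topic value is illustrative):
#   toc = generate_headlines("Quantum_computing")
#   article = generate_article_content("Quantum_computing", toc)
#   # `article` mirrors `toc`, with a "content" field added per (sub)section.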
# ---- Renderer ----
def esc(s):
    """HTML-escape strings; anything non-string renders as an empty string."""
    return html_lib.escape(s) if isinstance(s, str) else ""
def render_page(article: dict, execution_time: Optional[float] = None) -> str:
    """Render the final HTML page from the fully populated article JSON."""
    title = esc(article.get("title", "Untitled"))
    lead = esc(article.get("lead", ""))
    css = """body{font-family:sans-serif;margin:0;background:#f6f6f7;color:#202122}#container{display:flex;min-height:100vh}#left-sidebar{width:18%;padding:1.2em;background:#f6f6f7;border-right:1px solid #a7d7f9;box-sizing:border-box}#main-content{width:82%;padding:1.6em;background:#fff;box-sizing:border-box}header{display:flex;justify-content:space-between;align-items:center;border-bottom:1px solid #a7d7f9;padding-bottom:.6em;margin-bottom:1em}#main-title{font-family:Georgia,serif;font-size:2em;margin:0 0 .2em 0;font-weight:normal}.site-sub{color:#54595d;margin-top:0;font-size:.95em}h2{font-size:1.3em;margin-top:1.2em;border-bottom:1px solid #a2a9b1;padding-bottom:.2em;font-weight:normal}h3{font-size:1.05em;margin-top:.8em}p{line-height:1.6}#toc{background:#f8f9fa;border:1px solid #a2a9b1;padding:1em;margin-bottom:1em;display:inline-block}footer{margin-top:2em;border-top:1px solid #a2a9b1;padding-top:1em;color:#54595d;font-size:.85em}.references ol{padding-left:1.2em}"""
    parts = [
        "<!doctype html><html lang='en'><head><meta charset='utf-8'>",
        f"<title>{title} - Wikipedai</title>",
        "<link rel='icon' href='https://huggingface.co/spaces/NihalGazi/Wikipedai/resolve/main/wikipedai.png'>",
        f"<style>{css}</style></head><body><div id='container'><div id='left-sidebar'>",
        "<div style='text-align:center;margin-bottom:1em;'><a href='/'><img src='https://huggingface.co/spaces/NihalGazi/Wikipedai/resolve/main/wikipedai_logo.png' alt='logo' style='width:90px'></a></div>",
        "<div style='margin-bottom:1em;'><strong>Main menu</strong><ul style='padding-left:1em;'><li><a href='#'>Main page</a></li><li><a href='#'>Contents</a></li><li><a href='#'>Random article</a></li></ul></div></div>",
        "<div id='main-content'><header><div><a href='#'>Article</a> • <a href='#'>Talk</a></div><div><input placeholder='Search' id='search_bar' style='padding:.4em;border:1px solid #a2a9b1'></div></header>",
        f"<main><h1 id='main-title'>{title}</h1><p class='site-sub'>From Wikipedai, the free encyclopedai</p>",
    ]
    if lead:
        parts.append(f"<p><strong>{lead}</strong></p>")
    if article.get("sections"):
        parts.append("<div id='toc'><h2>Contents</h2><ul>")
        for i, sec in enumerate(article.get("sections", []), 1):
            parts.append(f"<li><a href='#sec{i}'>{i}. {esc(sec.get('heading',''))}</a></li>")
            if sec.get("subsections"):
                parts.append("<ul>")
                for j, sub in enumerate(sec.get("subsections", []), 1):
                    parts.append(f"<li><a href='#sec{i}_sub{j}'>{i}.{j} {esc(sub.get('subheading',''))}</a></li>")
                parts.append("</ul>")
        parts.append("</ul></div>")
    for i, sec in enumerate(article.get("sections", []), 1):
        parts.append(f"<h2 id='sec{i}'><span class='mw-headline'>{esc(sec.get('heading',''))}</span></h2>")
        if sec.get("content"):
            parts.append(f"<p>{esc(sec.get('content',''))}</p>")
        for j, sub in enumerate(sec.get("subsections", []) or [], 1):
            parts.append(f"<h3 id='sec{i}_sub{j}'><span class='mw-headline'>{esc(sub.get('subheading',''))}</span></h3>")
            if sub.get("content"):
                parts.append(f"<p>{esc(sub.get('content',''))}</p>")
    footer_parts = []
    if article.get("last_edited"):
        footer_parts.append(f"This page was last edited on {esc(article.get('last_edited', ''))}")
    if execution_time is not None:
        footer_parts.append(f"Page generated in {execution_time:.2f} seconds")
    footer_content = " • ".join(footer_parts)
    parts.append(f"</main><footer>{footer_content}</footer>")
    js = """
<script>
document.getElementById('search_bar').addEventListener('keydown', function(event) {
    // Check if the key pressed was 'Enter'
    if (event.key === 'Enter') {
        // Prevent any default action
        event.preventDefault();
        // Get the user's query from the input field
        const query = document.getElementById('search_bar').value;
        // If the query is empty, do nothing
        if (!query) {
            return;
        }
        // URI-encode the query to handle special characters safely
        const encodedQuery = encodeURIComponent(query);
        // Construct the final URL for the API
        const apiUrl = `https://nihalgazi-wikipedai.hf.space/wikipedai/${encodedQuery}`;
        // Redirect the browser to the API URL
        window.location.href = apiUrl;
    }
});
</script>
"""
    # Append the script before the closing tags so it is parsed inside <body>
    # rather than landing after </html>.
    parts.append(js)
    parts.append("</div></div></body></html>")
    return "\n".join(parts)
# ---- API Routes ----
@app.get("/wikipedai/{topic}", response_class=HTMLResponse)
def wikipedai(topic: str):
start_time = time.time()
RAW_LOG[topic] = []
try:
# Step 1: Get the article structure (title, lead, headings)
article_structure = generate_headlines(topic)
# Step 2: Get all content for that structure in a single API call
article_content = generate_article_content(topic, article_structure)
# Step 3: Merge the content back into the original structure
# This assumes the AI returned the sections in the same order, which it should.
content_sections = article_content.get("sections", [])
for i, section_structure in enumerate(article_structure.get("sections", [])):
if i < len(content_sections):
# Add content to the main section
section_structure["content"] = content_sections[i].get("content", "[Content not generated]")
# Add content to subsections
content_subsections = content_sections[i].get("subsections", [])
for j, sub_structure in enumerate(section_structure.get("subsections", [])):
if j < len(content_subsections):
sub_structure["content"] = content_subsections[j].get("content", "[Content not generated]")
# Final render
elapsed_time = time.time() - start_time
html = render_page(article_structure, execution_time=elapsed_time)
return HTMLResponse(content=html, status_code=200)
except Exception as e:
# Capture the full traceback for better debugging
import traceback
error_details = f"Error: {e}\n\nTraceback:\n{traceback.format_exc()}"
return HTMLResponse(content=f"<h1>Error</h1><pre>{html_lib.escape(error_details)}</pre>", status_code=500)
@app.get("/raw/{topic}", response_class=PlainTextResponse)
def raw(topic: str):
entries = RAW_LOG.get(topic, [])
if not entries:
return PlainTextResponse(f"No raw log found for topic '{topic}'. Try calling /wikipedai/{topic} first.", status_code=404)
out_lines = []
for idx, (prompt, resp) in enumerate(entries, start=1):
out_lines.append(f"--- Input [{idx}] ---\n{prompt}\n\n--- AI response [{idx}] ---\n{resp}\n")
return PlainTextResponse("\n".join(out_lines), status_code=200) |