rkihacker committed on
Commit
4b17916
·
verified ·
1 Parent(s): 8c635da

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +85 -0
main.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from fastapi import FastAPI, HTTPException
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ import aiohttp
6
+
7
# --- Configuration ---
# All LLM settings are read from environment variables so that credentials
# never live in source control. LLM_API_KEY deliberately has no built-in
# default: any key that was ever committed to the repository must be treated
# as leaked and rotated.
LLM_API_URL = os.getenv("LLM_API_URL", "https://api.inference.net/v1/chat/completions")
LLM_API_KEY = os.getenv("LLM_API_KEY", "")  # must be supplied by the deployment
LLM_MODEL = os.getenv("LLM_MODEL", "meta-llama/llama-3.1-8b-instruct/fp-8")
13
+
14
# ASGI application object; title, description and version are handed to
# FastAPI as application metadata.
app = FastAPI(
    version="1.0.0",
    title="Web Scraper and AI Processor",
    description="An API to scrape web content and process it with a large language model.",
)
19
+
20
async def scrape_url(session, url: str):
    """Fetch *url* and return its visible text as one space-joined string.

    Args:
        session: An open ``aiohttp.ClientSession`` used to issue the GET request.
        url: Address of the page to download.

    Returns:
        The page text with ``<script>`` and ``<style>`` elements removed and
        the remaining non-empty fragments joined by single spaces. May be an
        empty string when the page has no text.

    Raises:
        HTTPException: 400 when the request fails, times out, or the server
            answers with an error status.
    """
    import asyncio  # local import: only needed for the timeout exception type

    # Only the network call sits inside the try block, so parsing problems are
    # not mislabelled as fetch errors.
    try:
        async with session.get(url, timeout=aiohttp.ClientTimeout(total=10)) as response:
            response.raise_for_status()
            html_content = await response.text()
    except asyncio.TimeoutError as e:
        # str(TimeoutError()) is empty, so spell the reason out explicitly.
        raise HTTPException(
            status_code=400,
            detail="Error fetching the URL: request timed out",
        ) from e
    except aiohttp.ClientError as e:
        # The session is aiohttp-based, so aiohttp's exception hierarchy (not
        # the one from `requests`) is what can actually be raised here.
        raise HTTPException(status_code=400, detail=f"Error fetching the URL: {e}") from e

    soup = BeautifulSoup(html_content, "html.parser")
    # Remove script and style elements so their source does not leak into the text.
    for script_or_style in soup(["script", "style"]):
        script_or_style.decompose()

    # Get text and clean it up: strip each line, break it into space-separated
    # phrases, and drop the empty ones.
    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
    return " ".join(chunk for chunk in chunks if chunk)
37
+
38
async def process_with_llm(session, content: str, query: str):
    """Send the scraped content and a query to the LLM chat-completions endpoint.

    Args:
        session: An open ``aiohttp.ClientSession`` used to issue the POST request.
        content: Text extracted from the scraped page.
        query: Question the model should answer about ``content``.

    Returns:
        The decoded JSON body of the LLM API response.

    Raises:
        HTTPException: 500 when the request fails, times out, the API answers
            with an error status, or the body cannot be decoded as JSON.
    """
    import asyncio  # local import: only needed for the timeout exception type

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {LLM_API_KEY}",
    }
    payload = {
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful assistant that analyzes web content.",
            },
            {
                "role": "user",
                "content": f"Based on the following content, please answer this question: '{query}'\n\nContent:\n{content}",
            },
        ],
        "model": LLM_MODEL,
        "stream": False,  # Set to False for a single response
    }
    try:
        async with session.post(
            LLM_API_URL,
            headers=headers,
            json=payload,
            timeout=aiohttp.ClientTimeout(total=30),
        ) as response:
            response.raise_for_status()
            return await response.json()
    except asyncio.TimeoutError as e:
        # A total-timeout expiry is not an aiohttp.ClientError, so it needs its
        # own handler; its str() is empty, hence the explicit message.
        raise HTTPException(
            status_code=500,
            detail="Error communicating with the LLM API: request timed out",
        ) from e
    except aiohttp.ClientError as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error communicating with the LLM API: {e}",
        ) from e
64
+
65
@app.post("/scrape-and-process/")
async def scrape_and_process(url: str, query: str):
    """
    Scrapes a URL, sends the content to a large language model with a query,
    and returns the model's response.

    Args:
        url: Absolute http(s) address of the page to scrape.
        query: Question to ask the model about the scraped content.

    Returns:
        Whatever ``process_with_llm`` returns for the scraped content.

    Raises:
        HTTPException: 400 when ``url`` is not an absolute http(s) URL,
            404 when no text could be extracted from the page, plus any
            HTTPException raised by ``scrape_url`` or ``process_with_llm``.
    """
    from urllib.parse import urlsplit

    # The URL comes straight from the caller, so reject anything that is not
    # an absolute http(s) address before making a request on their behalf.
    try:
        parts = urlsplit(url)
    except ValueError:
        parts = None
    if parts is None or parts.scheme not in ("http", "https") or not parts.netloc:
        raise HTTPException(
            status_code=400,
            detail="The URL must be an absolute http(s) address.",
        )
    # NOTE(review): this check does not stop requests to private/internal
    # hosts; add an allow-list or address filtering if this API is exposed to
    # untrusted clients.

    async with aiohttp.ClientSession() as session:
        scraped_content = await scrape_url(session, url)
        if not scraped_content:
            raise HTTPException(status_code=404, detail="Could not scrape any content from the URL.")

        llm_response = await process_with_llm(session, scraped_content, query)
        return llm_response
78
+
79
@app.get("/")
def read_root():
    """Return the static welcome payload served at the API root."""
    greeting = "Welcome to the Web Scraper and AI Processor API."
    return {"message": greeting}
82
+
83
if __name__ == "__main__":
    # Start a uvicorn server for `app` when this module is executed directly;
    # uvicorn is imported here so that importing the module does not load it.
    import uvicorn

    uvicorn.run(app, port=8000, host="0.0.0.0")