Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -104,15 +104,8 @@ def fetch_html_content(url, timeout=10):
|
|
| 104 |
}
|
| 105 |
response = requests.get(url, headers=headers, timeout=timeout, verify=False)
|
| 106 |
response.raise_for_status()
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
for script in soup(["script", "style", "meta", "noscript", "header", "footer"]):
|
| 110 |
-
script.decompose()
|
| 111 |
-
|
| 112 |
-
text = soup.get_text(separator=' ')
|
| 113 |
-
clean_text = " ".join(text.split())
|
| 114 |
-
|
| 115 |
-
return clean_text, response.status_code
|
| 116 |
except requests.exceptions.RequestException as e:
|
| 117 |
return None, f"Request error: {str(e)}"
|
| 118 |
except Exception as e:
|
|
|
|
| 104 |
}
|
| 105 |
response = requests.get(url, headers=headers, timeout=timeout, verify=False)
|
| 106 |
response.raise_for_status()
|
| 107 |
+
|
| 108 |
+
return response.text, response.status_code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
except requests.exceptions.RequestException as e:
|
| 110 |
return None, f"Request error: {str(e)}"
|
| 111 |
except Exception as e:
|