dungeon29 committed on
Commit
36d2903
·
verified ·
1 Parent(s): 13abf72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -9
app.py CHANGED
@@ -104,15 +104,8 @@ def fetch_html_content(url, timeout=10):
104
  }
105
  response = requests.get(url, headers=headers, timeout=timeout, verify=False)
106
  response.raise_for_status()
107
-
108
- soup = BeautifulSoup(response.text, 'html.parser')
109
- for script in soup(["script", "style", "meta", "noscript", "header", "footer"]):
110
- script.decompose()
111
-
112
- text = soup.get_text(separator=' ')
113
- clean_text = " ".join(text.split())
114
-
115
- return clean_text, response.status_code
116
  except requests.exceptions.RequestException as e:
117
  return None, f"Request error: {str(e)}"
118
  except Exception as e:
 
104
  }
105
  response = requests.get(url, headers=headers, timeout=timeout, verify=False)
106
  response.raise_for_status()
107
+
108
+ return response.text, response.status_code
 
 
 
 
 
 
 
109
  except requests.exceptions.RequestException as e:
110
  return None, f"Request error: {str(e)}"
111
  except Exception as e: