Spaces:
Build error
Build error
Update scraper.py
Browse files
- scraper.py +5 -0
scraper.py
CHANGED
|
@@ -64,14 +64,19 @@ def scrape_courses_json(base_url, num_pages=5):
|
|
| 64 |
data = []
|
| 65 |
for page_num in range(1, num_pages + 1):
|
| 66 |
page_url = f"{base_url}{page_num}"
|
|
|
|
|
|
|
| 67 |
course_links = get_course_links(page_url)
|
|
|
|
| 68 |
|
| 69 |
for link in course_links:
|
| 70 |
try:
|
|
|
|
| 71 |
course_data = parse_course_page(link)
|
| 72 |
data.append(course_data)
|
| 73 |
time.sleep(1) # Be courteous to the server
|
| 74 |
except Exception as e:
|
| 75 |
print(f"Failed to scrape {link}: {e}")
|
| 76 |
|
|
|
|
| 77 |
return data
|
|
|
|
| 64 |
data = []
|
| 65 |
for page_num in range(1, num_pages + 1):
|
| 66 |
page_url = f"{base_url}{page_num}"
|
| 67 |
+
print(f"Scraping page {page_num}: {page_url}")
|
| 68 |
+
|
| 69 |
course_links = get_course_links(page_url)
|
| 70 |
+
print(f"Found {len(course_links)} courses on page {page_num}")
|
| 71 |
|
| 72 |
for link in course_links:
|
| 73 |
try:
|
| 74 |
+
print(f"Scraping course: {link}")
|
| 75 |
course_data = parse_course_page(link)
|
| 76 |
data.append(course_data)
|
| 77 |
time.sleep(1) # Be courteous to the server
|
| 78 |
except Exception as e:
|
| 79 |
print(f"Failed to scrape {link}: {e}")
|
| 80 |
|
| 81 |
+
print("Scraping completed.")
|
| 82 |
return data
|