import feedparser
import requests
import streamlit as st
from tqdm import tqdm
from pathlib import Path


# NOTE(review): newer Streamlit releases appear to deprecate `st.cache` in
# favour of `st.cache_data` -- confirm against the pinned Streamlit version
# before switching decorators.
@st.cache
def get_matadata():
    """Build a lookup of episode metadata keyed by transcript file path.

    Downloads the podcast RSS feed, then records for every feed entry its
    title, id and thumbnail URL. Entries that carry no image of their own
    fall back to the podcast-level image.

    Returns:
        dict: maps ``str(Path("making_sense_transcripts") / "<id>.txt")``
        (with any ``/`` in the episode id replaced by ``_``) to a dict with
        the keys ``"title"``, ``"episode_id"`` and ``"thumbnail"``.

    Raises:
        requests.RequestException: if the feed cannot be fetched, the
            request times out, or the server answers with an error status.
        KeyError: if the feed has no channel-level image, or an entry lacks
            a ``title`` or ``id``.
    """
    transcript_dir = Path("making_sense_transcripts/")
    rss_url = "https://wakingup.libsyn.com/rss"

    # Bound the wait so a stalled server cannot hang the app, and fail loudly
    # on an HTTP error instead of feeding an error page to the parser.
    response = requests.get(rss_url, timeout=30)
    response.raise_for_status()

    rss_feed = feedparser.parse(response.content)
    podcast_thumbnail = rss_feed.feed["image"]["href"]

    metadata_map = {}
    for episode in tqdm(rss_feed.entries, total=len(rss_feed.entries)):
        episode_id = episode["id"]

        # Use the episode's own artwork when present, else the podcast's.
        thumbnail = episode.get("image", {}).get("href") or podcast_thumbnail

        # Slashes in an id would be read as directory separators, so they are
        # flattened to underscores; ids without a slash pass through as-is.
        file_name = episode_id.replace("/", "_") + ".txt"
        episode_path = str(transcript_dir / file_name)

        metadata_map[episode_path] = {
            "title": episode["title"],
            "episode_id": episode_id,
            "thumbnail": thumbnail,
        }

    return metadata_map