Spaces:
Sleeping
Sleeping
| from pydantic import BaseModel | |
| from typing import List, Dict, Self | |
| from bs4 import BeautifulSoup | |
| import re | |
| import feedparser | |
| from tqdm import tqdm | |
| import requests | |
| import time | |
# DealNews RSS feeds to poll; each URL is built from its category path.
feeds = [
    "https://www.dealnews.com/" + category_path + "/?rss=1"
    for category_path in (
        "c142/Electronics",
        "c39/Computers",
        "c238/Automotive",
        "f1912/Smart-Home",
        "c196/Home-Garden",
    )
]
def extract(html_snippet: str) -> str:
    """
    Pull the readable text out of an HTML snippet using Beautiful Soup.

    If the snippet contains a <div> whose class is 'snippet summary', the
    text of that div is returned with any remaining markup removed.
    Otherwise the snippet is returned as given. In both cases newlines
    are replaced by spaces.
    """
    summary_div = BeautifulSoup(html_snippet, 'html.parser').find('div', class_='snippet summary')
    if not summary_div:
        # No summary div: hand back the raw snippet, flattened to one line
        return html_snippet.replace('\n', ' ')

    text = summary_div.get_text(strip=True)
    # Second parse strips any markup that was embedded in the text itself
    text = BeautifulSoup(text, 'html.parser').get_text()
    # Finally remove anything that still looks like a tag
    text = re.sub('<[^<]+?>', '', text)
    return text.strip().replace('\n', ' ')
| class ScrapedDeal: | |
| """ | |
| A class to represent a Deal retrieved from an RSS feed | |
| """ | |
| category: str | |
| title: str | |
| summary: str | |
| url: str | |
| details: str | |
| features: str | |
| def __init__(self, entry: Dict[str, str]): | |
| """ | |
| Populate this instance based on the provided dict | |
| """ | |
| self.title = entry['title'] | |
| self.summary = extract(entry['summary']) | |
| self.url = entry['links'][0]['href'] | |
| stuff = requests.get(self.url).content | |
| soup = BeautifulSoup(stuff, 'html.parser') | |
| content = soup.find('div', class_='content-section').get_text() | |
| content = content.replace('\nmore', '').replace('\n', ' ') | |
| if "Features" in content: | |
| self.details, self.features = content.split("Features") | |
| else: | |
| self.details = content | |
| self.features = "" | |
| def __repr__(self): | |
| """ | |
| Return a string to describe this deal | |
| """ | |
| return f"<{self.title}>" | |
| def describe(self): | |
| """ | |
| Return a longer string to describe this deal for use in calling a model | |
| """ | |
| return f"Title: {self.title}\nDetails: {self.details.strip()}\nFeatures: {self.features.strip()}\nURL: {self.url}" | |
| def fetch(cls, show_progress : bool = False) -> List[Self]: | |
| """ | |
| Retrieve all deals from the selected RSS feeds | |
| """ | |
| deals = [] | |
| feed_iter = tqdm(feeds) if show_progress else feeds | |
| for feed_url in feed_iter: | |
| feed = feedparser.parse(feed_url) | |
| for entry in feed.entries[:10]: | |
| deals.append(cls(entry)) | |
| time.sleep(0.5) | |
| return deals | |
class Deal(BaseModel):
    """
    A class to Represent a Deal with a summary description

    Pydantic model with three required fields.
    """
    # Text describing the product on offer
    product_description: str
    # NOTE(review): presumably the offered price; currency is not stated here
    price: float
    # Link associated with the deal
    url: str
class DealSelection(BaseModel):
    """
    A class to Represent a list of Deals

    Pydantic model wrapping the list in a single required field.
    """
    # The Deal objects in this selection
    deals: List[Deal]
class Opportunity(BaseModel):
    """
    A class to represent a possible opportunity: a Deal where we estimate
    it should cost more than it's being offered
    """
    # The deal this opportunity refers to
    deal: Deal
    # Our estimate of what the item should cost
    estimate: float
    # NOTE(review): presumably estimate minus deal.price; confirm with the code that builds this
    discount: float