Spaces:

mamogasr
/

llm_engineering

Sleeping

App Files Files Community

llm_engineering / week8 /agents /deals.py

mamogasr

Upload folder using huggingface_hub

5fdb69e verified 7 months ago

raw

history blame contribute delete

3.27 kB

	from pydantic import BaseModel
	from typing import List, Dict, Self
	from bs4 import BeautifulSoup
	import re
	import feedparser
	from tqdm import tqdm
	import requests
	import time

	# DealNews RSS feed URLs read by ScrapedDeal.fetch(). Every URL has the
	# same shape, so only the section path is spelled out per feed.
	feeds = [
	    f"https://www.dealnews.com/{section}/?rss=1"
	    for section in (
	        "c142/Electronics",
	        "c39/Computers",
	        "c238/Automotive",
	        "f1912/Smart-Home",
	        "c196/Home-Garden",
	    )
	]

	def extract(html_snippet: str) -> str:
	    """
	    Pull the readable summary text out of an HTML snippet.

	    The snippet is parsed with Beautiful Soup and searched for a <div>
	    whose class is 'snippet summary'. When one is found, its text is
	    taken, run through the parser a second time, stripped of anything
	    that still looks like a tag, and trimmed. When none is found, the
	    snippet itself is used unchanged. In both cases newlines in the
	    result are replaced with spaces.
	    """
	    summary_node = BeautifulSoup(html_snippet, 'html.parser').find(
	        'div', class_='snippet summary'
	    )
	    if not summary_node:
	        # No summary <div>: fall back to the raw snippet.
	        return html_snippet.replace('\n', ' ')

	    text = summary_node.get_text(strip=True)
	    # Second parse plus regex pass removes any markup left in the text.
	    text = BeautifulSoup(text, 'html.parser').get_text()
	    text = re.sub('<[^<]+?>', '', text).strip()
	    return text.replace('\n', ' ')

	class ScrapedDeal:
	"""
	A class to represent a Deal retrieved from an RSS feed
	"""
	category: str
	title: str
	summary: str
	url: str
	details: str
	features: str

	def __init__(self, entry: Dict[str, str]):
	"""
	Populate this instance based on the provided dict
	"""
	self.title = entry['title']
	self.summary = extract(entry['summary'])
	self.url = entry['links'][0]['href']
	stuff = requests.get(self.url).content
	soup = BeautifulSoup(stuff, 'html.parser')
	content = soup.find('div', class_='content-section').get_text()
	content = content.replace('\nmore', '').replace('\n', ' ')
	if "Features" in content:
	self.details, self.features = content.split("Features")
	else:
	self.details = content
	self.features = ""

	def __repr__(self):
	"""
	Return a string to describe this deal
	"""
	return f"<{self.title}>"

	def describe(self):
	"""
	Return a longer string to describe this deal for use in calling a model
	"""
	return f"Title: {self.title}\nDetails: {self.details.strip()}\nFeatures: {self.features.strip()}\nURL: {self.url}"

	@classmethod
	def fetch(cls, show_progress : bool = False) -> List[Self]:
	"""
	Retrieve all deals from the selected RSS feeds
	"""
	deals = []
	feed_iter = tqdm(feeds) if show_progress else feeds
	for feed_url in feed_iter:
	feed = feedparser.parse(feed_url)
	for entry in feed.entries[:10]:
	deals.append(cls(entry))
	time.sleep(0.5)
	return deals

	class Deal(BaseModel):
	    """
	    Pydantic model for one deal: a summary description of the product,
	    its price, and the URL it came from.
	    """
	    # Summary text describing the product on offer
	    product_description: str
	    # Price of the deal as a number
	    price: float
	    # Link to the deal
	    url: str

	class DealSelection(BaseModel):
	    """
	    Pydantic model that wraps a list of Deal objects.
	    """
	    # The deals in this selection
	    deals: List[Deal]

	class Opportunity(BaseModel):
	    """
	    Pydantic model for a possible opportunity: a Deal that we estimate
	    should cost more than the price it is being offered at.
	    """
	    # The deal under consideration
	    deal: Deal
	    # Our estimate of what the item should cost
	    estimate: float
	    # presumably the gap between estimate and deal.price - TODO confirm
	    # against the code that builds Opportunity objects
	    discount: float