# Compare websites

In [None]:
# imports

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

# If you get an error running this cell, then please head over to the troubleshooting notebook!

In [None]:
# Load environment variables 

# Load variables via python-dotenv (called with override=True), then read the OpenAI key from the process environment.
# api_key is None when OPENAI_API_KEY is not set.
load_dotenv(override=True)
api_key = os.environ.get('OPENAI_API_KEY')

In [None]:
# Shared API client used by summarize() and compare().
# NOTE(review): no key is passed explicitly - presumably the client picks up OPENAI_API_KEY from the environment; confirm against the openai client docs.
openai = OpenAI()

# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.
# If it STILL doesn't work (horrors!) then please see the Troubleshooting notebook in this folder for full instructions

## Website class

In [None]:
# A class to represent a Webpage
# If you're not familiar with Classes, check out the "Intermediate Python" notebook

# Some websites need you to use proper headers when fetching them:
# Split across lines for readability; the adjacent literals concatenate into a single User-Agent value.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    A web page fetched once at construction time and reduced to its title and text.

    Attributes:
        url: the address that was requested
        title: contents of the <title> tag, or "No title found" when it is missing or has no single string
        text: text of <body> with script/style/img/input elements removed, fragments joined by newlines;
              "" when the response has no <body> element
    """

    def __init__(self, url):
        """
        Create this Website object from the given url using the BeautifulSoup library
        """
        self.url = url
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.content, 'html.parser')

        # .string is None when <title> is empty or wraps nested tags, so guard both the tag and its string
        page_title = soup.title.string if soup.title else None
        self.title = page_title if page_title else "No title found"

        # html.parser does not invent a <body>; non-HTML or fragment responses leave soup.body as None
        body = soup.body
        if body is None:
            self.text = ""
            return

        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

In [None]:
# Define our system prompt - you can experiment with this later, changing the last sentence to "Respond in markdown in Spanish."

system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [None]:
# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    """
    Build the user message for summarizing one page: a title line, the instructions, then the page text.
    """
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return f"You are looking at a website titled {website.title}" + instructions + website.text

## Website messages function

In [None]:
# This function builds the messages list for the chat API: a system message followed by a user message

def messages_for(website):
    """
    Return the two-message chat payload for a page: the system prompt, then the user prompt built from the page.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

## Website summary

In [None]:
# And now: call the OpenAI API. You will get very familiar with this!

def summarize(url, model="gpt-4o-mini"):
    """
    Fetch the page at url and return the model's summary of it.

    Args:
        url: address of the page to summarize
        model: chat model name passed to the API; the default is the model this function previously hard-coded

    Returns:
        The message content of the first choice in the chat completion response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content

# A function to display this nicely in the Jupyter output, using markdown

def display_summary(summary):
    """
    Wrap the given text in a Markdown object and hand it to display().
    """
    rendered = Markdown(summary)
    display(rendered)

In [None]:
# First site: summarize it and show the result.
# summary1 is kept at module level because messages_for_compare() reads it.
w1 = "https://cnn.com"
summary1 = summarize(w1)
display_summary(summary1)

In [None]:
# Second site: summarize it and show the result.
# summary2 is kept at module level because messages_for_compare() reads it.
w2 = "https://www.foxnews.com"
summary2 = summarize(w2)
display_summary(summary2)

## Comparison between two websites

In [None]:
# System prompt for the comparison request (spelling of "website" and "between" corrected)
system_prompt_compare = """You are a website analyst that compares the summaries of two websites
and provides a compare and contrast between the two. 
Respond in markdown."""

def user_prompt_for_compare(summary1, summary2):
    """
    Build the user message asking for a bullet-point comparison of two website summaries.
    """
    sections = [
        f"You are asked to compare this summary of a website {summary1}\n\n",
        f"\nWith the summary of this second website {summary2}\n\n",
        "please provide a short comparison of the two websites. "
        "List the similarities and differences in bullet point format.\n\n",
    ]
    return "".join(sections)

In [None]:
def messages_for_compare(first_summary=None, second_summary=None):
    """
    Return the two-message chat payload for comparing two website summaries.

    Args:
        first_summary: summary of the first site; when None, the module-level summary1 is used
        second_summary: summary of the second site; when None, the module-level summary2 is used

    Returns:
        A list with the comparison system message followed by the user message.
    """
    # Calling with no arguments keeps the original behaviour of reading the notebook globals
    if first_summary is None:
        first_summary = summary1
    if second_summary is None:
        second_summary = summary2
    return [
        {"role": "system", "content": system_prompt_compare},
        {"role": "user", "content": user_prompt_for_compare(first_summary, second_summary)}
    ]

In [None]:
def compare(model="gpt-4o-mini"):
    """
    Ask the model to compare the two website summaries and return its answer.

    Args:
        model: chat model name passed to the API; the default is the model this function previously hard-coded

    Returns:
        The message content of the first choice in the chat completion response.
    """
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for_compare(),
    )
    return response.choices[0].message.content

In [None]:
# Run the comparison and pass its result to display_summary
display_summary(compare())