Spaces:
Sleeping
Sleeping
File size: 3,697 Bytes
5bdae9e 255ebde 5bdae9e a29b87f 255ebde 5e0460c 255ebde 5e0460c 255ebde 5bdae9e 5e0460c 5bdae9e 5e0460c 5bdae9e 255ebde 5e0460c 5bdae9e 255ebde 5e0460c 5bdae9e 5e0460c 5bdae9e 5e0460c 255ebde 5bdae9e 5e0460c 5bdae9e 5e0460c 0d6de03 5bdae9e 5e0460c 5bdae9e 5e0460c 5bdae9e 5e0460c 5bdae9e 0d6de03 5bdae9e 5e0460c 0d6de03 5bdae9e 255ebde 5bdae9e 0d6de03 5e0460c 5bdae9e 255ebde 5bdae9e 255ebde 5e0460c 5bdae9e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# web_summarizer_app.py
import os
import gradio as gr
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
# Import the Google Generative AI library
import google.generativeai as genai

# Load environment variables from a local .env file (expects GEMINI_API_KEY,
# optionally GEMINI_MODEL).
load_dotenv()

# Get the Gemini API key
gemini_api_key = os.getenv("GEMINI_API_KEY")

# Configure the generative AI model.
# Fail fast at import time if the key is missing, rather than at first request.
if not gemini_api_key:
    raise ValueError("GEMINI_API_KEY environment variable not set.")
genai.configure(api_key=gemini_api_key)

# Model name is overridable via the GEMINI_MODEL env var; defaults to
# 'gemini-1.5-flash-latest' as requested.
model_name = os.getenv("GEMINI_MODEL", "gemini-1.5-flash-latest")
model = genai.GenerativeModel(model_name)
# Web scraper
def scrape_text_from_url(url, max_chars=5000):
    """
    Scrape visible text content from a given URL.

    Args:
        url: The page to fetch.
        max_chars: Maximum number of characters to return (default 5000);
            truncation keeps the downstream LLM prompt within token limits.

    Returns:
        The extracted text (truncated to ``max_chars``), or a string starting
        with the "β" marker describing the failure. Callers detect failure
        by checking for "β" in the result, so that marker must be preserved.
    """
    try:
        response = requests.get(url, timeout=10)
        # Raise an exception for bad status codes (4xx or 5xx) so they are
        # reported as fetch errors instead of being scraped as error pages.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        # Remove non-visible content before extracting text.
        for tag in soup(["script", "style"]):
            tag.decompose()
        # stripped_strings already yields whitespace-stripped fragments,
        # so joining them directly gives a whitespace-normalized string.
        text = " ".join(soup.stripped_strings)
        return text[:max_chars]  # limit to avoid token overflow
    except requests.exceptions.RequestException as e:
        return f"β Error fetching the page: {str(e)}"
    except Exception as e:
        return f"β An unexpected error occurred during scraping: {str(e)}"
# LLM summarizer (using Gemini)
def summarize_with_gemini(text):
    """
    Summarize *text* with the module-level Gemini model.

    Returns the model's summary on success; otherwise an explanatory
    message when the input is empty, carries the "β" scraping-error
    marker, or the API call fails.
    """
    # Guard: nothing to do for empty input or upstream scraping failures
    # (scrape errors are flagged with the "β" marker).
    if not text or "β" in text:
        return "Cannot summarize due to scraping error or empty text."
    prompt = f"Please summarize the following content:\n\n{text}"
    try:
        # generate_content is the Gemini text-generation entry point;
        # .text access can also raise (e.g. blocked responses), so it
        # stays inside the try.
        reply = model.generate_content(prompt)
        return reply.text.strip()
    except Exception as err:
        return f"β Error from Gemini API: {str(err)}"
# Combined pipeline
def scrape_and_summarize(url):
    """
    Run the scrape + summarize pipeline for a URL.

    Returns a ``(raw_text, summary)`` pair. When scraping fails (the "β"
    marker is present), the summary slot carries a skip notice and the
    LLM is never called.
    """
    scraped = scrape_text_from_url(url)
    if "β" not in scraped:
        return scraped, summarize_with_gemini(scraped)
    # Scraping failed — surface the error text and skip the API call.
    return scraped, "Summarization skipped due to scraping error."
# Gradio UI: two-row layout — input controls on top, side-by-side outputs below.
with gr.Blocks(title="π Web Summarizer with AI") as demo:
    gr.Markdown("## π§ π Web Article Summarizer")
    gr.Markdown("Enter a webpage URL below. The AI will scrape and summarize the content using Gemini 1.5 Flash.")
    # Input row: URL textbox (wider, scale=4) plus the trigger button.
    with gr.Row():
        url_input = gr.Textbox(label="π Enter URL", placeholder="https://example.com", scale=4)
        btn = gr.Button("Summarize", variant="primary")
    # Output row: raw scraped text on the left, AI summary on the right;
    # both read-only (interactive=False).
    with gr.Row():
        with gr.Column(scale=1):
            raw_output = gr.Textbox(label="π Raw Scraped Text", lines=15, interactive=False)
        with gr.Column(scale=1):
            summary_output = gr.Textbox(label="π AI Summary", lines=15, interactive=False)
    # Link the button click event to the combined function; it returns a
    # (raw_text, summary) pair matching the two outputs.
    btn.click(scrape_and_summarize, inputs=[url_input], outputs=[raw_output, summary_output])
# Launch the app only when run as a script (not on import).
if __name__ == "__main__":
    # You can set share=True to create a public link (be cautious with API keys)
    demo.launch()
|