# web_summarizer_app.py
"""Gradio app that scrapes a web page and summarizes its text with Gemini.

Configuration is read from the environment (optionally via a ``.env`` file):

* ``GEMINI_API_KEY`` -- required; the module raises ``ValueError`` on import
  when it is missing.
* ``GEMINI_MODEL`` -- optional; defaults to ``gemini-1.5-flash-latest``.
"""
import os
from typing import Tuple

import gradio as gr
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Import the Google Generative AI library
import google.generativeai as genai

# Load environment variables (picks up a local .env file when present).
load_dotenv()

# Get the Gemini API key
gemini_api_key = os.getenv("GEMINI_API_KEY")

# Configure the generative AI model.
# Ensure GEMINI_API_KEY is set in your .env file; fail fast otherwise so the
# UI never starts in a state where every summarization request would fail.
if not gemini_api_key:
    raise ValueError("GEMINI_API_KEY environment variable not set.")

genai.configure(api_key=gemini_api_key)

# Defaults to Gemini 1.5 Flash; set GEMINI_MODEL to use a different model.
model_name = os.getenv("GEMINI_MODEL", "gemini-1.5-flash-latest")
model = genai.GenerativeModel(model_name)

# Every error string produced by this module starts with this marker, which is
# how downstream steps tell an error message apart from scraped page text.
ERROR_PREFIX = "❌"

# Upper bound on scraped text passed to the model, to avoid token overflow.
MAX_SCRAPED_CHARS = 5000

# Seconds to wait for the remote server before giving up on the request.
REQUEST_TIMEOUT_SECONDS = 10


def _is_error_message(text: str) -> bool:
    """Return True when *text* is one of this module's error messages.

    The check is anchored to the start of the string: ordinary page content
    that merely contains the marker somewhere is not treated as an error.
    """
    return text.startswith(ERROR_PREFIX)


# 🌐 Web Scraper
def scrape_text_from_url(url: str, max_chars: int = MAX_SCRAPED_CHARS) -> str:
    """Scrape the visible text content of a web page.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is ignored.
        max_chars: Maximum number of characters of text to return
            (default 5000) so the model input stays small.

    Returns:
        The page's visible text joined with single spaces and truncated to
        ``max_chars``, or a message starting with "❌" describing the failure.
        Exceptions are not propagated; they are reported in the return value
        so the UI can display them.
    """
    url = (url or "").strip()
    if not url:
        return f"{ERROR_PREFIX} Error fetching the page: no URL provided."

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        # Raise an exception for bad status codes (4xx or 5xx)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")

        # Remove script and style tags: their contents are not visible text.
        for tag in soup(["script", "style"]):
            tag.decompose()

        # stripped_strings already yields whitespace-stripped, non-empty
        # strings, so they can be joined directly.
        text = " ".join(soup.stripped_strings)
        return text[:max_chars]
    except requests.exceptions.RequestException as e:
        return f"❌ Error fetching the page: {e}"
    except Exception as e:
        # Boundary handler: parsing problems are shown to the user rather
        # than crashing the UI callback.
        return f"❌ An unexpected error occurred during scraping: {e}"


# 🧠 LLM Summarizer (using Gemini)
def summarize_with_gemini(text: str) -> str:
    """Summarize *text* using the configured Gemini model.

    Args:
        text: Content to summarize, typically the output of
            ``scrape_text_from_url``.

    Returns:
        The model's summary with surrounding whitespace removed; a fixed
        notice when *text* is empty, blank, or a scraping error message; or a
        message starting with "❌" when the Gemini call fails.
    """
    if not text or not text.strip() or _is_error_message(text):
        return "Cannot summarize due to scraping error or empty text."

    try:
        # Use the generate_content method for Gemini
        response = model.generate_content(
            f"Please summarize the following content:\n\n{text}"
        )
        # Access the text content from the response
        return response.text.strip()
    except Exception as e:
        # Boundary handler: any API failure is reported in the UI.
        return f"❌ Error from Gemini API: {e}"


# 🔁 Combined Function
def scrape_and_summarize(url: str) -> Tuple[str, str]:
    """Scrape *url* and summarize the result.

    Args:
        url: Address of the page to scrape and summarize.

    Returns:
        A ``(raw_text, summary)`` pair. When scraping fails, ``raw_text`` is
        the error message and ``summary`` explains that summarization was
        skipped.
    """
    raw_text = scrape_text_from_url(url)

    # Check if scraping failed before attempting summarization
    if _is_error_message(raw_text):
        return raw_text, "Summarization skipped due to scraping error."

    summary = summarize_with_gemini(raw_text)
    return raw_text, summary


# 🎨 Gradio UI
with gr.Blocks(title="🔎 Web Summarizer with AI") as demo:
    gr.Markdown("## 🧠🌐 Web Article Summarizer")
    gr.Markdown("Enter a webpage URL below. The AI will scrape and summarize the content using Gemini 1.5 Flash.")

    with gr.Row():
        url_input = gr.Textbox(label="🔗 Enter URL", placeholder="https://example.com", scale=4)
        btn = gr.Button("Summarize", variant="primary")

    with gr.Row():
        with gr.Column(scale=1):
            raw_output = gr.Textbox(label="📝 Raw Scraped Text", lines=15, interactive=False)
        with gr.Column(scale=1):
            summary_output = gr.Textbox(label="📄 AI Summary", lines=15, interactive=False)

    # Link the button click event to the combined function
    btn.click(scrape_and_summarize, inputs=[url_input], outputs=[raw_output, summary_output])

# 🚀 Launch app
if __name__ == "__main__":
    # You can set share=True to create a public link (be cautious with API keys)
    demo.launch()