# NOTE(review): the original file began with "Spaces: / Sleeping / Sleeping" —
# Hugging Face Spaces page chrome captured along with the source, not app code.
# web_summarizer_app.py
import os

import gradio as gr
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
import google.generativeai as genai

# Pull configuration (GEMINI_API_KEY, optional GEMINI_MODEL) from a .env file.
load_dotenv()

gemini_api_key = os.getenv("GEMINI_API_KEY")
if not gemini_api_key:
    # Fail fast at import time: nothing below works without credentials.
    raise ValueError("GEMINI_API_KEY environment variable not set.")
genai.configure(api_key=gemini_api_key)

# Default to Gemini 1.5 Flash; override with the GEMINI_MODEL env var.
model_name = os.getenv("GEMINI_MODEL", "gemini-1.5-flash-latest")
model = genai.GenerativeModel(model_name)
| # π Web Scraper | |
def scrape_text_from_url(url, max_chars=5000):
    """Fetch *url* and return its visible text, truncated to *max_chars*.

    Script and style tags are stripped before extraction. On failure this
    function does not raise: it returns an error string carrying the "β"
    sentinel prefix that the other functions in this file key on.

    Args:
        url: Address of the page to scrape.
        max_chars: Maximum number of characters to return (default 5000,
            keeping the downstream LLM prompt small).

    Returns:
        The page's visible text (truncated), or a sentinel error string.
    """
    try:
        # A browser-like User-Agent avoids 403s from sites that reject the
        # default python-requests agent string.
        headers = {"User-Agent": "Mozilla/5.0 (compatible; WebSummarizer/1.0)"}
        response = requests.get(url, headers=headers, timeout=10)
        # Surface 4xx/5xx responses as RequestException.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        # Drop non-visible content before extracting text.
        for tag in soup(["script", "style"]):
            tag.decompose()
        # Join all visible text fragments into a single string.
        text = " ".join(chunk.strip() for chunk in soup.stripped_strings)
        return text[:max_chars]  # limit to avoid token overflow
    except requests.exceptions.RequestException as e:
        # Sentinel prefix is intentional: callers test for "β" in the result.
        return f"β Error fetching the page: {str(e)}"
    except Exception as e:
        return f"β An unexpected error occurred during scraping: {str(e)}"
| # π§ LLM Summarizer (using Gemini) | |
def summarize_with_gemini(text):
    """Summarize *text* with the module-level Gemini model.

    Returns a fixed message when *text* is empty or carries the "β"
    scraping-error sentinel, so failed scrapes are never sent to the API.
    """
    # Guard: skip the API call for empty input or upstream scrape errors.
    if not text or "β" in text:
        return "Cannot summarize due to scraping error or empty text."
    try:
        prompt = f"Please summarize the following content:\n\n{text}"
        # generate_content returns a response whose .text holds the summary.
        return model.generate_content(prompt).text.strip()
    except Exception as e:
        return f"β Error from Gemini API: {str(e)}"
| # π Combined Function | |
def scrape_and_summarize(url):
    """Scrape *url* and summarize it; return (raw_text, summary)."""
    raw_text = scrape_text_from_url(url)
    if "β" in raw_text:
        # Scraping failed — show the error text and skip the LLM call.
        return raw_text, "Summarization skipped due to scraping error."
    return raw_text, summarize_with_gemini(raw_text)
| # π¨ Gradio UI | |
# Gradio UI: URL input on top, raw text and summary panes side by side.
# (Original labels contained mojibake — garbled emoji such as "π"/"π§" —
# replaced here with clean text.)
with gr.Blocks(title="Web Summarizer with AI") as demo:
    gr.Markdown("## Web Article Summarizer")
    gr.Markdown(
        "Enter a webpage URL below. The AI will scrape and summarize the "
        "content using Gemini 1.5 Flash."
    )
    with gr.Row():
        url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com", scale=4)
        btn = gr.Button("Summarize", variant="primary")
    with gr.Row():
        with gr.Column(scale=1):
            raw_output = gr.Textbox(label="Raw Scraped Text", lines=15, interactive=False)
        with gr.Column(scale=1):
            summary_output = gr.Textbox(label="AI Summary", lines=15, interactive=False)
    # Clicking the button runs scrape+summarize and fills both panes.
    btn.click(scrape_and_summarize, inputs=[url_input], outputs=[raw_output, summary_output])
| # π Launch app | |
# Script entry point.
if __name__ == "__main__":
    # Pass share=True for a public Gradio link (be cautious with API keys).
    demo.launch()