"""Gradio dashboard that extracts or transcribes text and summarizes it.

Text comes from uploaded TXT/PDF/DOCX files, uploaded audio files, or a live
microphone stream (transcribed with Whisper); summaries are produced by a
Hugging Face summarization pipeline.
"""

import io  # noqa: F401  (unused here; kept because other code may rely on it)
import os
import tempfile  # noqa: F401  (unused here; kept because other code may rely on it)

import docx
import gradio as gr
import numpy as np
import PyPDF2
import whisper
from transformers import pipeline

# Whisper interprets raw sample arrays as 16 kHz mono float32 audio.
WHISPER_SAMPLE_RATE = 16000

# Upper bound (in model tokens) of the generated summary for each UI choice.
SUMMARY_MAX_LENGTHS = {"Short": 100, "Medium": 150, "Long": 250}
DEFAULT_SUMMARY_LENGTH = "Medium"

AUDIO_EXTENSIONS = (".mp3", ".wav", ".m4a", ".flac")


class _ExtractionError(Exception):
    """Reading or transcribing a file failed; ``str(exc)`` is user-facing."""


class _UnsupportedFormatError(Exception):
    """The file extension has no registered extractor."""

    def __init__(self, extension):
        super().__init__(extension)
        self.extension = extension


def _file_path(file):
    """Return the filesystem path of an upload.

    Accepts both objects exposing a ``.name`` attribute and plain paths.
    """
    return getattr(file, "name", file)


def _summary_bounds(summary_length):
    """Map a UI length label to ``(max_length, min_length)`` token limits.

    Unknown labels fall back to the default length instead of raising.
    """
    max_length = SUMMARY_MAX_LENGTHS.get(
        summary_length, SUMMARY_MAX_LENGTHS[DEFAULT_SUMMARY_LENGTH]
    )
    return max_length, max_length // 3


def _to_whisper_audio(sample_rate, samples):
    """Convert a raw audio chunk to the mono float32 16 kHz array Whisper expects.

    Integer PCM is scaled by its dtype range (int16 is divided by 32768);
    floating-point input is assumed to already be normalized. Multi-channel
    input is assumed to be shaped ``(samples, channels)`` and is averaged
    down to mono.
    """
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        scale = float(np.iinfo(samples.dtype).max) + 1.0
        audio = samples.astype(np.float32) / scale
    else:
        audio = samples.astype(np.float32)

    if audio.ndim > 1:
        audio = audio.mean(axis=1)

    if audio.size and sample_rate != WHISPER_SAMPLE_RATE:
        # Plain linear interpolation: no anti-aliasing filter, but it needs
        # no extra dependency and restores the correct playback speed.
        target_size = max(1, int(round(audio.size * WHISPER_SAMPLE_RATE / sample_rate)))
        source_times = np.arange(audio.size) / sample_rate
        target_times = np.arange(target_size) / WHISPER_SAMPLE_RATE
        audio = np.interp(target_times, source_times, audio)

    return audio.astype(np.float32)


class TextSummarizer:
    """Extract text from documents or audio and summarize it."""

    def __init__(
        self,
        summarizer_model="facebook/bart-large-cnn",
        whisper_model="base",
        cache_dir="/code/cache",
    ):
        """Load the summarization pipeline and the Whisper model.

        Args:
            summarizer_model: Hugging Face model id for the summarization pipeline.
            whisper_model: Whisper model name passed to ``whisper.load_model``.
            cache_dir: Directory Whisper downloads its weights into; it must
                be writable.
        """
        self.summarizer = pipeline("summarization", model=summarizer_model)
        self.whisper_model = whisper.load_model(whisper_model, download_root=cache_dir)

    # ------------------------------------------------------------------
    # Raising readers: shared by the public wrappers and by _extract.
    # ------------------------------------------------------------------
    @staticmethod
    def _read_pdf(pdf_file):
        """Return the concatenated text of every page of a PDF."""
        reader = PyPDF2.PdfReader(pdf_file)
        # extract_text() may yield nothing for a page; treat that as "".
        return "".join(page.extract_text() or "" for page in reader.pages)

    @staticmethod
    def _read_docx(docx_file):
        """Return every paragraph of a DOCX document, newline-terminated."""
        document = docx.Document(docx_file)
        return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)

    @staticmethod
    def _read_txt(txt_file):
        """Return the contents of a UTF-8 text file."""
        with open(_file_path(txt_file), "r", encoding="utf-8") as handle:
            return handle.read()

    def _transcribe(self, audio_file):
        """Return the Whisper transcript of an audio file path."""
        return self.whisper_model.transcribe(audio_file)["text"]

    def _extract(self, file):
        """Return the text content of *file*, chosen by its extension.

        Raises:
            _UnsupportedFormatError: The extension has no extractor.
            _ExtractionError: The extractor failed; the message is user-facing.
        """
        path = _file_path(file)
        extension = os.path.splitext(path)[1].lower()

        # extension -> (reader, argument for the reader, error prefix)
        handlers = {
            ".txt": (self._read_txt, file, "Error reading TXT file"),
            ".pdf": (self._read_pdf, file, "Error reading PDF"),
            ".docx": (self._read_docx, file, "Error reading DOCX"),
        }
        for audio_extension in AUDIO_EXTENSIONS:
            handlers[audio_extension] = (self._transcribe, path, "Error transcribing audio")

        if extension not in handlers:
            raise _UnsupportedFormatError(extension)

        reader, argument, error_prefix = handlers[extension]
        try:
            return reader(argument)
        except Exception as exc:
            raise _ExtractionError(f"{error_prefix}: {exc}") from exc

    # ------------------------------------------------------------------
    # Public API: failures are reported as "Error ..." strings.
    # ------------------------------------------------------------------
    def extract_text_from_pdf(self, pdf_file):
        """Extract text from a PDF file object; return an error string on failure."""
        try:
            return self._read_pdf(pdf_file)
        except Exception as e:
            return f"Error reading PDF: {str(e)}"

    def extract_text_from_docx(self, docx_file):
        """Extract text from a DOCX file object; return an error string on failure."""
        try:
            return self._read_docx(docx_file)
        except Exception as e:
            return f"Error reading DOCX: {str(e)}"

    def process_text_file(self, txt_file):
        """Read a UTF-8 TXT upload; return an error string on failure."""
        try:
            return self._read_txt(txt_file)
        except Exception as e:
            return f"Error reading TXT file: {str(e)}"

    def transcribe_audio(self, audio_file):
        """Transcribe an audio file with Whisper; return an error string on failure."""
        try:
            return self._transcribe(audio_file)
        except Exception as e:
            return f"Error transcribing audio: {str(e)}"

    def summarize_text(self, text, max_length=150, min_length=50):
        """Summarize *text* with the summarization pipeline.

        Args:
            text: Text to summarize.
            max_length: Maximum summary length passed to the pipeline.
            min_length: Minimum summary length passed to the pipeline.

        Returns:
            The summary, a notice when the text has fewer than 50 non-padding
            characters, or an ``"Error summarizing text: ..."`` string.
        """
        try:
            if not text or len(text.strip()) < 50:
                return "Text is too short to summarize."
            summary = self.summarizer(
                text,
                max_length=max_length,
                min_length=min_length,
                do_sample=False,
                # Inputs longer than the model's maximum input length would
                # otherwise fail; with truncation only the leading part of
                # the text is summarized.
                truncation=True,
            )
            return summary[0]['summary_text']
        except Exception as e:
            return f"Error summarizing text: {str(e)}"

    def process_file(self, file, summary_length):
        """Extract the text of an uploaded file and return a formatted summary.

        Args:
            file: Uploaded file (object with ``.name`` or a path), or ``None``.
            summary_length: One of the keys of ``SUMMARY_MAX_LENGTHS``.

        Returns:
            Markdown-formatted text with the original length and the summary,
            or a message describing why no summary was produced.
        """
        if file is None:
            return "No file uploaded."

        max_length, min_length = _summary_bounds(summary_length)

        # Failures arrive as exceptions, so extracted text that merely
        # begins with the word "Error" is still summarized.
        try:
            text = self._extract(file)
        except _UnsupportedFormatError as exc:
            return f"Unsupported file format: {exc.extension}"
        except _ExtractionError as exc:
            return str(exc)

        summary = self.summarize_text(text, max_length, min_length)
        return f"**Original Text Length:** {len(text)} characters\n\n**Summary:**\n{summary}"

    def transcribe_stream(self, audio_chunk, current_transcript):
        """Transcribe one streamed audio chunk and append it to the transcript.

        Args:
            audio_chunk: ``(sample_rate, samples)`` tuple, or ``None``.
            current_transcript: Transcript accumulated so far.

        Returns:
            ``(text_to_display, new_state)``. On failure the display value is
            an error message and the state is left unchanged.
        """
        if audio_chunk is None:
            return current_transcript, current_transcript
        try:
            sample_rate, data = audio_chunk
            audio = _to_whisper_audio(sample_rate, data)
            if audio.size == 0:
                return current_transcript, current_transcript

            result = self.whisper_model.transcribe(audio, fp16=False)
            updated_transcript = (current_transcript or "") + result['text'] + " "
            return updated_transcript, updated_transcript
        except Exception as e:
            return f"Error during transcription: {str(e)}", current_transcript

    def convert_file_to_text(self, file):
        """Extract text from any supported file format.

        Returns the extracted text, or a message describing the failure.
        """
        if file is None:
            return "No file uploaded for conversion."
        try:
            return self._extract(file)
        except _UnsupportedFormatError as exc:
            return f"Unsupported file format for conversion: {exc.extension}"
        except _ExtractionError as exc:
            return str(exc)


def create_interface():
    """Build the Gradio Blocks UI and wire it to a ``TextSummarizer``."""
    summarizer = TextSummarizer()
    length_choices = list(SUMMARY_MAX_LENGTHS)

    with gr.Blocks(title="Text Summarization Dashboard") as interface:
        gr.Markdown("Text Summarization Dashboard")
        gr.Markdown("Manage files, and interact with specialized AI agents for various tasks.")

        # State component to store the uploaded file
        uploaded_file_state = gr.State(None)

        with gr.Tabs():
            with gr.TabItem("📄 File Management & Conversion"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Upload File")
                        file_input = gr.File(
                            label="Select a file",
                            file_types=[".txt", ".pdf", ".docx", ".mp3", ".wav", ".m4a", ".flac"],
                        )
                        uploaded_file_name = gr.Textbox(label="Current File", interactive=False)

                        def store_file(file):
                            """Keep the upload in state and show its name."""
                            if file:
                                return file, file.name
                            return None, "No file uploaded"

                        file_input.upload(
                            fn=store_file,
                            inputs=[file_input],
                            outputs=[uploaded_file_state, uploaded_file_name],
                        )

                    with gr.Column(scale=1):
                        gr.Markdown("### Convert to TXT")
                        gr.Markdown("Supported formats for conversion to .txt: `.pdf`, `.docx`, `.mp3`, `.wav`, `.m4a`, `.flac`")
                        convert_btn = gr.Button("Convert to TXT", variant="secondary")
                        conversion_output = gr.Textbox(
                            label="Conversion Output",
                            placeholder="Converted text will appear here...",
                            lines=8,
                            interactive=False,
                        )
                        convert_btn.click(
                            fn=summarizer.convert_file_to_text,
                            inputs=[uploaded_file_state],
                            outputs=[conversion_output],
                        )

            with gr.TabItem("✍️ Meeting Summarization"):
                gr.Markdown("### Meeting Summarization")
                gr.Markdown("Generate summaries from your meeting transcripts and other documents.")
                with gr.Row():
                    with gr.Column(scale=1):
                        summary_length = gr.Dropdown(
                            choices=length_choices,
                            value=DEFAULT_SUMMARY_LENGTH,
                            label="Summary Length",
                            # Generated from the real limits so the hint
                            # cannot drift from what the model is asked for.
                            info=", ".join(
                                f"{label}: up to {limit} tokens"
                                for label, limit in SUMMARY_MAX_LENGTHS.items()
                            ),
                        )
                        submit_btn = gr.Button("Generate Summary", variant="primary")
                    with gr.Column(scale=2):
                        output = gr.Textbox(
                            label="Summary Output",
                            lines=10,
                            placeholder="Your summary will appear here...",
                        )

                # NOTE: the controls in this accordion are display-only; no
                # event handler reads their values.
                with gr.Accordion("⚙️ Model Settings", open=False):
                    gr.Markdown("### Model Selection & Fine-Tuning")
                    gr.Markdown("Choose different models and configure their parameters.")
                    with gr.Row():
                        gr.Dropdown(
                            label="Select Summarization Model",
                            choices=["facebook/bart-large-cnn", "t5-small", "google/pegasus-xsum"],
                            value="facebook/bart-large-cnn",
                        )
                    with gr.Accordion("Fine-Tuning Options", open=False):
                        gr.Slider(label="Min Tokens", minimum=10, maximum=200, step=5, value=50)
                        gr.Slider(label="Max Tokens", minimum=50, maximum=500, step=10, value=150)
                        gr.Slider(label="Temperature", minimum=0.1, maximum=1.5, step=0.1, value=0.7)
                        gr.Slider(label="Top-K", minimum=0, maximum=100, step=1, value=50, info="0 to disable")
                        gr.Slider(label="Top-P (Nucleus Sampling)", minimum=0.0, maximum=1.0, step=0.05, value=0.95, info="0 to disable")
                        gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, step=0.1, value=1.2)
                        gr.Slider(label="Number of Beams", minimum=1, maximum=8, step=1, value=4)

            with gr.TabItem("🔴 Live Meeting Recording & Summarization"):
                gr.Markdown("### Live Meeting Transcription & Summarization")
                gr.Markdown("Record audio from your microphone, get a live transcript, and generate a summary.")

                live_transcript_state = gr.State("")

                with gr.Row():
                    with gr.Column(scale=1):
                        audio_input = gr.Audio(
                            label="Live Audio",
                            sources="microphone",
                            streaming=True,
                        )
                    with gr.Column(scale=2):
                        live_transcript_output = gr.Textbox(
                            label="Live Transcript",
                            placeholder="Transcript will appear here...",
                            lines=15,
                        )

                with gr.Row():
                    with gr.Column(scale=1):
                        live_summary_length = gr.Dropdown(
                            choices=length_choices,
                            value=DEFAULT_SUMMARY_LENGTH,
                            label="Summary Length",
                        )
                        live_summary_btn = gr.Button("Generate Summary", variant="primary")
                    with gr.Column(scale=2):
                        live_summary_output = gr.Textbox(
                            label="Meeting Summary",
                            placeholder="Summary will appear here...",
                            lines=5,
                        )

                audio_input.stream(
                    fn=summarizer.transcribe_stream,
                    inputs=[audio_input, live_transcript_state],
                    outputs=[live_transcript_output, live_transcript_state],
                )

                def generate_live_summary(transcript, length_option):
                    """Summarize the live transcript at the chosen length."""
                    max_len, min_len = _summary_bounds(length_option)
                    return summarizer.summarize_text(transcript, max_length=max_len, min_length=min_len)

                live_summary_btn.click(
                    fn=generate_live_summary,
                    inputs=[live_transcript_output, live_summary_length],
                    outputs=[live_summary_output],
                )

        submit_btn.click(
            fn=summarizer.process_file,
            inputs=[uploaded_file_state, summary_length],
            outputs=output,
        )

    return interface


if __name__ == "__main__":
    interface = create_interface()
    # NOTE(review): share=True asks Gradio for a public share link in
    # addition to binding on all interfaces; confirm this exposure is intended.
    interface.launch(server_name="0.0.0.0", server_port=7860, share=True)