# Here are the imports
import PyPDF2
import re
import torch
from transformers import pipeline
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
import gradio as gr
import io
import numpy as np
import soundfile as sf
import tempfile
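# Note: beyond the standard library, these imports assume PyPDF2, torch,
# transformers, fairseq, gradio, numpy, and soundfile are available in the
# environment (on a Hugging Face Space, typically listed in requirements.txt).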
# Here is the code
# Function to extract and clean abstract from PDF
def extract_and_clean_abstract(uploaded_file):
    if uploaded_file is None:
        return "No file uploaded."
    # Read the file using its temporary file path
    with open(uploaded_file.name, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        full_text = ""
        for page in reader.pages:
            full_text += page.extract_text()
    # Find the abstract
    pattern = r"(Abstract|ABSTRACT|abstract)(.*?)(Introduction|INTRODUCTION|introduction|1|Keywords|KEYWORDS|keywords)"
    match = re.search(pattern, full_text, re.DOTALL)
    if match:
        abstract = match.group(2).strip()
    else:
        return "Abstract not found."
    # Clean the abstract
    cleaned_abstract = abstract.replace('\n', ' ').replace('- ', '')
    return cleaned_abstract
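# A quick local sanity check of the extraction step (kept as a comment so it does
# not run inside the Space). The object below is hypothetical and only mimics what
# Gradio passes in, i.e. something exposing a .name file path; the regex above
# assumes the abstract is followed by an "Introduction", "Keywords", or "1" heading.
#
#   class FakeUpload:
#       name = "example_paper.pdf"  # hypothetical path
#   print(extract_and_clean_abstract(FakeUpload()))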
# Function to summarize text
def summarize_text(text):
    # Initialize the summarization pipeline with the summarization model
    summarizer = pipeline(
        "summarization",
        "pszemraj/led-base-book-summary",
        device=0 if torch.cuda.is_available() else -1,
    )
    # Generate the summary
    result = summarizer(
        text,
        min_length=8,
        max_length=25,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        do_sample=False,
        early_stopping=True,
    )
    # Extract the first sentence from the summary
    first_sentence = re.split(r'(?<=[.:;!?])\s', result[0]['summary_text'])[0]
    return first_sentence
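# Note: the "pszemraj/led-base-book-summary" weights are downloaded once and then
# cached locally by transformers, but the pipeline object itself is re-created on
# every request; this keeps the function self-contained at a small latency cost.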
# Function for text-to-speech
def text_to_speech(text):
    # Check if CUDA is available and set the device accordingly
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Load the TTS model and task from Hugging Face Hub
    models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
        "facebook/fastspeech2-en-ljspeech",  # Or another TTS model of your choice
        arg_overrides={"vocoder": "hifigan", "fp16": False}
    )
    # Ensure the model is on the correct device
    model = models[0].to(device)
    # Update the config with the data config from the task
    TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
    # Build the generator
    generator = task.build_generator([model], cfg)
    # Get the model input from the text
    sample = TTSHubInterface.get_model_input(task, text)
    sample["net_input"]["src_tokens"] = sample["net_input"]["src_tokens"].to(device)
    sample["net_input"]["src_lengths"] = sample["net_input"]["src_lengths"].to(device)
    # Generate the waveform
    wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
    # Move the waveform to CPU if it's on GPU
    if wav.is_cuda:
        wav = wav.cpu()
    # Write the waveform to a temporary file and return the file path
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        sf.write(tmp_file.name, wav.numpy(), rate)
    return tmp_file.name
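# The function returns the path of a temporary .wav file; gr.Audio accepts such a
# filepath directly, so no further conversion is needed on the Gradio side.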
def process_pdf(uploaded_file):
    """
    Process the uploaded PDF: extract the abstract, summarize it, and convert the summary to speech.
    """
    abstract = extract_and_clean_abstract(uploaded_file)
    summary = summarize_text(abstract)
    audio_output = text_to_speech(summary)
    return audio_output
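# Note: if extract_and_clean_abstract returns an error message such as
# "Abstract not found.", that message is passed on to the summarizer and the
# TTS model as-is, so the audio output will simply speak the error text.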
# Create the Gradio interface
iface = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=gr.Audio(label="Audio Summary"),
    title="PDF Abstract Summary to Speech",
    description="Upload a PDF file that has an abstract. The app will extract the abstract, summarize it, and convert the summary to speech.",
    examples=[
        ["Article 11 Hidden Technical Debt in Machine Learning Systems.pdf"],
        ["Article 7 Efficient Estimation of Word Representations in Vector Space.pdf"],
        ["Article 6 BloombergGPT_ A Large Language Model for Finance.pdf"],
    ],
)
# Run the Gradio app
iface.launch()
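# Usage note (a sketch, not part of the Space itself): run locally, the same script
# starts a local Gradio server; passing share=True to iface.launch() would also
# create a temporary public link. The example PDFs listed above are assumed to sit
# next to app.py so Gradio can find them.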