# summarization / app.py
# Prathap's picture
# Update app.py
# 74a03fa
# raw
# history blame
# 2.65 kB
from transformers import pipeline
import base64
import time
from bs4 import BeautifulSoup
import requests
import streamlit as st
import warnings
# Suppress every Python warning raised from this point on.
warnings.filterwarnings(action="ignore")

# Timestamp taken when the script runs; text_downloader embeds it in the
# name of the generated download file.
timestr = time.strftime("%Y%m%d-%H%M%S", time.localtime())

# Page header: author credit first, then the app title.
st.markdown(' Created by **_Prathap_**. :baby_chick:')
st.title("Automatic text summarization")
@st.cache(allow_output_mutation=True)
def pipen():
    """Return the transformers "summarization" pipeline.

    The function is wrapped in ``st.cache`` so Streamlit can reuse the
    returned object instead of rebuilding it on every call;
    ``allow_output_mutation=True`` is passed so the cached object is allowed
    to change between calls.
    """
    return pipeline("summarization")
def text_downloader(raw_text):
    """Render a "Download File" heading and a link that downloads *raw_text*.

    The text is base64-encoded and embedded directly in a ``data:`` URI, so
    the link carries the whole file content. The suggested file name includes
    the module-level ``timestr`` timestamp.
    """
    encoded = base64.b64encode(raw_text.encode()).decode()
    download_name = f"new_text_file_{timestr}_.txt"
    link = (
        '<a href="data:file/txt;base64,' + encoded + '"'
        ' download="' + download_name + '">Click Here!!</a>'
    )
    st.markdown("#### Download File ###")
    # Raw HTML is required for the anchor tag, hence unsafe_allow_html.
    st.markdown(link, unsafe_allow_html=True)
def _chunk_article(article, max_chunk=400):
    """Split *article* into sentence-aligned chunks of at most *max_chunk* words.

    Sentences are detected by the terminators ``.``, ``?`` and ``!``. Whole
    sentences are packed into a chunk until adding the next one would exceed
    the word budget. A single sentence longer than the budget is cut at word
    boundaries so that no chunk ever exceeds *max_chunk* words.

    Returns a list of strings; the list is empty when *article* contains no
    words.
    """
    # Tag every sentence end so the text can be split while keeping the
    # punctuation attached to the sentence it closes.
    for mark in ('.', '?', '!'):
        article = article.replace(mark, mark + '<eos>')

    chunks = []
    current = []
    for sentence in article.split('<eos>'):
        # split() without an argument drops the empty tokens that leading or
        # repeated spaces would otherwise add to the word count.
        words = sentence.split()
        if not words:
            continue
        if current and len(current) + len(words) > max_chunk:
            chunks.append(' '.join(current))
            current = []
        # An over-long sentence is emitted in max_chunk-sized pieces; whatever
        # remains starts the next chunk.
        while len(words) > max_chunk:
            chunks.append(' '.join(words[:max_chunk]))
            words = words[max_chunk:]
        current.extend(words)
    if current:
        chunks.append(' '.join(current))
    return chunks


def _summarize_url(url):
    """Fetch *url*, summarize its ``h1``/``p`` text and show the result.

    Problems the user can fix (blank URL, unreachable or failing page, page
    without any headline/paragraph text) are reported in the app instead of
    raising, so the rest of the page still renders.
    """
    target = url.strip()
    if not target:
        st.warning("Please paste a URL before submitting.")
        return

    try:
        # The timeout keeps an unresponsive site from hanging the app.
        r = requests.get(target, timeout=30)
        r.raise_for_status()
    except requests.RequestException as exc:
        st.error(f"Could not fetch the page: {exc}")
        return

    soup = BeautifulSoup(r.text, 'html.parser')
    results = soup.find_all(['h1', 'p'])
    article = ' '.join(result.text for result in results)

    chunks = _chunk_article(article, max_chunk=400)
    if not chunks:
        st.warning("No headline or paragraph text was found at that URL.")
        return

    with st.spinner("Loading the Model into the memory...."):
        model = pipen()
    # One summary is produced per chunk; they are joined into a single text.
    res = model(chunks, max_length=50, min_length=30, do_sample=False)
    text = ' '.join(summ['summary_text'] for summ in res)

    st.write("Success")
    st.write(text)
    text_downloader(text)


url = st.text_input('Paste URL ⤵️')
if st.button("Submit"):
    _summarize_url(url)
if st.button("Contact"):
    # Author blurb followed by profile and project links, written in order.
    contact_lines = (
        "Hi there, I'm Prathap 👋. 2+ years Applied Deep Learning experience",
        "✅ [LinkedIn](https://linkedin.com/in/prathapreddyk)",
        " 📚[Github](https://github.com/Pratap517)",
        " 📗Analyze Csv files in one step [Click Here](https://data-analyse-prathap.herokuapp.com)",
        " 😷 Face Mask Detection App [Click Here](https://mask-detection-5a800.firebaseapp.com/)",
    )
    for contact_line in contact_lines:
        st.write(contact_line)