Prathap committed on
Commit
029a54e
·
1 Parent(s): 052b49e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ @author: prathap
3
+ """
4
+ from transformers import pipeline
5
+ from bs4 import BeautifulSoup
6
+ import torch
7
+ import requests
8
+ import pandas as pd
9
+ import streamlit as st
10
+ import warnings
11
# Suppress every Python warning for the lifetime of the app process.
warnings.filterwarnings(action="ignore")

# Page heading shown at the top of the Streamlit app.
st.title("Automatic summarization")
13
+
14
def _cache_model(func):
    """Wrap *func* with whichever Streamlit resource cache is available.

    ``st.cache`` is deprecated in favour of ``st.cache_resource``; prefer the
    new API when the installed Streamlit provides it and fall back to the
    legacy decorator otherwise so older installations keep working.
    """
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        return cache_resource(func)
    # Legacy path: allow_output_mutation avoids hashing the returned model.
    return st.cache(allow_output_mutation=True)(func)


@_cache_model
def pipen():
    """Return the default Hugging Face summarization pipeline.

    The result is cached by Streamlit so the pipeline is built once
    instead of on every script rerun.
    """
    return pipeline("summarization")
18
+
19
+
20
+
21
+
22
+
23
# Upper bound on the number of space-separated words packed into one chunk.
MAX_CHUNK_WORDS = 400
# Seconds to wait for the remote server before giving up on the request.
REQUEST_TIMEOUT_SECONDS = 30
# File the final summary is written to (relative to the working directory).
SUMMARY_PATH = 'stsummary.txt'


def _fetch_article(page_url):
    """Download *page_url* and return the joined text of its h1 and p tags.

    Raises:
        requests.RequestException: if the URL is invalid, the connection
            fails or times out, or the server answers with an error status.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    return ' '.join(tag.text for tag in soup.find_all(['h1', 'p']))


def _split_into_chunks(article, max_words=MAX_CHUNK_WORDS):
    """Split *article* into sentence-aligned chunks of at most *max_words*.

    Sentences are delimited by '.', '?' and '!'. Sentences are appended to
    the current chunk while the word count stays within *max_words*; a
    sentence that does not fit starts a new chunk (so a single sentence
    longer than *max_words* becomes its own oversized chunk). Chunks that
    contain only whitespace are dropped.
    """
    marked = article
    for mark in ('.', '?', '!'):
        marked = marked.replace(mark, mark + '<eos>')

    word_chunks = []
    for sentence in marked.split('<eos>'):
        words = sentence.split(' ')
        if word_chunks and len(word_chunks[-1]) + len(words) <= max_words:
            word_chunks[-1].extend(words)
        else:
            word_chunks.append(words)

    joined = (' '.join(words) for words in word_chunks)
    return [chunk for chunk in joined if chunk.strip()]


def _summarize_url(page_url):
    """Fetch *page_url*, summarize its text, display it and save it to disk.

    Problems the user can act on (blank URL, failed download, page without
    text) are reported in the app instead of raising.
    """
    page_url = page_url.strip()
    if not page_url:
        st.warning("Please enter a URL to summarize.")
        return

    try:
        article = _fetch_article(page_url)
    except requests.RequestException as err:
        st.error(f"Could not fetch the page: {err}")
        return

    chunks = _split_into_chunks(article)
    if not chunks:
        # Nothing to feed the model; avoid calling it with empty input.
        st.warning("No headline or paragraph text was found at that URL.")
        return

    with st.spinner("Loading the Model into the memory...."):
        model = pipen()
        results = model(chunks, max_length=120, min_length=30, do_sample=False)

    summary = ' '.join(item['summary_text'] for item in results)

    st.write("Success")
    st.write(summary)
    # Explicit encoding so non-ASCII summaries are written on any platform.
    with open(SUMMARY_PATH, 'w', encoding='utf-8') as f:
        f.write(summary)


url = st.text_input('The URL link')

if st.button("Submit"):
    _summarize_url(url)