JamesKingsley committed on
Commit
a4b6d10
·
1 Parent(s): 6f88b44

Add nltk.download for wordnet resource

Browse files
Files changed (1) hide show
  1. app.py +8 -3
app.py CHANGED
@@ -12,13 +12,17 @@ from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
12
  import os
13
  import time
14
  import zipfile
 
 
 
 
15
 
16
  # Load models and label mapping
17
  lda = joblib.load("lda_model.joblib")
18
  vectorizer = joblib.load("vectorizer.joblib")
19
  auto_labels = joblib.load("topic_labels.joblib")
20
 
21
- #Optional topic summaries
22
  topic_summaries = {
23
  "Politics & Gun Rights": "Discussions about government policies, laws, gun control, and rights.",
24
  "Computing & Hardware": "Technical issues and terms related to computer hardware and drivers.",
@@ -32,7 +36,7 @@ topic_summaries = {
32
  "Middle East Politics & Conflicts": "Topics involving Israel, Armenia, conflict regions."
33
  }
34
 
35
- #Tokenizer and lemmatizer
36
  tokenizer = TreebankWordTokenizer()
37
  lemmatizer = WordNetLemmatizer()
38
 
@@ -85,7 +89,8 @@ def cleanup_old_predictions(directory=".", extension=".txt", max_age_minutes=10)
85
  def download_log():
86
  zip_filename = "lda_predictions_log.zip"
87
  with zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
88
- zipf.write("lda_predictions_log.csv")
 
89
  return zip_filename
90
 
91
  def save_feedback(text, feedback):
 
12
  import os
13
  import time
14
  import zipfile
15
+ import nltk
16
+
17
+ # Download wordnet resource to avoid LookupError
18
+ nltk.download('wordnet')
19
 
20
  # Load models and label mapping
21
  lda = joblib.load("lda_model.joblib")
22
  vectorizer = joblib.load("vectorizer.joblib")
23
  auto_labels = joblib.load("topic_labels.joblib")
24
 
25
+ # Optional topic summaries
26
  topic_summaries = {
27
  "Politics & Gun Rights": "Discussions about government policies, laws, gun control, and rights.",
28
  "Computing & Hardware": "Technical issues and terms related to computer hardware and drivers.",
 
36
  "Middle East Politics & Conflicts": "Topics involving Israel, Armenia, conflict regions."
37
  }
38
 
39
+ # Tokenizer and lemmatizer
40
  tokenizer = TreebankWordTokenizer()
41
  lemmatizer = WordNetLemmatizer()
42
 
 
89
  def download_log():
90
  zip_filename = "lda_predictions_log.zip"
91
  with zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
92
+ if os.path.exists("lda_predictions_log.csv"):
93
+ zipf.write("lda_predictions_log.csv")
94
  return zip_filename
95
 
96
  def save_feedback(text, feedback):