Spaces:

JamesKingsley
/

Topic_Modeling_LDA

Sleeping

JamesKingsley committed on Jun 3

Commit

a4b6d10

1 Parent(s): 6f88b44

Add nltk.download for wordnet resource

Files changed (1) hide show

app.py CHANGED Viewed

@@ -12,13 +12,17 @@ from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
 import os
 import time
 import zipfile
 # Load models and label mapping
 lda = joblib.load("lda_model.joblib")
 vectorizer = joblib.load("vectorizer.joblib")
 auto_labels = joblib.load("topic_labels.joblib")
-#Optional topic summaries
 topic_summaries = {
     "Politics & Gun Rights": "Discussions about government policies, laws, gun control, and rights.",
     "Computing & Hardware": "Technical issues and terms related to computer hardware and drivers.",
@@ -32,7 +36,7 @@ topic_summaries = {
     "Middle East Politics & Conflicts": "Topics involving Israel, Armenia, conflict regions."
 }
-#Tokenizer and lemmatizer
 tokenizer = TreebankWordTokenizer()
 lemmatizer = WordNetLemmatizer()
@@ -85,7 +89,8 @@ def cleanup_old_predictions(directory=".", extension=".txt", max_age_minutes=10)
 def download_log():
     zip_filename = "lda_predictions_log.zip"
     with zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
-        zipf.write("lda_predictions_log.csv")
     return zip_filename
 def save_feedback(text, feedback):

 import os
 import time
 import zipfile
+import nltk
+# Download wordnet resource to avoid LookupError
+nltk.download('wordnet')
 # Load models and label mapping
 lda = joblib.load("lda_model.joblib")
 vectorizer = joblib.load("vectorizer.joblib")
 auto_labels = joblib.load("topic_labels.joblib")
+# Optional topic summaries
 topic_summaries = {
     "Politics & Gun Rights": "Discussions about government policies, laws, gun control, and rights.",
     "Computing & Hardware": "Technical issues and terms related to computer hardware and drivers.",
     "Middle East Politics & Conflicts": "Topics involving Israel, Armenia, conflict regions."
 }
+# Tokenizer and lemmatizer
 tokenizer = TreebankWordTokenizer()
 lemmatizer = WordNetLemmatizer()
 def download_log():
     zip_filename = "lda_predictions_log.zip"
     with zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
+        if os.path.exists("lda_predictions_log.csv"):
+            zipf.write("lda_predictions_log.csv")
     return zip_filename
 def save_feedback(text, feedback):