Spaces:

WordLift
/

entity-linking

Running

App Files Files

cyberandy committed on Jul 16

Commit

813c7ba

verified ·

1 Parent(s): ad44c67

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -27

app.py CHANGED Viewed

@@ -74,10 +74,14 @@ else:
 @st.cache_resource  # 👈 Add the caching decorator
 def load_model(selected_language, model_name=None, entity_set=None):
-    # Suppress warnings during model loading
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         try:
             # This block handles the spaCy models for German and English
             if selected_language == "German":
@@ -87,12 +91,9 @@ def load_model(selected_language, model_name=None, entity_set=None):
                     st.info("Downloading German language model... This may take a moment.")
                     spacy.cli.download("de_core_news_lg")
                     nlp_model_de = spacy.load("de_core_news_lg")
                 if "entityfishing" not in nlp_model_de.pipe_names:
-                    try:
-                        nlp_model_de.add_pipe("entityfishing")
-                    except Exception as e:
-                        st.warning(f"Entity-fishing not available, using basic NER only: {e}")
                 return nlp_model_de
             elif selected_language == "English - spaCy":
@@ -102,52 +103,58 @@ def load_model(selected_language, model_name=None, entity_set=None):
                     st.info("Downloading English language model... This may take a moment.")
                     spacy.cli.download("en_core_web_sm")
                     nlp_model_en = spacy.load("en_core_web_sm")
                 if "entityfishing" not in nlp_model_en.pipe_names:
-                    try:
-                        nlp_model_en.add_pipe("entityfishing")
-                    except Exception as e:
-                        st.warning(f"Entity-fishing not available, using basic NER only: {e}")
                 return nlp_model_en
             # This block handles the ReFinED model and the "add_special_tokens" error
             else:
                 try:
-                    # First, attempt to load the model as usual
                     return Refined.from_pretrained(model_name=model_name, entity_set=entity_set)
                 except Exception as e:
-                    # If the specific "add_special_tokens" error occurs, apply the fix
                     if "add_special_tokens" in str(e):
-                        st.warning("Conflict detected. Applying fix by modifying tokenizer config...")
-                        # Define a local path to save/load the fixed model
                         local_model_path = f"./{model_name}-{entity_set}-fixed"
-                        # Download tokenizer, modify config, and save locally
-                        tokenizer = AutoTokenizer.from_pretrained(model_name)
-                        tokenizer.save_pretrained(local_model_path)
                         config_path = os.path.join(local_model_path, "tokenizer_config.json")
                         with open(config_path, "r") as f:
                             config_data = json.load(f)
-                        # Remove the conflicting parameter
-                        config_data.pop("add_special_tokens", None)
                         with open(config_path, "w") as f:
                             json.dump(config_data, f, indent=2)
-                        # Now, load the model from the local, fixed path
-                        st.success("Fix applied. Loading model from local cache.")
                         return Refined.from_pretrained(model_name=local_model_path, entity_set=entity_set)
                     else:
-                        # If it's a different error, raise it
-                        raise e
         except Exception as e:
-            st.error(f"Error loading model: {e}")
             return None
 # Use the cached model

 @st.cache_resource  # 👈 Add the caching decorator
 def load_model(selected_language, model_name=None, entity_set=None):
+    # This dictionary maps the easy names to their full Hugging Face Hub IDs
+    model_mapping = {
+        "aida_model": "amazon-science/ReFinED-aida-model",
+        "wikipedia_model_with_numbers": "amazon-science/ReFinED-wikipedia-model"
+    }
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         try:
             # This block handles the spaCy models for German and English
             if selected_language == "German":
                     st.info("Downloading German language model... This may take a moment.")
                     spacy.cli.download("de_core_news_lg")
                     nlp_model_de = spacy.load("de_core_news_lg")
                 if "entityfishing" not in nlp_model_de.pipe_names:
+                    try: nlp_model_de.add_pipe("entityfishing")
+                    except Exception as e: st.warning(f"Entity-fishing not available: {e}")
                 return nlp_model_de
             elif selected_language == "English - spaCy":
                     st.info("Downloading English language model... This may take a moment.")
                     spacy.cli.download("en_core_web_sm")
                     nlp_model_en = spacy.load("en_core_web_sm")
                 if "entityfishing" not in nlp_model_en.pipe_names:
+                    try: nlp_model_en.add_pipe("entityfishing")
+                    except Exception as e: st.warning(f"Entity-fishing not available: {e}")
                 return nlp_model_en
             # This block handles the ReFinED model and the "add_special_tokens" error
             else:
                 try:
                     return Refined.from_pretrained(model_name=model_name, entity_set=entity_set)
                 except Exception as e:
                     if "add_special_tokens" in str(e):
+                        st.warning("Conflict detected. Applying fix by downloading and patching model...")
+                        # 1. Get the REAL model name from our mapping
+                        real_model_name = model_mapping.get(model_name)
+                        if not real_model_name:
+                            st.error(f"Unknown model alias: {model_name}")
+                            return None
+                        # 2. Define a local path to save the fixed model
                         local_model_path = f"./{model_name}-{entity_set}-fixed"
+                        # 3. Download the tokenizer and the model using the REAL name
+                        st.info(f"Downloading model files for {real_model_name}...")
+                        tokenizer = AutoTokenizer.from_pretrained(real_model_name)
+                        model_files = AutoModelForSeq2SeqLM.from_pretrained(real_model_name)
+                        # 4. Save them to the local directory
+                        tokenizer.save_pretrained(local_model_path)
+                        model_files.save_pretrained(local_model_path)
+                        st.info("Model files downloaded.")
+                        # 5. Patch the tokenizer config file
                         config_path = os.path.join(local_model_path, "tokenizer_config.json")
                         with open(config_path, "r") as f:
                             config_data = json.load(f)
+                        config_data.pop("add_special_tokens", None) # Remove the conflicting key
                         with open(config_path, "w") as f:
                             json.dump(config_data, f, indent=2)
+                        # 6. Load the model from the local, fixed path
+                        st.success("Patch applied. Loading model from local cache...")
                         return Refined.from_pretrained(model_name=local_model_path, entity_set=entity_set)
                     else:
+                        raise e # If it's a different error, we still want to see it
         except Exception as e:
+            st.error(f"Failed to load model. Error: {e}")
             return None
 # Use the cached model