Spaces:

WordLift
/

entity-linking

Running

App Files Files

cyberandy committed on Jul 16

Commit

25d948c

verified ·

1 Parent(s): fdaf3eb

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -44

app.py CHANGED Viewed

@@ -75,52 +75,46 @@ def load_model(selected_language, model_name=None, entity_set=None):
     # Suppress warnings during model loading
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         try:
-            if selected_language == "German":
-                # Download and load the German-specific model
-                try:
-                    nlp_model_de = spacy.load("de_core_news_lg")
-                except OSError:
-                    st.info("Downloading German language model... This may take a moment.")
-                    spacy.cli.download("de_core_news_lg")
-                    nlp_model_de = spacy.load("de_core_news_lg")
-                # Check if entityfishing component is available
-                if "entityfishing" not in nlp_model_de.pipe_names:
-                    try:
-                        nlp_model_de.add_pipe("entityfishing")
-                    except Exception as e:
-                        st.warning(f"Entity-fishing not available, using basic NER only: {e}")
-                        # Return model without entityfishing for basic NER
-                        return nlp_model_de
-                return nlp_model_de
-            elif selected_language == "English - spaCy":
-                # Download and load English-specific model
-                try:
-                    nlp_model_en = spacy.load("en_core_web_sm")
-                except OSError:
-                    st.info("Downloading English language model... This may take a moment.")
-                    spacy.cli.download("en_core_web_sm")
-                    nlp_model_en = spacy.load("en_core_web_sm")
-                # Check if entityfishing component is available
-                if "entityfishing" not in nlp_model_en.pipe_names:
-                    try:
-                        nlp_model_en.add_pipe("entityfishing")
-                    except Exception as e:
-                        st.warning(f"Entity-fishing not available, using basic NER only: {e}")
-                        # Return model without entityfishing for basic NER
-                        return nlp_model_en
-                return nlp_model_en
             else:
-                # Load the pretrained model for other languages
-                refined_model = Refined.from_pretrained(model_name=model_name, entity_set=entity_set)
-                return refined_model
         except Exception as e:
             st.error(f"Error loading model: {e}")
             return None

     # Suppress warnings during model loading
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         try:
+            if selected_language == "German" or selected_language == "English - spaCy":
+                # ... (your existing spaCy loading logic)
             else:
+                try:
+                    # Attempt to load the pretrained model directly
+                    refined_model = Refined.from_pretrained(model_name=model_name, entity_set=entity_set)
+                    return refined_model
+                except AttributeError as e:
+                    if "add_special_tokens" in str(e):
+                        st.warning("Encountered 'add_special_tokens' conflict. Attempting to fix by modifying tokenizer config...")
+                        # Define a local directory to save the model
+                        local_model_dir = f"./{model_name}_{entity_set}"
+                        # Download and save the tokenizer, then modify its config
+                        tokenizer = AutoTokenizer.from_pretrained(model_name)
+                        tokenizer.save_pretrained(local_model_dir)
+                        # Load the tokenizer_config.json and remove the conflicting key
+                        tokenizer_config_path = f"{local_model_dir}/tokenizer_config.json"
+                        with open(tokenizer_config_path, 'r') as f:
+                            config = json.load(f)
+                        if "add_special_tokens" in config:
+                            del config["add_special_tokens"]
+                        with open(tokenizer_config_path, 'w') as f:
+                            json.dump(config, f, indent=2)
+                        # Download and save the model
+                        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+                        model.save_pretrained(local_model_dir)
+                        # Load the model from the modified local directory
+                        refined_model = Refined.from_pretrained(model_name=local_model_dir, entity_set=entity_set)
+                        st.success("Successfully loaded model after applying fix.")
+                        return refined_model
+                    else:
+                        raise e  # Re-raise other AttributeError exceptions
         except Exception as e:
             st.error(f"Error loading model: {e}")
             return None