Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,11 +19,27 @@ st.set_page_config(
|
|
| 19 |
|
| 20 |
# Sidebar
|
| 21 |
st.sidebar.image("logo-wordlift.png")
|
| 22 |
-
language_options = {"English", "German"}
|
| 23 |
selected_language = st.sidebar.selectbox("Select the Language", list(language_options), index=0)
|
| 24 |
|
| 25 |
# Based on selected language, configure model, entity set, and citation options
|
| 26 |
-
if selected_language != "German":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
model_options = ["aida_model", "wikipedia_model_with_numbers"]
|
| 28 |
entity_set_options = ["wikidata", "wikipedia"]
|
| 29 |
|
|
@@ -41,22 +57,6 @@ if selected_language != "German":
|
|
| 41 |
|
| 42 |
with st.sidebar.expander('Citations'):
|
| 43 |
st.markdown(refined_citation)
|
| 44 |
-
else:
|
| 45 |
-
selected_model_name = None
|
| 46 |
-
selected_entity_set = None
|
| 47 |
-
|
| 48 |
-
entity_fishing_citation = """
|
| 49 |
-
@misc{entity-fishing,
|
| 50 |
-
title = {entity-fishing},
|
| 51 |
-
publisher = {GitHub},
|
| 52 |
-
year = {2016--2023},
|
| 53 |
-
archivePrefix = {swh},
|
| 54 |
-
eprint = {1:dir:cb0ba3379413db12b0018b7c3af8d0d2d864139c}
|
| 55 |
-
}
|
| 56 |
-
"""
|
| 57 |
-
|
| 58 |
-
with st.sidebar.expander('Citations'):
|
| 59 |
-
st.markdown(entity_fishing_citation)
|
| 60 |
|
| 61 |
@st.cache_resource # 👈 Add the caching decorator
|
| 62 |
def load_model(selected_language, model_name=None, entity_set=None):
|
|
@@ -66,6 +66,12 @@ def load_model(selected_language, model_name=None, entity_set=None):
|
|
| 66 |
nlp_model_de.add_pipe("entityfishing")
|
| 67 |
|
| 68 |
return nlp_model_de
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
else:
|
| 70 |
# Load the pretrained model for other languages
|
| 71 |
refined_model = Refined.from_pretrained(model_name=model_name, entity_set=entity_set)
|
|
@@ -101,20 +107,19 @@ entities_map = {}
|
|
| 101 |
entities_data = {}
|
| 102 |
|
| 103 |
if text_input:
|
| 104 |
-
if selected_language == "German":
|
| 105 |
doc_de = model(text_input)
|
| 106 |
entities = [(ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata) for ent in doc_de.ents]
|
| 107 |
for entity in entities:
|
| 108 |
entity_string, entity_type, wikidata_id, wikidata_url = entity
|
| 109 |
if wikidata_url:
|
| 110 |
-
# Ensure correct format for the German model
|
| 111 |
formatted_wikidata_url = wikidata_url.replace("https://www.wikidata.org/wiki/", "http://www.wikidata.org/entity/")
|
| 112 |
entities_map[entity_string] = {"id": wikidata_id, "link": formatted_wikidata_url}
|
| 113 |
entity_data = get_entity_data(formatted_wikidata_url)
|
| 114 |
|
| 115 |
if entity_data is not None:
|
| 116 |
entities_data[entity_string] = entity_data
|
| 117 |
-
|
| 118 |
else:
|
| 119 |
entities = model.process_text(text_input)
|
| 120 |
|
|
|
|
| 19 |
|
| 20 |
# Sidebar
|
| 21 |
st.sidebar.image("logo-wordlift.png")
|
| 22 |
+
language_options = {"English", "English - spaCy", "German"}
|
| 23 |
selected_language = st.sidebar.selectbox("Select the Language", list(language_options), index=0)
|
| 24 |
|
| 25 |
# Based on selected language, configure model, entity set, and citation options
|
| 26 |
+
if selected_language == "German" or selected_language == "English - spaCy":
|
| 27 |
+
selected_model_name = None
|
| 28 |
+
selected_entity_set = None
|
| 29 |
+
|
| 30 |
+
entity_fishing_citation = """
|
| 31 |
+
@misc{entity-fishing,
|
| 32 |
+
title = {entity-fishing},
|
| 33 |
+
publisher = {GitHub},
|
| 34 |
+
year = {2016--2023},
|
| 35 |
+
archivePrefix = {swh},
|
| 36 |
+
eprint = {1:dir:cb0ba3379413db12b0018b7c3af8d0d2d864139c}
|
| 37 |
+
}
|
| 38 |
+
"""
|
| 39 |
+
|
| 40 |
+
with st.sidebar.expander('Citations'):
|
| 41 |
+
st.markdown(entity_fishing_citation)
|
| 42 |
+
else:
|
| 43 |
model_options = ["aida_model", "wikipedia_model_with_numbers"]
|
| 44 |
entity_set_options = ["wikidata", "wikipedia"]
|
| 45 |
|
|
|
|
| 57 |
|
| 58 |
with st.sidebar.expander('Citations'):
|
| 59 |
st.markdown(refined_citation)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
@st.cache_resource # 👈 Add the caching decorator
|
| 62 |
def load_model(selected_language, model_name=None, entity_set=None):
|
|
|
|
| 66 |
nlp_model_de.add_pipe("entityfishing")
|
| 67 |
|
| 68 |
return nlp_model_de
|
| 69 |
+
elif selected_language == "English":
|
| 70 |
+
# Load English-specific model
|
| 71 |
+
nlp_model_en = spacy.load("en_core_web_sm")
|
| 72 |
+
nlp_model_en.add_pipe("entityfishing")
|
| 73 |
+
|
| 74 |
+
return nlp_model_en
|
| 75 |
else:
|
| 76 |
# Load the pretrained model for other languages
|
| 77 |
refined_model = Refined.from_pretrained(model_name=model_name, entity_set=entity_set)
|
|
|
|
| 107 |
entities_data = {}
|
| 108 |
|
| 109 |
if text_input:
|
| 110 |
+
if selected_language in ["German", "English - spaCy"]:
|
| 111 |
doc_de = model(text_input)
|
| 112 |
entities = [(ent.text, ent.label_, ent._.kb_qid, ent._.url_wikidata) for ent in doc_de.ents]
|
| 113 |
for entity in entities:
|
| 114 |
entity_string, entity_type, wikidata_id, wikidata_url = entity
|
| 115 |
if wikidata_url:
|
| 116 |
+
# Ensure correct format for the German and English model
|
| 117 |
formatted_wikidata_url = wikidata_url.replace("https://www.wikidata.org/wiki/", "http://www.wikidata.org/entity/")
|
| 118 |
entities_map[entity_string] = {"id": wikidata_id, "link": formatted_wikidata_url}
|
| 119 |
entity_data = get_entity_data(formatted_wikidata_url)
|
| 120 |
|
| 121 |
if entity_data is not None:
|
| 122 |
entities_data[entity_string] = entity_data
|
|
|
|
| 123 |
else:
|
| 124 |
entities = model.process_text(text_input)
|
| 125 |
|