Commit
·
1a30cc1
1
Parent(s):
44fb8bb
update app
Browse files
app.py
CHANGED
|
@@ -21,10 +21,6 @@ def get_wikipedia_page_props(input_str: str):
|
|
| 21 |
Returns:
|
| 22 |
str: The QID or "NIL" if the QID is not found.
|
| 23 |
"""
|
| 24 |
-
# # Check if the result is already in the cache
|
| 25 |
-
# if input_str in cache:
|
| 26 |
-
# return cache[input_str]
|
| 27 |
-
|
| 28 |
try:
|
| 29 |
# Preprocess the input string
|
| 30 |
page_name, language = input_str.split(" >> ")
|
|
@@ -57,14 +53,11 @@ def get_wikipedia_page_props(input_str: str):
|
|
| 57 |
if "wikibase_item" in page_props:
|
| 58 |
return page_props["wikibase_item"]
|
| 59 |
else:
|
| 60 |
-
|
| 61 |
-
return qid # fallback_to_openrefine(page_name, language)
|
| 62 |
else:
|
| 63 |
-
return qid
|
| 64 |
-
|
| 65 |
except Exception as e:
|
| 66 |
-
|
| 67 |
-
return qid # fallback_to_openrefine(page_name, language)
|
| 68 |
|
| 69 |
|
| 70 |
def get_wikipedia_title(qid, language="en"):
|
|
@@ -89,7 +82,6 @@ def get_wikipedia_title(qid, language="en"):
|
|
| 89 |
|
| 90 |
|
| 91 |
def disambiguate_sentence(sentence):
|
| 92 |
-
entities = []
|
| 93 |
# Generate model outputs for the sentence
|
| 94 |
outputs = model.generate(
|
| 95 |
**tokenizer([sentence], return_tensors="pt"),
|
|
@@ -99,18 +91,25 @@ def disambiguate_sentence(sentence):
|
|
| 99 |
)
|
| 100 |
decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
| 101 |
print(f"Decoded: {decoded}")
|
| 102 |
-
wikipedia_name = decoded[0] # Assuming
|
| 103 |
qid = get_wikipedia_page_props(wikipedia_name)
|
| 104 |
-
print(f"
|
| 105 |
-
|
| 106 |
-
#
|
| 107 |
title, url = get_wikipedia_title(qid)
|
| 108 |
-
#
|
| 109 |
-
entity_info = f"QID: {qid}, Title: {title}, URL: {url}"
|
| 110 |
-
entities.append(entity_info)
|
| 111 |
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
|
| 116 |
def nel_app_interface():
|
|
@@ -122,7 +121,7 @@ def nel_app_interface():
|
|
| 122 |
"entity should be surrounded by `[START]` and `[END]`. // "
|
| 123 |
"!Only one entity per sentence is supported at the moment!",
|
| 124 |
)
|
| 125 |
-
output_entities = gr.
|
| 126 |
|
| 127 |
# Interface definition
|
| 128 |
interface = gr.Interface(
|
|
|
|
| 21 |
Returns:
|
| 22 |
str: The QID or "NIL" if the QID is not found.
|
| 23 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
try:
|
| 25 |
# Preprocess the input string
|
| 26 |
page_name, language = input_str.split(" >> ")
|
|
|
|
| 53 |
if "wikibase_item" in page_props:
|
| 54 |
return page_props["wikibase_item"]
|
| 55 |
else:
|
| 56 |
+
return qid
|
|
|
|
| 57 |
else:
|
| 58 |
+
return qid
|
|
|
|
| 59 |
except Exception as e:
|
| 60 |
+
return qid
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
def get_wikipedia_title(qid, language="en"):
|
|
|
|
| 82 |
|
| 83 |
|
| 84 |
def disambiguate_sentence(sentence):
|
|
|
|
| 85 |
# Generate model outputs for the sentence
|
| 86 |
outputs = model.generate(
|
| 87 |
**tokenizer([sentence], return_tensors="pt"),
|
|
|
|
| 91 |
)
|
| 92 |
decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
| 93 |
print(f"Decoded: {decoded}")
|
| 94 |
+
wikipedia_name = decoded[0] # Assuming the entity name is in the output
|
| 95 |
qid = get_wikipedia_page_props(wikipedia_name)
|
| 96 |
+
print(f"QID: {qid}")
|
| 97 |
+
|
| 98 |
+
# Get Wikipedia title and URL
|
| 99 |
title, url = get_wikipedia_title(qid)
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
+
if qid == "NIL":
|
| 102 |
+
return "No entity found."
|
| 103 |
+
|
| 104 |
+
# Create an HTML output with a clickable link
|
| 105 |
+
entity_info = f"""
|
| 106 |
+
<div>
|
| 107 |
+
<strong>Entity:</strong> {title} <br>
|
| 108 |
+
<strong>QID:</strong> {qid} <br>
|
| 109 |
+
<a href="{url}" target="_blank">Wikipedia Page</a>
|
| 110 |
+
</div>
|
| 111 |
+
"""
|
| 112 |
+
return entity_info
|
| 113 |
|
| 114 |
|
| 115 |
def nel_app_interface():
|
|
|
|
| 121 |
"entity should be surrounded by `[START]` and `[END]`. // "
|
| 122 |
"!Only one entity per sentence is supported at the moment!",
|
| 123 |
)
|
| 124 |
+
output_entities = gr.HTML(label="Linked Entity")
|
| 125 |
|
| 126 |
# Interface definition
|
| 127 |
interface = gr.Interface(
|