Spaces:
Runtime error
Runtime error
Commit
·
cfe3fe5
1
Parent(s):
d56f03c
add comments
Browse files
app.py
CHANGED
|
@@ -22,28 +22,30 @@ texts = {"en": DEFAULT_TEXT, "ca": "Apple està buscant comprar una startup del
|
|
| 22 |
button_css = "float: right; --tw-border-opacity: 1; border-color: rgb(229 231 235 / var(--tw-border-opacity)); --tw-gradient-from: rgb(243 244 246 / 0.7); --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to, rgb(243 244 246 / 0)); --tw-gradient-to: rgb(229 231 235 / 0.8); --tw-text-opacity: 1; color: rgb(55 65 81 / var(--tw-text-opacity)); border-width: 1px; --tw-bg-opacity: 1; background-color: rgb(255 255 255 / var(--tw-bg-opacity)); background-image: linear-gradient(to bottom right, var(--tw-gradient-stops)); display: inline-flex; flex: 1 1 0%; align-items: center; justify-content: center; --tw-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.05); --tw-shadow-colored: 0 1px 2px 0 var(--tw-shadow-color); box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow); -webkit-appearance: button; border-radius: 0.5rem; padding-top: 0.5rem; padding-bottom: 0.5rem; padding-left: 1rem; padding-right: 1rem; font-size: 1rem; line-height: 1.5rem; font-weight: 600;"
|
| 23 |
NOUN_ATTR = ['text', 'root.text', 'root.dep_', 'root.head.text']
|
| 24 |
|
|
|
|
| 25 |
def get_all_models():
    """Return the model name prefixes referenced in ``requirements.txt``.

    Every line containing ``huggingface.co`` is split on ``/``; the fifth
    part is taken as the model package name and reduced to its first three
    underscore-separated tokens (for example ``en_core_web_sm`` becomes
    ``en_core_web``).

    Returns:
        list[str]: unique prefixes, in order of first appearance.

    Raises:
        OSError: if ``requirements.txt`` cannot be opened.
    """
    models = []
    with open("requirements.txt", encoding="utf-8") as f:
        for line in f:
            if "huggingface.co" not in line:
                continue
            parts = line.strip().split("/")
            # A line that merely mentions the host (a comment, a short URL)
            # has no fifth part; skip it instead of raising IndexError.
            if len(parts) < 5 or not parts[4]:
                continue
            model = "_".join(parts[4].split("_")[:3])
            if model not in models:
                models.append(model)
    return models
|
| 35 |
|
| 36 |
-
|
| 37 |
# Model name prefixes found in requirements.txt, computed once at import time.
models = get_all_models()
|
| 38 |
|
| 39 |
-
|
| 40 |
def download_svg(svg):
    """Build an HTML download link that embeds ``svg`` as a base64 data URI.

    Args:
        svg: SVG markup as a string.

    Returns:
        str: an ``<a download="displacy.svg">`` element styled with the
        module-level ``button_css``.
    """
    # Base64 output is pure ASCII, so decoding yields exactly the payload text.
    payload = base64.b64encode(bytes(svg, 'utf-8')).decode('ascii')
    data_uri = 'data:image/svg+xml;base64,' + payload
    return f'<a download="displacy.svg" href="{data_uri}" style="{button_css}">Download as SVG</a>'
|
| 45 |
|
| 46 |
-
|
|
|
|
| 47 |
def dependency(text, col_punct, col_phrase, compact, bg, font, model):
|
| 48 |
model_name = model + "_sm"
|
| 49 |
nlp = spacy.load(model_name)
|
|
@@ -51,10 +53,10 @@ def dependency(text, col_punct, col_phrase, compact, bg, font, model):
|
|
| 51 |
options = {"compact": compact, "collapse_phrases": col_phrase,
|
| 52 |
"collapse_punct": col_punct, "bg": bg, "color": font}
|
| 53 |
svg = displacy.render(doc, style="dep", options=options)
|
| 54 |
-
download = download_svg(svg)
|
| 55 |
return svg, download, model_name
|
| 56 |
|
| 57 |
-
|
| 58 |
def entity(text, ents, model):
|
| 59 |
model_name = model + "_sm"
|
| 60 |
nlp = spacy.load(model_name)
|
|
@@ -63,7 +65,7 @@ def entity(text, ents, model):
|
|
| 63 |
svg = displacy.render(doc, style="ent", options=options)
|
| 64 |
return svg, model_name
|
| 65 |
|
| 66 |
-
|
| 67 |
def token(text, attributes, model):
|
| 68 |
model_name = model + "_sm"
|
| 69 |
nlp = spacy.load(model_name)
|
|
@@ -77,7 +79,8 @@ def token(text, attributes, model):
|
|
| 77 |
data = pd.DataFrame(data, columns=attributes)
|
| 78 |
return data, model_name
|
| 79 |
|
| 80 |
-
|
|
|
|
| 81 |
def default_token(text, attributes, model):
|
| 82 |
model_name = model + "_sm"
|
| 83 |
nlp = spacy.load(model_name)
|
|
@@ -90,7 +93,7 @@ def default_token(text, attributes, model):
|
|
| 90 |
data.append(tok_data)
|
| 91 |
return data, model_name
|
| 92 |
|
| 93 |
-
|
| 94 |
def noun_chunks(text, model):
|
| 95 |
model_name = model + "_sm"
|
| 96 |
nlp = spacy.load(model_name)
|
|
@@ -102,7 +105,8 @@ def noun_chunks(text, model):
|
|
| 102 |
data = pd.DataFrame(data, columns=NOUN_ATTR)
|
| 103 |
return data, model_name
|
| 104 |
|
| 105 |
-
|
|
|
|
| 106 |
def default_noun_chunks(text, model):
|
| 107 |
model_name = model + "_sm"
|
| 108 |
nlp = spacy.load(model_name)
|
|
@@ -113,7 +117,7 @@ def default_noun_chunks(text, model):
|
|
| 113 |
chunk.root.head.text])
|
| 114 |
return data, model_name
|
| 115 |
|
| 116 |
-
|
| 117 |
def random_vectors(text, model):
|
| 118 |
model_name = model + "_md"
|
| 119 |
nlp = spacy.load(model_name)
|
|
@@ -125,13 +129,13 @@ def random_vectors(text, model):
|
|
| 125 |
choice = random.choices(str_list, k=2)
|
| 126 |
return round(choice[0].similarity(choice[1]), 2), choice[0].text, choice[1].text, model_name
|
| 127 |
|
| 128 |
-
|
| 129 |
def vectors(input1, input2, model):
    """Compute the similarity between two input texts.

    Args:
        input1: first text.
        input2: second text.
        model: model name prefix; ``"_md"`` is appended to form the name
            passed to ``spacy.load``.

    Returns:
        tuple: ``(similarity rounded to 2 decimals, loaded model name)``.
    """
    model_name = model + "_md"
    pipeline = spacy.load(model_name)
    first_doc = pipeline(input1)
    second_doc = pipeline(input2)
    score = round(first_doc.similarity(second_doc), 2)
    return score, model_name
|
| 133 |
|
| 134 |
-
|
| 135 |
def span(text, span1, span2, label1, label2, model):
|
| 136 |
model_name = model + "_sm"
|
| 137 |
nlp = spacy.load(model_name)
|
|
@@ -174,20 +178,17 @@ def span(text, span1, span2, label1, label2, model):
|
|
| 174 |
svg = displacy.render(doc, style="span")
|
| 175 |
return svg, model_name
|
| 176 |
|
| 177 |
-
|
| 178 |
def get_text(model):
    """Return the default sample text for the language of ``model``.

    The language code is the part of ``model`` before the first underscore
    (``"en_core_web"`` gives ``"en"``) and is used as the key into the
    module-level ``texts`` mapping.

    Args:
        model: model name prefix such as ``"en_core_web"``.

    Returns:
        The entry of ``texts`` for that language code.

    Raises:
        KeyError: if ``texts`` has no entry for the language code.
    """
    # A single lookup is enough: the result depends only on ``model``.
    language = model.split("_")[0]
    return texts[language]
|
| 184 |
|
| 185 |
-
|
| 186 |
demo = gr.Blocks(css="scrollbar.css")
|
| 187 |
|
| 188 |
with demo:
|
| 189 |
with gr.Box():
|
| 190 |
-
|
| 191 |
with gr.Row():
|
| 192 |
with gr.Column():
|
| 193 |
gr.Markdown("# Pipeline Visualizer")
|
|
@@ -208,14 +209,12 @@ with demo:
|
|
| 208 |
gr.Markdown("")
|
| 209 |
with gr.Column():
|
| 210 |
gr.Markdown("")
|
| 211 |
-
|
| 212 |
with gr.Row():
|
| 213 |
with gr.Column():
|
| 214 |
text_input = gr.Textbox(
|
| 215 |
value=DEFAULT_TEXT, interactive=True, label="Input Text")
|
| 216 |
with gr.Column():
|
| 217 |
gr.Markdown("")
|
| 218 |
-
|
| 219 |
button = gr.Button("Update", variant="primary")
|
| 220 |
with gr.Box():
|
| 221 |
with gr.Column():
|
|
@@ -349,7 +348,7 @@ with demo:
|
|
| 349 |
with gr.Column():
|
| 350 |
gr.Markdown("")
|
| 351 |
sim_random_button = gr.Button("Update random words")
|
| 352 |
-
sim_button = gr.Button("Update similarity", variant="primary")
|
| 353 |
with gr.Box():
|
| 354 |
with gr.Column():
|
| 355 |
with gr.Row():
|
|
@@ -391,7 +390,10 @@ with demo:
|
|
| 391 |
DEFAULT_TEXT, "U.K. startup", "U.K.", "ORG", "GPE", DEFAULT_MODEL)[0])
|
| 392 |
span_button = gr.Button("Update Spans", variant="primary")
|
| 393 |
|
|
|
|
| 394 |
model_input.change(get_text, inputs=[model_input], outputs=text_input)
|
|
|
|
|
|
|
| 395 |
button.click(dependency, inputs=[
|
| 396 |
text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
|
| 397 |
button.click(
|
|
@@ -404,6 +406,8 @@ with demo:
|
|
| 404 |
sim_text2, model_input], outputs=[sim_output, sim_model])
|
| 405 |
button.click(
|
| 406 |
span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
|
|
|
|
|
|
|
| 407 |
dep_button.click(dependency, inputs=[
|
| 408 |
text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
|
| 409 |
ent_button.click(
|
|
@@ -418,4 +422,5 @@ with demo:
|
|
| 418 |
span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
|
| 419 |
sim_random_button.click(random_vectors, inputs=[text_input, model_input], outputs=[
|
| 420 |
sim_output, sim_text1, sim_text2, sim_model])
|
|
|
|
| 421 |
demo.launch()
|
|
|
|
| 22 |
button_css = "float: right; --tw-border-opacity: 1; border-color: rgb(229 231 235 / var(--tw-border-opacity)); --tw-gradient-from: rgb(243 244 246 / 0.7); --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to, rgb(243 244 246 / 0)); --tw-gradient-to: rgb(229 231 235 / 0.8); --tw-text-opacity: 1; color: rgb(55 65 81 / var(--tw-text-opacity)); border-width: 1px; --tw-bg-opacity: 1; background-color: rgb(255 255 255 / var(--tw-bg-opacity)); background-image: linear-gradient(to bottom right, var(--tw-gradient-stops)); display: inline-flex; flex: 1 1 0%; align-items: center; justify-content: center; --tw-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.05); --tw-shadow-colored: 0 1px 2px 0 var(--tw-shadow-color); box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow); -webkit-appearance: button; border-radius: 0.5rem; padding-top: 0.5rem; padding-bottom: 0.5rem; padding-left: 1rem; padding-right: 1rem; font-size: 1rem; line-height: 1.5rem; font-weight: 600;"
|
| 23 |
NOUN_ATTR = ['text', 'root.text', 'root.dep_', 'root.head.text']
|
| 24 |
|
| 25 |
+
# get the huggingface models specified in the requirements.txt file
|
| 26 |
def get_all_models():
    """Return the model name prefixes referenced in ``requirements.txt``.

    Every line containing ``huggingface.co`` is split on ``/``; the fifth
    part is taken as the model package name and reduced to its first three
    underscore-separated tokens (for example ``en_core_web_sm`` becomes
    ``en_core_web``).

    Returns:
        list[str]: unique prefixes, in order of first appearance.

    Raises:
        OSError: if ``requirements.txt`` cannot be opened.
    """
    models = []
    with open("requirements.txt", encoding="utf-8") as f:
        for line in f:
            if "huggingface.co" not in line:
                continue
            parts = line.strip().split("/")
            # A line that merely mentions the host (a comment, a short URL)
            # has no fifth part; skip it instead of raising IndexError.
            if len(parts) < 5 or not parts[4]:
                continue
            # the first three tokens in model, ex. en_core_web
            model = "_".join(parts[4].split("_")[:3])
            if model not in models:
                models.append(model)
    return models
|
| 37 |
|
|
|
|
| 38 |
# Model name prefixes found in requirements.txt, computed once at import time.
models = get_all_models()
|
| 39 |
|
| 40 |
+
# when clicked, download as SVG. Rendered as HTML on the page
|
| 41 |
def download_svg(svg):
    """Build an HTML download link that embeds ``svg`` as a base64 data URI.

    Args:
        svg: SVG markup as a string.

    Returns:
        str: an ``<a download="displacy.svg">`` element styled with the
        module-level ``button_css``.
    """
    # Base64 output is pure ASCII, so decoding yields exactly the payload text.
    payload = base64.b64encode(bytes(svg, 'utf-8')).decode('ascii')
    data_uri = 'data:image/svg+xml;base64,' + payload
    return f'<a download="displacy.svg" href="{data_uri}" style="{button_css}">Download as SVG</a>'
|
| 46 |
|
| 47 |
+
# create dependency graph, inputs are text, collapse punctuation,
|
| 48 |
+
# collapse phrases, compact, background color, font color, and model
|
| 49 |
def dependency(text, col_punct, col_phrase, compact, bg, font, model):
|
| 50 |
model_name = model + "_sm"
|
| 51 |
nlp = spacy.load(model_name)
|
|
|
|
| 53 |
options = {"compact": compact, "collapse_phrases": col_phrase,
|
| 54 |
"collapse_punct": col_punct, "bg": bg, "color": font}
|
| 55 |
svg = displacy.render(doc, style="dep", options=options)
|
| 56 |
+
download = download_svg(svg) # download button for SVG
|
| 57 |
return svg, download, model_name
|
| 58 |
|
| 59 |
+
# returns the NER displacy, inputs are text, checked ents, and model
|
| 60 |
def entity(text, ents, model):
|
| 61 |
model_name = model + "_sm"
|
| 62 |
nlp = spacy.load(model_name)
|
|
|
|
| 65 |
svg = displacy.render(doc, style="ent", options=options)
|
| 66 |
return svg, model_name
|
| 67 |
|
| 68 |
+
# returns token attributes for the user inputs
|
| 69 |
def token(text, attributes, model):
|
| 70 |
model_name = model + "_sm"
|
| 71 |
nlp = spacy.load(model_name)
|
|
|
|
| 79 |
data = pd.DataFrame(data, columns=attributes)
|
| 80 |
return data, model_name
|
| 81 |
|
| 82 |
+
# returns token attributes in the default state
|
| 83 |
+
# the return value is not a pandas DataFrame
|
| 84 |
def default_token(text, attributes, model):
|
| 85 |
model_name = model + "_sm"
|
| 86 |
nlp = spacy.load(model_name)
|
|
|
|
| 93 |
data.append(tok_data)
|
| 94 |
return data, model_name
|
| 95 |
|
| 96 |
+
# returns noun chunks in text
|
| 97 |
def noun_chunks(text, model):
|
| 98 |
model_name = model + "_sm"
|
| 99 |
nlp = spacy.load(model_name)
|
|
|
|
| 105 |
data = pd.DataFrame(data, columns=NOUN_ATTR)
|
| 106 |
return data, model_name
|
| 107 |
|
| 108 |
+
# returns noun chunks for the default value
|
| 109 |
+
# the return value is not a pandas DataFrame
|
| 110 |
def default_noun_chunks(text, model):
|
| 111 |
model_name = model + "_sm"
|
| 112 |
nlp = spacy.load(model_name)
|
|
|
|
| 117 |
chunk.root.head.text])
|
| 118 |
return data, model_name
|
| 119 |
|
| 120 |
+
# Get the similarity of two randomly chosen vectors
|
| 121 |
def random_vectors(text, model):
|
| 122 |
model_name = model + "_md"
|
| 123 |
nlp = spacy.load(model_name)
|
|
|
|
| 129 |
choice = random.choices(str_list, k=2)
|
| 130 |
return round(choice[0].similarity(choice[1]), 2), choice[0].text, choice[1].text, model_name
|
| 131 |
|
| 132 |
+
# Get similarity of two inputted vectors
|
| 133 |
def vectors(input1, input2, model):
    """Compute the similarity between two input texts.

    Args:
        input1: first text.
        input2: second text.
        model: model name prefix; ``"_md"`` is appended to form the name
            passed to ``spacy.load``.

    Returns:
        tuple: ``(similarity rounded to 2 decimals, loaded model name)``.
    """
    model_name = model + "_md"
    pipeline = spacy.load(model_name)
    first_doc = pipeline(input1)
    second_doc = pipeline(input2)
    score = round(first_doc.similarity(second_doc), 2)
    return score, model_name
|
| 137 |
|
| 138 |
+
# display spans, inputs are text, spans, labels, and model
|
| 139 |
def span(text, span1, span2, label1, label2, model):
|
| 140 |
model_name = model + "_sm"
|
| 141 |
nlp = spacy.load(model_name)
|
|
|
|
| 178 |
svg = displacy.render(doc, style="span")
|
| 179 |
return svg, model_name
|
| 180 |
|
| 181 |
+
# get default text based on language model
|
| 182 |
def get_text(model):
    """Return the default sample text for the language of ``model``.

    The language code is the part of ``model`` before the first underscore
    (``"en_core_web"`` gives ``"en"``) and is used as the key into the
    module-level ``texts`` mapping.

    Args:
        model: model name prefix such as ``"en_core_web"``.

    Returns:
        The entry of ``texts`` for that language code.

    Raises:
        KeyError: if ``texts`` has no entry for the language code.
    """
    # A single lookup is enough: the result depends only on ``model``.
    language = model.split("_")[0]
    return texts[language]
|
| 187 |
|
|
|
|
| 188 |
demo = gr.Blocks(css="scrollbar.css")
|
| 189 |
|
| 190 |
with demo:
|
| 191 |
with gr.Box():
|
|
|
|
| 192 |
with gr.Row():
|
| 193 |
with gr.Column():
|
| 194 |
gr.Markdown("# Pipeline Visualizer")
|
|
|
|
| 209 |
gr.Markdown("")
|
| 210 |
with gr.Column():
|
| 211 |
gr.Markdown("")
|
|
|
|
| 212 |
with gr.Row():
|
| 213 |
with gr.Column():
|
| 214 |
text_input = gr.Textbox(
|
| 215 |
value=DEFAULT_TEXT, interactive=True, label="Input Text")
|
| 216 |
with gr.Column():
|
| 217 |
gr.Markdown("")
|
|
|
|
| 218 |
button = gr.Button("Update", variant="primary")
|
| 219 |
with gr.Box():
|
| 220 |
with gr.Column():
|
|
|
|
| 348 |
with gr.Column():
|
| 349 |
gr.Markdown("")
|
| 350 |
sim_random_button = gr.Button("Update random words")
|
| 351 |
+
sim_button = gr.Button("Update similarity", variant="primary")
|
| 352 |
with gr.Box():
|
| 353 |
with gr.Column():
|
| 354 |
with gr.Row():
|
|
|
|
| 390 |
DEFAULT_TEXT, "U.K. startup", "U.K.", "ORG", "GPE", DEFAULT_MODEL)[0])
|
| 391 |
span_button = gr.Button("Update Spans", variant="primary")
|
| 392 |
|
| 393 |
+
# change text based on model input
|
| 394 |
model_input.change(get_text, inputs=[model_input], outputs=text_input)
|
| 395 |
+
|
| 396 |
+
# main button - update all components
|
| 397 |
button.click(dependency, inputs=[
|
| 398 |
text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
|
| 399 |
button.click(
|
|
|
|
| 406 |
sim_text2, model_input], outputs=[sim_output, sim_model])
|
| 407 |
button.click(
|
| 408 |
span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
|
| 409 |
+
|
| 410 |
+
# individual component buttons
|
| 411 |
dep_button.click(dependency, inputs=[
|
| 412 |
text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=[dep_output, dep_download_button, dep_model])
|
| 413 |
ent_button.click(
|
|
|
|
| 422 |
span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=[span_output, span_model])
|
| 423 |
sim_random_button.click(random_vectors, inputs=[text_input, model_input], outputs=[
|
| 424 |
sim_output, sim_text1, sim_text2, sim_model])
|
| 425 |
+
|
| 426 |
demo.launch()
|