Spaces:

huggingface
/

speech-bench-metrics-editor

Runtime error

App Files Files Community

speech-test committed on Apr 14, 2022

Commit

582e085

1 Parent(s): 47e279a

Metrics editor

Browse files

Files changed (2) hide show

app.py +118 -10
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import requests
 import streamlit as st
 import yaml
 from huggingface_hub import hf_hub_download
 from streamlit_tags import st_tags
 # exact same regex as in the Hub server. Please keep in sync.
@@ -93,7 +94,7 @@ def main():
         "[our list here](https://huggingface.co/spaces/huggingface/hf-speech-bench/blob/main/languages.json). "
         "When in doubt, use the most generic language code, e.g. `en` instead of `en-GB` and `en-US`."
     )
-    st.markdown("*Example*: `cs, hsb, pl`")
     metadata["language"] = metadata["language"] if "language" in metadata else []
     metadata["language"] = (
         metadata["language"]
@@ -107,13 +108,19 @@ def main():
     lang_names = [lang2name[lang] if lang in lang2name else lang for lang in languages]
     st.markdown("These languages will be parsed by the leaderboard as: ")
     st.code(", ".join(lang_names))
     ############################
     # TRAIN DATASETS
     ############################
     st.markdown("### Training dataset(s)")
-    st.markdown("List the datasets that your model was trained on.")
-    st.markdown("*Example*: `librispeech_asr, mozilla-foundation/common_voice_8_0`")
     if "datasets" not in metadata:
         metadata["datasets"] = []
@@ -126,6 +133,7 @@ def main():
             "WARNING: `common_voice` is deprecated, please replace it with its equivalent: "
             "`mozilla-foundation/common_voice_6_1`"
         )
     ############################
     # MODEL NAME
@@ -134,16 +142,116 @@ def main():
     st.markdown("Enter a descriptive name for your model.")
     st.markdown("*Example*: `XLS-R Wav2Vec2 LM Spanish by Jane Doe`")
-    if "model_index" not in metadata:
-        metadata["model_index"] = [{}]
-    if "name" not in ["model_index"][0]:
-        metadata["model_index"][0]["name"] = model_id.split("/")[-1]
-    model_name = st.text_input("", value=metadata["model_index"][0]["name"])
     ############################
-    # EVAL DATASETS
     ############################
-    st.markdown("### Evaluation metrics")
 if __name__ == "__main__":

 import streamlit as st
 import yaml
 from huggingface_hub import hf_hub_download
+from streamlit_ace import st_ace
 from streamlit_tags import st_tags
 # exact same regex as in the Hub server. Please keep in sync.
         "[our list here](https://huggingface.co/spaces/huggingface/hf-speech-bench/blob/main/languages.json). "
         "When in doubt, use the most generic language code, e.g. `en` instead of `en-GB` and `en-US`."
     )
+    st.markdown("*Example*: `en, gsw, pt-BR`")
     metadata["language"] = metadata["language"] if "language" in metadata else []
     metadata["language"] = (
         metadata["language"]
     lang_names = [lang2name[lang] if lang in lang2name else lang for lang in languages]
     st.markdown("These languages will be parsed by the leaderboard as: ")
     st.code(", ".join(lang_names))
+    metadata["language"] = languages
     ############################
     # TRAIN DATASETS
     ############################
     st.markdown("### Training dataset(s)")
+    st.markdown(
+        "List the datasets that your model was **trained** on. "
+        "If the datasets aren't published on the Hub yet, just add their names anyway."
+    )
+    st.markdown(
+        "*Example*: `librispeech_asr, mozilla-foundation/common_voice_8_0, my_custom_youtube_dataset`"
+    )
     if "datasets" not in metadata:
         metadata["datasets"] = []
             "WARNING: `common_voice` is deprecated, please replace it with its equivalent: "
             "`mozilla-foundation/common_voice_6_1`"
         )
+    metadata["datasets"] = train_datasets
     ############################
     # MODEL NAME
     st.markdown("Enter a descriptive name for your model.")
     st.markdown("*Example*: `XLS-R Wav2Vec2 LM Spanish by Jane Doe`")
+    if "model-index" not in metadata:
+        metadata["model-index"] = [{}]
+    if "name" not in ["model-index"][0]:
+        metadata["model-index"][0]["name"] = model_id.split("/")[-1]
+    model_name = st.text_input("", value=metadata["model-index"][0]["name"])
+    metadata["model-index"][0]["name"] = model_name
     ############################
+    # EVAL RESULTS
     ############################
+    st.markdown("### Evaluation results")
+    st.markdown("To edit the metrics, you can either use the YAML editor below, or add new metrics using the handy "
+                "form under it.")
+    if "results" not in metadata["model-index"][0]:
+        metadata["model-index"][0]["results"] = []
+    results_editor = st.empty()
+    with results_editor:
+        results_yaml = yaml.dump(
+            metadata["model-index"][0]["results"], sort_keys=False, line_break="\n"
+        )
+        results_yaml = st_ace(value=results_yaml, language="yaml")
+        metadata["model-index"][0]["results"] = try_parse_yaml(results_yaml)
+    with st.form(key="eval_form"):
+        dataset_name = st.text_input(
+            label="Full name of the dataset", placeholder="Common Voice 8.0"
+        )
+        dataset_path = st.text_input(
+            label="Dataset path / id", placeholder="mozilla-foundation/common_voice_8_0"
+        )
+        dataset_config = st.text_input(
+            label="Dataset config (language). Examples: en, pt-BR, clean",
+            placeholder="en",
+        )
+        metric_name = st.text_input(label="Metric name", placeholder="Test WER (+LM)")
+        metric2name = {"wer": "Word Error Rate", "cer": "Character Error Rate"}
+        metric_type = st.selectbox(
+            label="Metric",
+            options=["wer", "cer"],
+            format_func=lambda key: metric2name[key],
+        )
+        metric_value = st.text_input(
+            label="Metric value (0.0 - 100.0)",
+            placeholder="12.34",
+        )
+        try:
+            metric_value = float(metric_value)
+        except ValueError:
+            st.error(f"Couldn't parse `{metric_value}`. Make sure it's a number from 0.0 to 100.0")
+        submitted = st.form_submit_button("Submit")
+        if submitted:
+            metric = {
+                "name": metric_name,
+                "type": metric_type,
+                "value": metric_value,
+            }
+            # first, try to find an existing dataset+config record to add a new metric to it
+            updated_existing = False
+            for existing_result in metadata["model-index"][0]["results"]:
+                existing_dataset = existing_result["dataset"]
+                if (
+                    existing_dataset["type"] == dataset_path
+                    and existing_dataset["args"] == dataset_config
+                ):
+                    if "metrics" not in existing_result:
+                        existing_result["metrics"] = []
+                    existing_result["metrics"].append(metric)
+                    updated_existing = True
+                    break
+            # if no dataset+config results found, create a new one
+            if not updated_existing:
+                result = {
+                    "task": {
+                        "name": "Automatic Speech Recognition",
+                        "type": "automatic-speech-recognition",
+                    },
+                    "dataset": {
+                        "name": dataset_name,
+                        "type": dataset_path,
+                        "args": dataset_config,
+                    },
+                    "metrics": [metric],
+                }
+                metadata["model-index"][0]["results"].append(result)
+            # update the code editor
+            with results_editor:
+                results_yaml = yaml.dump(
+                    metadata["model-index"][0]["results"],
+                    sort_keys=False,
+                    line_break="\n",
+                )
+                results_yaml = st_ace(value=results_yaml, language="yaml")
+                metadata["model-index"][0]["results"] = try_parse_yaml(results_yaml)
+            st.success(f"Added the metric for {dataset_path} - {dataset_config}! "
+                       f"Check the result in the YAML editor above.")
+    ############################
+    # FINAL YAML
+    ############################
+    st.markdown("## 3. Copy the generated metadata")
+    st.markdown(
+        "Copy the YAML from below and replace the metadata at the top of your model's README.md here: "
+        f"https://huggingface.co/{model_id}/blob/main/README.md"
+    )
+    new_yaml = yaml.dump(metadata, sort_keys=False, line_break="\n")
+    st.markdown(f"```yaml\n---\n{new_yaml}---\n```")
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

	@@ -1 +1,2 @@
1	- streamlit-tags


1	+ streamlit-tags
2	+ streamlit-ace