Spaces:
Runtime error
Runtime error
| import json | |
| import re | |
| from pathlib import Path | |
| import requests | |
| import streamlit as st | |
| import yaml | |
| from huggingface_hub import hf_hub_download | |
| from streamlit_tags import st_tags | |
# Matches a metadata block enclosed between two `---` lines and captures the
# text between them in group 1.
# Exact same regex as in the Hub server. Please keep in sync.
REGEX_YAML_BLOCK = re.compile(r"---[\n\r]+([\S\s]*?)[\n\r]+---[\n\r]")

# Lookup table from language code to the language name displayed to the user.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's locale default (which is not UTF-8 everywhere).
with open("languages.json", encoding="utf-8") as f:
    lang2name = json.load(f)
def try_parse_yaml(yaml_block):
    """Parse the YAML metadata block of a model card.

    Problems are reported to the user with ``st.error`` rather than raised.

    Args:
        yaml_block: The raw YAML text to parse.

    Returns:
        The parsed metadata as a ``dict``, or ``None`` when the text is not
        valid YAML or does not describe a mapping (e.g. it is empty, a bare
        scalar or a list).
    """
    try:
        metadata = yaml.load(yaml_block, yaml.SafeLoader)
    except yaml.YAMLError as e:
        print("Error while parsing the metadata YAML:")
        if hasattr(e, "problem_mark"):
            # Errors that carry a position also carry `problem` and `context`;
            # the context is only appended when there is one.
            details = str(e.problem)
            if e.context is not None:
                details += " " + str(e.context)
            st.error(
                str(e.problem_mark)
                + "\n "
                + details
                + "\nPlease correct the README.md and retry."
            )
        else:
            st.error(
                "Something went wrong while parsing the metadata. "
                "Make sure it's written according to the YAML spec!"
            )
        return None

    # An empty or comment-only block parses to None, and a scalar/list document
    # parses to a non-dict value. The caller indexes the result by key, so
    # anything other than a mapping is reported here instead of failing later
    # (or stopping silently on None).
    if not isinstance(metadata, dict):
        st.error(
            "ERROR: The metadata section is empty or is not a YAML mapping of "
            "keys to values."
            "\nPlease correct the README.md and retry."
        )
        return None
    return metadata
def _as_list(value):
    """Normalize a metadata field: ``None`` -> ``[]``, list -> itself, other -> ``[value]``."""
    if value is None:
        return []
    if isinstance(value, list):
        return value
    return [value]


def main():
    """Render the metadata editor page.

    Step 1 downloads the model's ``README.md`` from the Hub, extracts the
    metadata block with ``REGEX_YAML_BLOCK`` and parses it. Step 2 shows
    editable widgets for the languages, the training datasets and the model
    name, pre-filled from the parsed metadata. Every failure is shown with
    ``st.error`` followed by ``st.stop()``, which halts the current script run.
    """
    st.markdown("## 1. Load your model's metadata")
    st.markdown("Enter your model's path below.")
    # Strip once so stray whitespace around the id does not break the download.
    model_id = st.text_input("", placeholder="<username>/<model>").strip()
    if not model_id:
        st.stop()

    try:
        readme_path = hf_hub_download(model_id, filename="README.md")
    except requests.exceptions.HTTPError:
        st.error(
            f"ERROR: https://huggingface.co/{model_id}/blob/main/README.md "
            "not found, make sure you've entered a correct model path!"
        )
        st.stop()

    # Decode explicitly as UTF-8 instead of using the locale's default encoding.
    content = Path(readme_path).read_text(encoding="utf-8")
    match = REGEX_YAML_BLOCK.search(content)
    if match is None:
        st.error(
            "ERROR: Couldn't find the metadata section inside your model's `README.md`. Do you have some basic metadata "
            "enclosed in `---` as described in [the Hub documentation](https://huggingface.co/docs/hub/model-repos#model-card-metadata)?"
        )
        st.stop()
    meta_yaml = match.group(1)

    metadata = try_parse_yaml(meta_yaml)
    if metadata is None:
        st.stop()
    st.success("Successfully loaded the metadata!")
    with st.expander("Inspect the parsed metadata for debugging"):
        st.json(metadata)

    st.markdown("## 2. Edit the data")

    ############################
    # LANGUAGES
    ############################
    st.markdown("### Language(s)")
    st.markdown(
        "For each spoken language that your model handles, enter an "
        "[ISO 639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) language code, or "
        "find an appropriate alternative from "
        "[our list here](https://huggingface.co/spaces/huggingface/hf-speech-bench/blob/main/languages.json). "
        "When in doubt, use the most generic language code, e.g. `en` instead of `en-GB` and `en-US`."
    )
    st.markdown("*Example*: `cs, hsb, pl`")
    # The field may be missing, null, a single value or a list.
    metadata["language"] = _as_list(metadata.get("language"))
    languages = st_tags(
        label="", text="add more if needed, and press enter", value=metadata["language"]
    )
    # Codes without an entry in the lookup table are displayed unchanged.
    lang_names = [lang2name.get(lang, lang) for lang in languages]
    st.markdown("These languages will be parsed by the leaderboard as: ")
    st.code(", ".join(str(name) for name in lang_names))

    ############################
    # TRAIN DATASETS
    ############################
    st.markdown("### Training dataset(s)")
    st.markdown("List the datasets that your model was trained on.")
    st.markdown("*Example*: `librispeech_asr, mozilla-foundation/common_voice_8_0`")
    # Same normalization as for the languages, so a single dataset written as
    # a plain string is handled like a one-element list.
    metadata["datasets"] = _as_list(metadata.get("datasets"))
    train_datasets = st_tags(
        label="", text="add more if needed, and press enter", value=metadata["datasets"]
    )
    if "common_voice" in train_datasets:
        st.warning(
            "WARNING: `common_voice` is deprecated, please replace it with its equivalent: "
            "`mozilla-foundation/common_voice_6_1`"
        )

    ############################
    # MODEL NAME
    ############################
    st.markdown("### Model name")
    st.markdown("Enter a descriptive name for your model.")
    st.markdown("*Example*: `XLS-R Wav2Vec2 LM Spanish by Jane Doe`")
    # The Hub metadata field is spelled `model-index` (with a hyphen).
    # A missing, null or empty value is replaced by a single empty entry so
    # that indexing the first element below cannot fail.
    if not metadata.get("model-index"):
        metadata["model-index"] = [{}]
    # Only fall back to the repository name when the card has no name yet;
    # a name that is already present must be kept.
    first_entry = metadata["model-index"][0]
    if "name" not in first_entry:
        first_entry["name"] = model_id.split("/")[-1]
    # The entered value is not consumed further within this function yet.
    model_name = st.text_input("", value=first_entry["name"])

    ############################
    # EVAL DATASETS
    ############################
    st.markdown("### Evaluation metrics")


if __name__ == "__main__":
    main()