Spaces:
Runtime error
Runtime error
"""Streamlit app that displays an Elo leaderboard of models.

The page collects three sizing parameters from the user and, when the
button is pressed, builds a synthetic ratings dataset, computes Elo
ratings from it, and renders the leaderboard. All data handling lives in
the local ``utils`` module; this script only wires the UI to it.
"""

import os
from pathlib import Path

import pandas as pd
import streamlit as st

import utils as ut

# Must be the first Streamlit call on the page.
st.set_page_config(layout="wide")

st.markdown("# Elo Rating of Models")
st.markdown(
    """This app shows the Elo rating of models on the H4 Hub based on their performance on the H4 eval dataset. """
)
st.markdown(
    """**Notes**
* This is currently using synthetic data
* You can tweak the number of tasks, models, and human rating per task to generate different datasets
"""
)

# User input: sizing parameters for the synthetic dataset.
num_tasks = st.number_input(
    "Number of tasks", min_value=1, max_value=5000, value=100
)
num_models = st.number_input(
    "Number of models", min_value=1, max_value=100, value=4
)
num_human_ratings = st.number_input(
    "Number of human ratings per task", min_value=1, max_value=10, value=3
)

button = st.button("Show me the leaderboard!")

# st.button returns True only on the rerun triggered by the click, so a
# plain truthiness test is enough (no identity comparison against True).
if button:
    # Generate synthetic data.
    # NOTE(review): the shape/schema of the returned frame is defined in
    # utils.create_synthetic_data — confirm there.
    df = ut.create_synthetic_data(
        n_tasks=num_tasks,
        n_models=num_models,
        n_ratings=num_human_ratings,
    )
    # Calculate Elo rating.
    elo_df = ut.calculate_elo_rating(df)
    # Show leaderboard.
    ut.display_leaderboard(elo_df)