Spaces:

safe-challenge
/

video-challenge-leaderboard

Running

App Files Community

Gabe Mancino-Ball committed on Oct 10

Commit

9b9ead9

1 Parent(s): f48dc65

Updates

Browse files

Files changed (2) hide show

app.py +39 -8
utils.py +14 -0

app.py CHANGED Viewed

@@ -33,6 +33,16 @@ def get_max_score(group: pd.DataFrame, metric: str, use_selection: bool = True)
     return group.loc[max_idx]
 @st.cache_data
 def load_results(task_key, best_only, metric="balanced_accuracy"):
     to_return = {}
@@ -41,16 +51,37 @@ def load_results(task_key, best_only, metric="balanced_accuracy"):
             file_path = f"{results_path}/{task_key}_{score}_{split}_score.csv"
             if os.path.exists(file_path):
                 df = pd.read_csv(file_path)
                 if not best_only:
                     to_return[f"{split}_{score}_score"] = df
                 else:
-                    df = df.sort_values(["team", metric], ascending=False).reset_index(drop=True)
-                    df = (
-                        df.groupby("team", group_keys=False)
-                        .apply(get_max_score, metric=metric, use_selection=False if split == "public" else True)
-                        .sort_values([metric], ascending=False)
-                        .set_index("team")
-                    )
                     to_return[f"{split}_{score}_score"] = df
     return to_return
@@ -305,7 +336,7 @@ def show_dataframe_w_format(df, format="compact", top_n=None):
         # Calculate the mean of top n values for each column
         top_n_means = {}
         for col in df.columns:
-            sorted_values = df[col].sort_values(ascending=False)
             # Ensure we don't try to take more values than available
             actual_n = min(top_n, len(sorted_values))
             if actual_n > 0:

     return group.loc[max_idx]
+def select_rows(df, metric: str = "balanced_accuracy"):
+    def select(group):
+        if group["selected"].any():
+            return group[group["selected"]].loc[group[group["selected"]][metric].idxmax()]
+        else:
+            return group.loc[group[f"{metric}_public"].idxmax()]
+    return df.groupby("team", group_keys=False).apply(select)
 @st.cache_data
 def load_results(task_key, best_only, metric="balanced_accuracy"):
     to_return = {}
             file_path = f"{results_path}/{task_key}_{score}_{split}_score.csv"
             if os.path.exists(file_path):
                 df = pd.read_csv(file_path)
+                public_df = pd.read_csv(f"{results_path}/{task_key}_{score}_public_score.csv")
                 if not best_only:
                     to_return[f"{split}_{score}_score"] = df
                 else:
+                    if split == "public":
+                        df = df.sort_values(["team", metric], ascending=False).reset_index(drop=True)
+                        selected_max = (
+                            df.copy()
+                            .groupby("team", group_keys=False)
+                            .apply(get_max_score, metric=metric, use_selection=True)
+                            .sort_values([metric], ascending=False)
+                            .set_index("team")
+                        )
+                        df = (
+                            df.copy()
+                            .groupby("team", group_keys=False)
+                            .apply(get_max_score, metric=metric, use_selection=False)
+                            .sort_values([metric], ascending=False)
+                            .set_index("team")
+                        )
+                        print((df["balanced_accuracy"] - selected_max["balanced_accuracy"]))
+                    else:
+                        public_df = (
+                            public_df.sort_values(["team", metric], ascending=False)
+                            .reset_index(drop=True)
+                            .set_index("submission_id")["balanced_accuracy"]
+                        )
+                        tmp = df.set_index("submission_id")
+                        tmp = tmp.join(public_df, on=["submission_id"], rsuffix="_public")
+                        df = select_rows(tmp)
+                        df = df.sort_values([metric], ascending=False).set_index("team")
                     to_return[f"{split}_{score}_score"] = df
     return to_return
         # Calculate the mean of top n values for each column
         top_n_means = {}
         for col in df.columns:
+            sorted_values = df[col]  # .sort_values(ascending=False)
             # Ensure we don't try to take more values than available
             actual_n = min(top_n, len(sorted_values))
             if actual_n > 0:

utils.py CHANGED Viewed

@@ -30,6 +30,7 @@ STATUS_MAP = {0: "PENDING", 1: "QUEUED", 2: "PROCESSING", 3: "SUCCESS", 4: "FAIL
 ## Make a directory to store computed results
 os.makedirs(Path("competition_cache") / "cached_results", exist_ok=True)
 def load_teams(competition_space_path: Path) -> pd.DataFrame:
@@ -218,6 +219,17 @@ def create_custom_subs():
         )
 if __name__ == "__main__":
     ## Download data
@@ -513,6 +525,8 @@ if __name__ == "__main__":
                 / f"{str(local_dir).split('/')[-1]}_{score_name}_private_only_score.csv",
                 index=False,
             )
             rocs.to_csv(
                 Path("competition_cache") / "cached_results" / f"{str(local_dir).split('/')[-1]}_{score_name}_rocs.csv",

 ## Make a directory to store computed results
 os.makedirs(Path("competition_cache") / "cached_results", exist_ok=True)
+os.makedirs(Path("competition_cache") / "cached_results" / "by_team", exist_ok=True)
 def load_teams(competition_space_path: Path) -> pd.DataFrame:
         )
+def save_by_team(df: pd.DataFrame, save_path_base: str) -> None:
+    df = df.copy()
+    for team in df["team"].unique():
+        os.makedirs(f"competition_cache/cached_results/by_team/{team}", exist_ok=True)
+        df_ = df[df["team"] == team].copy()
+        df_.to_csv(
+            f"competition_cache/cached_results/by_team/{team}/{save_path_base}",
+            index=False,
+        )
 if __name__ == "__main__":
     ## Download data
                 / f"{str(local_dir).split('/')[-1]}_{score_name}_private_only_score.csv",
                 index=False,
             )
+            save_by_team(df=public, save_path_base=f"{str(local_dir).split('/')[-1]}_{score_name}_public.csv")
+            save_by_team(df=private, save_path_base=f"{str(local_dir).split('/')[-1]}_{score_name}_private.csv")
             rocs.to_csv(
                 Path("competition_cache") / "cached_results" / f"{str(local_dir).split('/')[-1]}_{score_name}_rocs.csv",