Gabe Mancino-Ball
committed on
Commit
·
9b9ead9
1
Parent(s):
f48dc65
Updates
Browse files
app.py
CHANGED
|
@@ -33,6 +33,16 @@ def get_max_score(group: pd.DataFrame, metric: str, use_selection: bool = True)
|
|
| 33 |
return group.loc[max_idx]
|
| 34 |
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
@st.cache_data
|
| 37 |
def load_results(task_key, best_only, metric="balanced_accuracy"):
|
| 38 |
to_return = {}
|
|
@@ -41,16 +51,37 @@ def load_results(task_key, best_only, metric="balanced_accuracy"):
|
|
| 41 |
file_path = f"{results_path}/{task_key}_{score}_{split}_score.csv"
|
| 42 |
if os.path.exists(file_path):
|
| 43 |
df = pd.read_csv(file_path)
|
|
|
|
| 44 |
if not best_only:
|
| 45 |
to_return[f"{split}_{score}_score"] = df
|
| 46 |
else:
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
to_return[f"{split}_{score}_score"] = df
|
| 55 |
return to_return
|
| 56 |
|
|
@@ -305,7 +336,7 @@ def show_dataframe_w_format(df, format="compact", top_n=None):
|
|
| 305 |
# Calculate the mean of top n values for each column
|
| 306 |
top_n_means = {}
|
| 307 |
for col in df.columns:
|
| 308 |
-
sorted_values = df[col].sort_values(ascending=False)
|
| 309 |
# Ensure we don't try to take more values than available
|
| 310 |
actual_n = min(top_n, len(sorted_values))
|
| 311 |
if actual_n > 0:
|
|
|
|
| 33 |
return group.loc[max_idx]
|
| 34 |
|
| 35 |
|
| 36 |
+
def select_rows(df, metric: str = "balanced_accuracy"):
    """Pick one row per team from ``df``.

    Within each ``team`` group:

    * if at least one row is flagged in the ``selected`` column, the flagged
      row with the highest ``metric`` value is chosen;
    * otherwise the row with the highest ``f"{metric}_public"`` value is
      chosen.

    Args:
        df: Frame with ``team``, ``selected``, ``metric`` and
            ``f"{metric}_public"`` columns.
        metric: Name of the score column used to rank flagged rows; its
            ``_public`` counterpart ranks teams without a flagged row.

    Returns:
        The object assembled by ``groupby("team").apply(...)`` from the
        chosen rows (one per team).
    """
    public_col = f"{metric}_public"

    def select(group):
        # Compare against True instead of using the column itself as the mask:
        # a missing flag (NaN/None, e.g. after a join or CSV read) then counts
        # as "not selected" rather than raising on a non-boolean indexer.
        flagged = group[group["selected"].eq(True)]
        if not flagged.empty:
            return flagged.loc[flagged[metric].idxmax()]
        return group.loc[group[public_col].idxmax()]

    return df.groupby("team", group_keys=False).apply(select)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
@st.cache_data
|
| 47 |
def load_results(task_key, best_only, metric="balanced_accuracy"):
|
| 48 |
to_return = {}
|
|
|
|
| 51 |
file_path = f"{results_path}/{task_key}_{score}_{split}_score.csv"
|
| 52 |
if os.path.exists(file_path):
|
| 53 |
df = pd.read_csv(file_path)
|
| 54 |
+
public_df = pd.read_csv(f"{results_path}/{task_key}_{score}_public_score.csv")
|
| 55 |
if not best_only:
|
| 56 |
to_return[f"{split}_{score}_score"] = df
|
| 57 |
else:
|
| 58 |
+
if split == "public":
|
| 59 |
+
df = df.sort_values(["team", metric], ascending=False).reset_index(drop=True)
|
| 60 |
+
selected_max = (
|
| 61 |
+
df.copy()
|
| 62 |
+
.groupby("team", group_keys=False)
|
| 63 |
+
.apply(get_max_score, metric=metric, use_selection=True)
|
| 64 |
+
.sort_values([metric], ascending=False)
|
| 65 |
+
.set_index("team")
|
| 66 |
+
)
|
| 67 |
+
df = (
|
| 68 |
+
df.copy()
|
| 69 |
+
.groupby("team", group_keys=False)
|
| 70 |
+
.apply(get_max_score, metric=metric, use_selection=False)
|
| 71 |
+
.sort_values([metric], ascending=False)
|
| 72 |
+
.set_index("team")
|
| 73 |
+
)
|
| 74 |
+
print((df["balanced_accuracy"] - selected_max["balanced_accuracy"]))
|
| 75 |
+
else:
|
| 76 |
+
public_df = (
|
| 77 |
+
public_df.sort_values(["team", metric], ascending=False)
|
| 78 |
+
.reset_index(drop=True)
|
| 79 |
+
.set_index("submission_id")["balanced_accuracy"]
|
| 80 |
+
)
|
| 81 |
+
tmp = df.set_index("submission_id")
|
| 82 |
+
tmp = tmp.join(public_df, on=["submission_id"], rsuffix="_public")
|
| 83 |
+
df = select_rows(tmp)
|
| 84 |
+
df = df.sort_values([metric], ascending=False).set_index("team")
|
| 85 |
to_return[f"{split}_{score}_score"] = df
|
| 86 |
return to_return
|
| 87 |
|
|
|
|
| 336 |
# Calculate the mean of top n values for each column
|
| 337 |
top_n_means = {}
|
| 338 |
for col in df.columns:
|
| 339 |
+
sorted_values = df[col] # .sort_values(ascending=False)
|
| 340 |
# Ensure we don't try to take more values than available
|
| 341 |
actual_n = min(top_n, len(sorted_values))
|
| 342 |
if actual_n > 0:
|
utils.py
CHANGED
|
@@ -30,6 +30,7 @@ STATUS_MAP = {0: "PENDING", 1: "QUEUED", 2: "PROCESSING", 3: "SUCCESS", 4: "FAIL
|
|
| 30 |
|
| 31 |
## Make a directory to store computed results
|
| 32 |
os.makedirs(Path("competition_cache") / "cached_results", exist_ok=True)
|
|
|
|
| 33 |
|
| 34 |
|
| 35 |
def load_teams(competition_space_path: Path) -> pd.DataFrame:
|
|
@@ -218,6 +219,17 @@ def create_custom_subs():
|
|
| 218 |
)
|
| 219 |
|
| 220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
if __name__ == "__main__":
|
| 222 |
|
| 223 |
## Download data
|
|
@@ -513,6 +525,8 @@ if __name__ == "__main__":
|
|
| 513 |
/ f"{str(local_dir).split('/')[-1]}_{score_name}_private_only_score.csv",
|
| 514 |
index=False,
|
| 515 |
)
|
|
|
|
|
|
|
| 516 |
|
| 517 |
rocs.to_csv(
|
| 518 |
Path("competition_cache") / "cached_results" / f"{str(local_dir).split('/')[-1]}_{score_name}_rocs.csv",
|
|
|
|
| 30 |
|
| 31 |
## Make a directory to store computed results
|
| 32 |
os.makedirs(Path("competition_cache") / "cached_results", exist_ok=True)
|
| 33 |
+
os.makedirs(Path("competition_cache") / "cached_results" / "by_team", exist_ok=True)
|
| 34 |
|
| 35 |
|
| 36 |
def load_teams(competition_space_path: Path) -> pd.DataFrame:
|
|
|
|
| 219 |
)
|
| 220 |
|
| 221 |
|
| 222 |
+
def save_by_team(
    df: pd.DataFrame,
    save_path_base: str,
    *,
    base_dir: Path = Path("competition_cache") / "cached_results" / "by_team",
) -> None:
    """Write one CSV per team containing only that team's rows.

    Each team's rows are written, without the index, to
    ``<base_dir>/<team>/<save_path_base>``; the team directory is created if
    it does not exist yet.

    Args:
        df: Frame with a ``team`` column.
        save_path_base: File name used inside every team directory.
        base_dir: Root directory that holds the per-team directories.
            Defaults to ``competition_cache/cached_results/by_team``.
    """
    root = Path(base_dir)
    # A single groupby pass replaces one full-frame scan per team. Rows with a
    # missing team are skipped (groupby drops NaN keys) instead of producing an
    # empty CSV under a "nan" directory. sort=False keeps first-appearance
    # order of the teams.
    for team, team_rows in df.groupby("team", sort=False):
        # NOTE(review): the team value is used verbatim as a path component;
        # a name containing a path separator or ".." lands outside its own
        # directory. Confirm team names are validated upstream.
        team_dir = root / str(team)
        os.makedirs(team_dir, exist_ok=True)
        team_rows.to_csv(team_dir / save_path_base, index=False)
|
| 231 |
+
|
| 232 |
+
|
| 233 |
if __name__ == "__main__":
|
| 234 |
|
| 235 |
## Download data
|
|
|
|
| 525 |
/ f"{str(local_dir).split('/')[-1]}_{score_name}_private_only_score.csv",
|
| 526 |
index=False,
|
| 527 |
)
|
| 528 |
+
save_by_team(df=public, save_path_base=f"{str(local_dir).split('/')[-1]}_{score_name}_public.csv")
|
| 529 |
+
save_by_team(df=private, save_path_base=f"{str(local_dir).split('/')[-1]}_{score_name}_private.csv")
|
| 530 |
|
| 531 |
rocs.to_csv(
|
| 532 |
Path("competition_cache") / "cached_results" / f"{str(local_dir).split('/')[-1]}_{score_name}_rocs.csv",
|