Spaces:
Sleeping
Sleeping
Updated for V2.0
Browse files- model_comparison.py +26 -44
model_comparison.py
CHANGED
|
@@ -9,35 +9,37 @@ from huggingface_hub import snapshot_download
|
|
| 9 |
from profanity_check import predict
|
| 10 |
|
| 11 |
databaseDF = None
|
|
|
|
| 12 |
EVAL_DATABASE_DIR = Path("data")
|
| 13 |
EVAL_DATABASE_DIR.mkdir(parents=True, exist_ok=True)
|
| 14 |
|
| 15 |
GEN_EVAL_DATABASE_PATH = 'user_data/data/general_eval_database.yaml'
|
| 16 |
TASK_EVAL_DATABASE_PATH = 'user_data/data/task_oriented_eval_database.yaml'
|
| 17 |
-
def get_evaluation_id(evalType, debugging):
|
| 18 |
-
global GEN_EVAL_DATABASE_PATH
|
| 19 |
-
global TASK_EVAL_DATABASE_PATH
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
DFPath = TASK_EVAL_DATABASE_PATH
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
|
| 42 |
def check_profanity(df):
|
| 43 |
cleanedDF = df
|
|
@@ -64,33 +66,24 @@ def dataframe_with_selections(df):
|
|
| 64 |
# Filter the dataframe using the temporary column, then drop the column
|
| 65 |
selected_rows = edited_df[edited_df.Select]
|
| 66 |
return selected_rows.drop('Select', axis=1)
|
| 67 |
-
def add_user_evalID_columns_to_df(df, evalDataPath
|
| 68 |
with open(evalDataPath, 'r') as f:
|
| 69 |
yamlData = safe_load(f)
|
| 70 |
for user in yamlData['evaluations']['username']:
|
| 71 |
if df is None:
|
| 72 |
df = pd.DataFrame(yamlData['evaluations']['username'][user]).T
|
| 73 |
df.insert(0, "Eval. ID", list(yamlData['evaluations']['username'][user].keys()), True)
|
| 74 |
-
df.insert(0, "User", [user for i in range(len(yamlData['evaluations']['username'][user]))],
|
| 75 |
-
True)
|
| 76 |
else:
|
| 77 |
df = pd.concat([df, pd.DataFrame(yamlData['evaluations']['username'][user]).T],
|
| 78 |
ignore_index=True)
|
| 79 |
evalIDIterator = 0
|
| 80 |
for index, row in df.iterrows():
|
| 81 |
-
if row['User'] is np.nan:
|
| 82 |
-
df.loc[index, 'User'] = user
|
| 83 |
if row['Eval. ID'] is np.nan:
|
| 84 |
df.loc[index, 'Eval. ID'] = list(yamlData['evaluations']['username'][user].keys())[
|
| 85 |
evalIDIterator]
|
| 86 |
evalIDIterator += 1
|
| 87 |
-
if personalFLAG:
|
| 88 |
-
df.drop(df[df['User'] != user_evaluation_variables.USERNAME].index, inplace=True)
|
| 89 |
-
if len(df) == 0:
|
| 90 |
-
st.warning("It looks like you haven't conducted any evaluations! Run some evaluations and refresh this page."
|
| 91 |
-
"If the problem persists, please contact support. ", icon="⚠️")
|
| 92 |
-
|
| 93 |
return df
|
|
|
|
| 94 |
def initialise_page(tab):
|
| 95 |
global databaseDF
|
| 96 |
global GEN_EVAL_DATABASE_PATH
|
|
@@ -100,30 +93,19 @@ def initialise_page(tab):
|
|
| 100 |
with c1:
|
| 101 |
st.subheader("\U0001F30E General Bias")
|
| 102 |
with st.form("gen_bias_database_loading_form", clear_on_submit=False):
|
| 103 |
-
personalGEN = st.form_submit_button("Personal Evaluations")
|
| 104 |
communityGEN = st.form_submit_button("TBYB Community Evaluations")
|
| 105 |
-
if personalGEN:
|
| 106 |
-
databaseDF = None
|
| 107 |
-
databaseDF = add_user_evalID_columns_to_df(databaseDF, GEN_EVAL_DATABASE_PATH,True)[["User", "Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
|
| 108 |
-
"Objects", "Actions", "Occupations", "Dist. Bias", "Hallucination", "Gen. Miss Rate",
|
| 109 |
-
"Run Time", "Date", "Time"]]
|
| 110 |
if communityGEN:
|
| 111 |
databaseDF = None
|
| 112 |
-
databaseDF = add_user_evalID_columns_to_df(databaseDF, GEN_EVAL_DATABASE_PATH
|
| 113 |
"Objects", "Actions", "Occupations", "Dist. Bias", "Hallucination", "Gen. Miss Rate",
|
| 114 |
"Run Time", "Date", "Time"]]
|
| 115 |
with c2:
|
| 116 |
st.subheader("\U0001F3AF Task-Oriented Bias")
|
| 117 |
with st.form("task_oriented_database_loading_form", clear_on_submit=False):
|
| 118 |
-
personalTASK = st.form_submit_button("Personal Evaluations")
|
| 119 |
communityTASK = st.form_submit_button("TBYB Community Evaluations")
|
| 120 |
-
if personalTASK:
|
| 121 |
-
databaseDF = None
|
| 122 |
-
databaseDF = add_user_evalID_columns_to_df(databaseDF, TASK_EVAL_DATABASE_PATH, True)[["User", "Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
|
| 123 |
-
"Target", "Dist. Bias", "Hallucination", "Gen. Miss Rate", "Run Time", "Date", "Time"]]
|
| 124 |
if communityTASK:
|
| 125 |
databaseDF = None
|
| 126 |
-
databaseDF = add_user_evalID_columns_to_df(databaseDF, TASK_EVAL_DATABASE_PATH
|
| 127 |
"Target", "Dist. Bias", "Hallucination", "Gen. Miss Rate", "Run Time", "Date", "Time"]]
|
| 128 |
if databaseDF is not None:
|
| 129 |
selection = dataframe_with_selections(databaseDF)
|
|
|
|
| 9 |
from profanity_check import predict
|
| 10 |
|
| 11 |
databaseDF = None
|
| 12 |
+
|
| 13 |
EVAL_DATABASE_DIR = Path("data")
|
| 14 |
EVAL_DATABASE_DIR.mkdir(parents=True, exist_ok=True)
|
| 15 |
|
| 16 |
GEN_EVAL_DATABASE_PATH = 'user_data/data/general_eval_database.yaml'
|
| 17 |
TASK_EVAL_DATABASE_PATH = 'user_data/data/task_oriented_eval_database.yaml'
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
+
# def get_evaluation_id(evalType, debugging):
|
| 20 |
+
# global GEN_EVAL_DATABASE_PATH
|
| 21 |
+
# global TASK_EVAL_DATABASE_PATH
|
|
|
|
| 22 |
|
| 23 |
+
# if evalType == 'general':
|
| 24 |
+
# DFPath = GEN_EVAL_DATABASE_PATH
|
| 25 |
+
# else:
|
| 26 |
+
# DFPath = TASK_EVAL_DATABASE_PATH
|
| 27 |
|
| 28 |
+
# df = add_user_evalID_columns_to_df(None, DFPath, False)
|
| 29 |
+
# evalColumn = [int(x.split('_')[1]) for x in list(df['Eval. ID'])]
|
| 30 |
+
|
| 31 |
+
# newEvalID = max(evalColumn) + 1
|
| 32 |
+
# if evalType == 'general':
|
| 33 |
+
# newEvalID = 'G_'+str(newEvalID).zfill(len(list(df['Eval. ID'])[0].split('_')[1]))
|
| 34 |
+
# else:
|
| 35 |
+
# newEvalID = 'T_' + str(newEvalID).zfill(len(list(df['Eval. ID'])[0].split('_')[1]))
|
| 36 |
|
| 37 |
+
# if debugging:
|
| 38 |
+
# st.write(df['Eval. ID'])
|
| 39 |
+
# st.write(evalColumn)
|
| 40 |
+
# st.write("current last EVAL ID:", df['Eval. ID'].iloc[-1])
|
| 41 |
+
# st.write("NEW EVAL ID:", newEvalID)
|
| 42 |
+
# return newEvalID
|
| 43 |
|
| 44 |
def check_profanity(df):
|
| 45 |
cleanedDF = df
|
|
|
|
| 66 |
# Filter the dataframe using the temporary column, then drop the column
|
| 67 |
selected_rows = edited_df[edited_df.Select]
|
| 68 |
return selected_rows.drop('Select', axis=1)
|
| 69 |
+
def add_user_evalID_columns_to_df(df, evalDataPath):
    """Load the evaluation YAML database and return its records as DataFrame rows.

    The YAML document is indexed as
    ``yamlData['evaluations']['username'][<user>][<eval id>]``; every
    ``<eval id>`` entry becomes one row, and the ID itself is stored in a
    leading "Eval. ID" column.

    Args:
        df: An existing DataFrame to append the loaded rows to, or ``None``
            to build a new one.
        evalDataPath: Path of the YAML evaluation database to read.

    Returns:
        The combined DataFrame. If the file lists no users, ``df`` is
        returned unchanged (which may be ``None``).
    """
    with open(evalDataPath, 'r') as f:
        yamlData = safe_load(f)

    userEvaluations = yamlData['evaluations']['username']
    for user in userEvaluations:
        evaluations = userEvaluations[user]

        # One row per evaluation: the outer keys (eval IDs) become the index
        # after the transpose.
        userDF = pd.DataFrame(evaluations).T

        # Attach the IDs *before* concatenating. Back-filling them afterwards
        # by scanning for `is np.nan` relied on the missing value being the
        # np.nan singleton, mislabelled any row that already lacked an ID,
        # and re-scanned the whole frame for every user.
        userDF.insert(0, "Eval. ID", list(evaluations.keys()), allow_duplicates=True)

        if df is None:
            # First frame keeps the eval IDs as its index (no concat happens).
            df = userDF
        else:
            # Any concat resets the index to 0..n-1.
            df = pd.concat([df, userDF], ignore_index=True)

    return df
|
| 86 |
+
|
| 87 |
def initialise_page(tab):
|
| 88 |
global databaseDF
|
| 89 |
global GEN_EVAL_DATABASE_PATH
|
|
|
|
| 93 |
with c1:
|
| 94 |
st.subheader("\U0001F30E General Bias")
|
| 95 |
with st.form("gen_bias_database_loading_form", clear_on_submit=False):
|
|
|
|
| 96 |
communityGEN = st.form_submit_button("TBYB Community Evaluations")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
if communityGEN:
|
| 98 |
databaseDF = None
|
| 99 |
+
databaseDF = add_user_evalID_columns_to_df(databaseDF, GEN_EVAL_DATABASE_PATH)[["Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
|
| 100 |
"Objects", "Actions", "Occupations", "Dist. Bias", "Hallucination", "Gen. Miss Rate",
|
| 101 |
"Run Time", "Date", "Time"]]
|
| 102 |
with c2:
|
| 103 |
st.subheader("\U0001F3AF Task-Oriented Bias")
|
| 104 |
with st.form("task_oriented_database_loading_form", clear_on_submit=False):
|
|
|
|
| 105 |
communityTASK = st.form_submit_button("TBYB Community Evaluations")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
if communityTASK:
|
| 107 |
databaseDF = None
|
| 108 |
+
databaseDF = add_user_evalID_columns_to_df(databaseDF, TASK_EVAL_DATABASE_PATH)[["Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
|
| 109 |
"Target", "Dist. Bias", "Hallucination", "Gen. Miss Rate", "Run Time", "Date", "Time"]]
|
| 110 |
if databaseDF is not None:
|
| 111 |
selection = dataframe_with_selections(databaseDF)
|