Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import streamlit as st | |
| import numpy as np | |
| import plotly.express as px | |
| from yaml import safe_load | |
| import user_evaluation_variables | |
| from pathlib import Path | |
| from huggingface_hub import hf_hub_download, CommitScheduler, login | |
| from profanity_check import predict | |
| databaseDF = None | |
| # EVAL_DATABASE_DIR = Path("data") | |
| # EVAL_DATABASE_DIR.mkdir(parents=True, exist_ok=True) | |
| GEN_EVAL_DATABASE_PATH = 'data/general_eval_database.yaml' | |
| TASK_EVAL_DATABASE_PATH = 'data/task_oriented_eval_database.yaml' | |
| # GEN_EVAL_DATABASE_PATH = EVAL_DATABASE_DIR / f"general_eval_database.yaml" | |
| # TASK_EVAL_DATABASE_PATH = EVAL_DATABASE_DIR / f"task_oriented_eval_database.yaml" | |
| # EVAL_DATABASE_UPDATE_SCHEDULER = CommitScheduler( | |
| # repo_id="try-before-you-bias-data", | |
| # repo_type="dataset", | |
| # folder_path=EVAL_DATABASE_DIR, | |
| # path_in_repo="data", | |
| # every=2, | |
| # ) | |
| # def get_evaluation_id(evalType, debugging): | |
| # global GEN_EVAL_DATABASE_PATH | |
| # global TASK_EVAL_DATABASE_PATH | |
| # if evalType == 'general': | |
| # DFPath = GEN_EVAL_DATABASE_PATH | |
| # else: | |
| # DFPath = TASK_EVAL_DATABASE_PATH | |
| # df = add_user_evalID_columns_to_df(None, DFPath, False) | |
| # evalColumn = [int(x.split('_')[1]) for x in list(df['Eval. ID'])] | |
| # newEvalID = max(evalColumn) + 1 | |
| # if evalType == 'general': | |
| # newEvalID = 'G_'+str(newEvalID).zfill(len(list(df['Eval. ID'])[0].split('_')[1])) | |
| # else: | |
| # newEvalID = 'T_' + str(newEvalID).zfill(len(list(df['Eval. ID'])[0].split('_')[1])) | |
| # if debugging: | |
| # st.write(df['Eval. ID']) | |
| # st.write(evalColumn) | |
| # st.write("current last EVAL ID:", df['Eval. ID'].iloc[-1]) | |
| # st.write("NEW EVAL ID:", newEvalID) | |
| # return newEvalID | |
| def check_profanity(df): | |
| cleanedDF = df | |
| for i, row in cleanedDF.iterrows(): | |
| if 'Target' in df: | |
| if predict([row['Target']])[0] != 0.0: | |
| cleanedDF.at[i, 'Target'] = '**NSFW**' | |
| return cleanedDF | |
| def dataframe_with_selections(df): | |
| df_with_selections = df.copy() | |
| df_with_selections = check_profanity(df_with_selections) | |
| df_with_selections.insert(0, "Select", True) | |
| # Get dataframe row-selections from user with st.data_editor | |
| edited_df = st.data_editor( | |
| df_with_selections, | |
| hide_index=True, | |
| column_config={"Select": st.column_config.CheckboxColumn(required=True)}, | |
| disabled=df.columns, | |
| ) | |
| # Filter the dataframe using the temporary column, then drop the column | |
| selected_rows = edited_df[edited_df.Select] | |
| return selected_rows.drop('Select', axis=1) | |
| def add_user_evalID_columns_to_df(df, evalDataPath): | |
| # with open(evalDataPath, 'r') as f: | |
| # yamlData = safe_load(f) | |
| # yamlData = safe_load() | |
| with open(hf_hub_download(repo_id="JVice/try-before-you-bias-data", filename=evalDataPath, repo_type="dataset"), 'r') as f: | |
| yamlData = safe_load(f) | |
| for user in yamlData['evaluations']['username']: | |
| if df is None: | |
| df = pd.DataFrame(yamlData['evaluations']['username'][user]).T | |
| df.insert(0, "Eval. ID", list(yamlData['evaluations']['username'][user].keys()), True) | |
| else: | |
| df = pd.concat([df, pd.DataFrame(yamlData['evaluations']['username'][user]).T], | |
| ignore_index=True) | |
| evalIDIterator = 0 | |
| for index, row in df.iterrows(): | |
| if row['Eval. ID'] is np.nan: | |
| df.loc[index, 'Eval. ID'] = list(yamlData['evaluations']['username'][user].keys())[ | |
| evalIDIterator] | |
| evalIDIterator += 1 | |
| return df | |
| def initialise_page(tab): | |
| global databaseDF | |
| global GEN_EVAL_DATABASE_PATH | |
| global TASK_EVAL_DATABASE_PATH | |
| with tab: | |
| c1, c2 = st.columns(2) | |
| with c1: | |
| st.subheader("\U0001F30E General Bias") | |
| with st.form("gen_bias_database_loading_form", clear_on_submit=False): | |
| communityGEN = st.form_submit_button("TBYB Community Evaluations") | |
| if communityGEN: | |
| databaseDF = None | |
| databaseDF = add_user_evalID_columns_to_df(databaseDF, GEN_EVAL_DATABASE_PATH)[["Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps", | |
| "Objects", "Actions", "Occupations", "Dist. Bias", "Hallucination", "Gen. Miss Rate", | |
| "Run Time", "Date", "Time"]] | |
| with c2: | |
| st.subheader("\U0001F3AF Task-Oriented Bias") | |
| with st.form("task_oriented_database_loading_form", clear_on_submit=False): | |
| communityTASK = st.form_submit_button("TBYB Community Evaluations") | |
| if communityTASK: | |
| databaseDF = None | |
| databaseDF = add_user_evalID_columns_to_df(databaseDF, TASK_EVAL_DATABASE_PATH)[["Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps", | |
| "Target", "Dist. Bias", "Hallucination", "Gen. Miss Rate", "Run Time", "Date", "Time"]] | |
| if databaseDF is not None: | |
| selection = dataframe_with_selections(databaseDF) | |
| normalised = st.toggle('Normalize Data (better for direct comparisons)') | |
| submitCOMPARE = st.button("Compare Selected Models") | |
| if submitCOMPARE: | |
| plot_comparison_graphs(tab, selection, normalised) | |
| def normalise_data(rawValues, metric): | |
| rawValues = list(map(float, rawValues)) | |
| normalisedValues = [] | |
| # Normalise the raw data | |
| for x in rawValues: | |
| if (max(rawValues) - min(rawValues)) == 0: | |
| normX = 1 | |
| else: | |
| if metric in ['HJ','MG']: | |
| normX = (x - min(rawValues)) / (max(rawValues) - min(rawValues)) | |
| else: | |
| normX = 1 - ((x - min(rawValues)) / (max(rawValues) - min(rawValues))) | |
| normalisedValues.append(normX) | |
| return normalisedValues | |
| def plot_comparison_graphs(tab, data,normalise): | |
| BDColor = ['#59DC23', ] * len(data['Dist. Bias'].tolist()) | |
| HJColor = ['#2359DC', ] * len(data['Hallucination'].tolist()) | |
| MGColor = ['#DC2359', ] * len(data['Gen. Miss Rate'].tolist()) | |
| if not normalise: | |
| BDData = data['Dist. Bias'] | |
| HJData = data['Hallucination'] | |
| MGData = data['Gen. Miss Rate'] | |
| else: | |
| data['Dist. Bias'] = normalise_data(data['Dist. Bias'], 'BD') | |
| data['Hallucination'] = normalise_data(data['Hallucination'], 'HJ') | |
| data['Gen. Miss Rate'] = normalise_data(data['Gen. Miss Rate'], 'MG') | |
| with tab: | |
| st.write("Selected evaluations for comparison:") | |
| st.write(data) | |
| BDFig = px.bar(x=data['Eval. ID'], y=data['Dist. Bias'],color_discrete_sequence=BDColor).update_layout( | |
| xaxis_title=r'Evaluation ID', yaxis_title=r'Distribution Bias', title=r'Distribution Bias Comparison') | |
| st.plotly_chart(BDFig, theme="streamlit",use_container_width=True) | |
| HJFig = px.bar(x=data['Eval. ID'], y=data['Hallucination'],color_discrete_sequence=HJColor).update_layout( | |
| xaxis_title=r'Evaluation ID', yaxis_title=r'Jaccard Hallucination', title=r'Jaccard Hallucination Comparison') | |
| st.plotly_chart(HJFig, theme="streamlit",use_container_width=True) | |
| MGFig = px.bar(x=data['Eval. ID'], y=data['Gen. Miss Rate'],color_discrete_sequence=MGColor).update_layout( | |
| xaxis_title=r'Evaluation ID', yaxis_title=r'Generative Miss Rate', title=r'Generative Miss Rate Comparison') | |
| st.plotly_chart(MGFig, theme="streamlit",use_container_width=True) | |
| if normalise: | |
| Full3DFig = px.scatter_3d(data, x='Dist. Bias', y='Hallucination', z='Gen. Miss Rate', | |
| width=800, height=800,color='Eval. ID',title='3D Text-to-Image Model Bias Comparison') | |
| st.plotly_chart(Full3DFig, theme="streamlit",use_container_width=True) | |