meg-huggingface committed
Commit dd0583d · 1 parent: c3d29b7
Removing logging bug

src/leaderboard/read_evals.py  CHANGED  (+11 -14)
@@ -11,10 +11,7 @@ from src.display.formatting import make_clickable_model
 from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
 from src.submission.check_validity import is_model_on_hub
 
-from src.logging import setup_logger, log_file
-
 logging.basicConfig(level=logging.DEBUG)
-logger = setup_logger(__name__)
 
 
 @dataclass
@@ -75,13 +72,13 @@ class EvalResult:
         results = {}
         for task in Tasks:
             task = task.value
-
-
+            logging.info("Task: %s" % task.metric)
+            logging.info(data["results"].items())
             # We average all scores of a given metric (not all metrics are present in all files)
             # This looks a bit odd, should just be the one score in the one file. (?)
             scores = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
-
-
+            logging.info("scores are:")
+            logging.info(scores)
             if scores.size == 0 or any([score is None for score in scores]):
                 continue
 
@@ -114,7 +111,7 @@ class EvalResult:
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
         except Exception:
-
+            logging.error(f"Could not find request file for {self.org}/{self.model}") #with precision {self.precision.value.name}")
 
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
@@ -166,8 +163,8 @@ def get_request_file_for_model(requests_path, model_name, precision):
 def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
-
-
+    logging.debug('looking in results_path: %s' % results_path)
+    logging.debug('looking in requests_path: %s' % requests_path)
     for root, _, files in os.walk(results_path):
         # We should only have json files in model results
         if len(files) == 0 or any([not f.endswith(".json") for f in files]):
@@ -184,8 +181,8 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
 
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
-
-
+        logging.debug("Examining filepath:")
+        logging.debug(model_result_filepath)
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
@@ -196,8 +193,8 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
             eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
         else:
             eval_results[eval_name] = eval_result
-
-
+        logging.info("eval results is")
+        logging.info(eval_results)
 
     results = []
     for v in eval_results.values():
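The change stops importing the Space's own setup_logger/log_file helpers from src.logging and calls the standard-library logging module directly, after a single logging.basicConfig(level=logging.DEBUG) at import time. A minimal sketch of that pattern follows; the helper name scan_results and its messages are made up for illustration and are not part of the commit:

import logging
import os

# Configure the root logger once, at import time, as the new code does.
logging.basicConfig(level=logging.DEBUG)

def scan_results(results_path: str) -> list[str]:
    """Hypothetical helper: walk a results folder and log what is found."""
    logging.debug("looking in results_path: %s", results_path)
    filepaths = []
    for root, _, files in os.walk(results_path):
        filepaths.extend(os.path.join(root, f) for f in files if f.endswith(".json"))
    logging.info("found %d result files", len(filepaths))
    return filepaths

One side note on style: the added lines use eager %-formatting (logging.debug('looking in results_path: %s' % results_path)), which builds the string even when the log level would discard it; passing the arguments separately, as in the sketch, defers formatting to the logging machinery. Both work here.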
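The hunk inside EvalResult.init_from_json_file instruments the per-task score lookup: for each task it collects, from data["results"], the value of task.metric under the key matching task.benchmark, skips the task if anything is missing, and (per the comment) averages what it finds. A standalone sketch of that selection logic, with toy benchmark/metric names and a hard-coded data dict standing in for the parsed results JSON:

import numpy as np

# Toy stand-ins; the real (benchmark, metric) pairs come from Tasks in src/display/utils.py.
TASKS = [("toxicity", "score"), ("honesty", "acc")]
data = {"results": {"toxicity": {"score": 0.12}, "honesty": {"acc": 0.87}}}

results = {}
for benchmark, metric in TASKS:
    # Pick the metric out of every results entry whose key matches this benchmark.
    scores = np.array([v.get(metric, None) for k, v in data["results"].items() if k == benchmark])
    if scores.size == 0 or any(score is None for score in scores):
        continue  # metric missing for this task in this file
    results[benchmark] = float(np.mean(scores))

print(results)  # {'toxicity': 0.12, 'honesty': 0.87}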
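Finally, the instrumented loop in get_raw_eval_results deduplicates parsed files by evaluation name: when several JSON files map to the same eval_name, their per-task results are merged into one entry, keeping only non-None scores from the incoming file. A self-contained sketch of that merge rule, using plain dicts (and invented eval names and scores) in place of the EvalResult dataclass:

# Each parsed file contributes an (eval_name, results) pair; the values here are invented.
parsed_files = [
    ("model-a_float16", {"toxicity": 0.12, "honesty": None}),
    ("model-a_float16", {"honesty": 0.87}),  # a second file for the same eval
    ("model-b_float16", {"toxicity": 0.30}),
]

eval_results: dict[str, dict] = {}
for eval_name, task_results in parsed_files:
    if eval_name in eval_results:
        # Same rule as the diff: merge, dropping None scores from the incoming file.
        eval_results[eval_name].update({k: v for k, v in task_results.items() if v is not None})
    else:
        eval_results[eval_name] = task_results

print(eval_results)
# {'model-a_float16': {'toxicity': 0.12, 'honesty': 0.87}, 'model-b_float16': {'toxicity': 0.30}}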