aauss committed on
Commit
ddf1ba7
·
1 Parent(s): 29a0e42

Fix misconfiguration in feature types

Browse files
Files changed (2) hide show
  1. test_of_time_accuracy.py +1 -4
  2. tests.py +6 -6
test_of_time_accuracy.py CHANGED
@@ -56,13 +56,12 @@ Examples:
56
 
57
 
58
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
59
- class TestofTimeAccuracy(evaluate.Metric):
60
  """Accuracy metric for the Test of Time benchmark by Bahar et al. (2025)."""
61
 
62
  __test__ = False
63
 
64
  def _info(self):
65
- # TODO: Specifies the evaluate.EvaluationModuleInfo object
66
  return evaluate.MetricInfo(
67
  module_type="metric",
68
  description=_DESCRIPTION,
@@ -73,8 +72,6 @@ class TestofTimeAccuracy(evaluate.Metric):
73
  {
74
  "predictions": datasets.Value("string"),
75
  "references": datasets.Value("string"),
76
- "subset": datasets.Value("string"),
77
- "return_average": datasets.Value("bool"),
78
  }
79
  ),
80
  # Homepage of the module for documentation
 
56
 
57
 
58
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
59
+ class TestOfTimeAccuracy(evaluate.Metric):
60
  """Accuracy metric for the Test of Time benchmark by Bahar et al. (2025)."""
61
 
62
  __test__ = False
63
 
64
  def _info(self):
 
65
  return evaluate.MetricInfo(
66
  module_type="metric",
67
  description=_DESCRIPTION,
 
72
  {
73
  "predictions": datasets.Value("string"),
74
  "references": datasets.Value("string"),
 
 
75
  }
76
  ),
77
  # Homepage of the module for documentation
tests.py CHANGED
@@ -1,6 +1,6 @@
1
  import pytest
2
 
3
- from test_of_time_accuracy import TestofTimeAccuracy
4
 
5
  arithmetic_test_cases = {
6
  "predictions": [
@@ -30,7 +30,7 @@ semantic_test_cases = {
30
 
31
 
32
  def test_arithmetic_accuracy():
33
- metric = TestofTimeAccuracy()
34
  results = metric.compute(
35
  predictions=arithmetic_test_cases["predictions"],
36
  references=arithmetic_test_cases["references"],
@@ -40,7 +40,7 @@ def test_arithmetic_accuracy():
40
 
41
 
42
  def test_semantic_accuracy():
43
- metric = TestofTimeAccuracy()
44
  results = metric.compute(
45
  predictions=semantic_test_cases["predictions"],
46
  references=semantic_test_cases["references"],
@@ -50,7 +50,7 @@ def test_semantic_accuracy():
50
 
51
 
52
  def test_per_item_arithmetic_accuracy():
53
- metric = TestofTimeAccuracy()
54
  results = metric.compute(
55
  predictions=arithmetic_test_cases["predictions"],
56
  references=arithmetic_test_cases["references"],
@@ -61,7 +61,7 @@ def test_per_item_arithmetic_accuracy():
61
 
62
 
63
  def test_per_item_semantic_accuracy():
64
- metric = TestofTimeAccuracy()
65
  results = metric.compute(
66
  predictions=semantic_test_cases["predictions"],
67
  references=semantic_test_cases["references"],
@@ -72,7 +72,7 @@ def test_per_item_semantic_accuracy():
72
 
73
 
74
  def test_invalid_subset():
75
- metric = TestofTimeAccuracy()
76
  with pytest.raises(ValueError):
77
  metric.compute(
78
  predictions=arithmetic_test_cases["predictions"],
 
1
  import pytest
2
 
3
+ from test_of_time_accuracy import TestOfTimeAccuracy
4
 
5
  arithmetic_test_cases = {
6
  "predictions": [
 
30
 
31
 
32
  def test_arithmetic_accuracy():
33
+ metric = TestOfTimeAccuracy()
34
  results = metric.compute(
35
  predictions=arithmetic_test_cases["predictions"],
36
  references=arithmetic_test_cases["references"],
 
40
 
41
 
42
  def test_semantic_accuracy():
43
+ metric = TestOfTimeAccuracy()
44
  results = metric.compute(
45
  predictions=semantic_test_cases["predictions"],
46
  references=semantic_test_cases["references"],
 
50
 
51
 
52
  def test_per_item_arithmetic_accuracy():
53
+ metric = TestOfTimeAccuracy()
54
  results = metric.compute(
55
  predictions=arithmetic_test_cases["predictions"],
56
  references=arithmetic_test_cases["references"],
 
61
 
62
 
63
  def test_per_item_semantic_accuracy():
64
+ metric = TestOfTimeAccuracy()
65
  results = metric.compute(
66
  predictions=semantic_test_cases["predictions"],
67
  references=semantic_test_cases["references"],
 
72
 
73
 
74
  def test_invalid_subset():
75
+ metric = TestOfTimeAccuracy()
76
  with pytest.raises(ValueError):
77
  metric.compute(
78
  predictions=arithmetic_test_cases["predictions"],