Spaces:
Running
Running
Handle unescaped \n, \r, and \t characters in the response.
Browse files
- test_of_time_accuracy.py +12 -0
- tests/test_arithmetic_scoring.py +5 -2
test_of_time_accuracy.py
CHANGED
|
@@ -110,6 +110,18 @@ class TestOfTimeAccuracy(evaluate.Metric):
|
|
| 110 |
if isinstance(obj, dict):
|
| 111 |
return obj
|
| 112 |
idx = next_idx
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
except ValueError:
|
| 114 |
idx += 1
|
| 115 |
return None
|
|
|
|
| 110 |
if isinstance(obj, dict):
|
| 111 |
return obj
|
| 112 |
idx = next_idx
|
| 113 |
+
except json.JSONDecodeError:
|
| 114 |
+
# Try escaping newlines and parsing again from this position
|
| 115 |
+
try:
|
| 116 |
+
# Find the potential JSON substring and escape newlines
|
| 117 |
+
remaining = text[idx:]
|
| 118 |
+
fixed = remaining.replace('\n', '\\n').replace('\r', '\\r').replace('\t', '\\t')
|
| 119 |
+
obj, _ = decoder.raw_decode(fixed, 0)
|
| 120 |
+
if isinstance(obj, dict):
|
| 121 |
+
return obj
|
| 122 |
+
except (json.JSONDecodeError, ValueError):
|
| 123 |
+
pass
|
| 124 |
+
idx += 1
|
| 125 |
except ValueError:
|
| 126 |
idx += 1
|
| 127 |
return None
|
tests/test_arithmetic_scoring.py
CHANGED
|
@@ -12,6 +12,8 @@ arithmetic_test_cases = {
|
|
| 12 |
' "To find the date of the second most important game, we need to subtract 7 days from the date of the most important game. We can do this by counting back 7 days from April 14, 2005. April 14 - 7 days = April 7, 2005", "answer": "2005-04-07"}',
|
| 13 |
'\n```json\n{\n "explanation": "Step 1: Determine the time it takes the robot to carry a single box. The robot takes 4 hours, 34 minutes, and 30 seconds to carry 2 boxes. We divide this time by 2 to find the time per box.\\n- Hours: 4 / 2 = 2 hours\\n- Minutes: 34 / 2 = 17 minutes\\n- Seconds: 30 / 2 = 15 seconds\\nSo, it takes the robot 2 hours, 17 minutes, and 15 seconds to carry one box.\\n\\nStep 2: Calculate the total time to carry 25 boxes. We multiply the time per box by the total number of boxes (25).\\n- Total Hours: 2 hours/box * 25 boxes = 50 hours\\n- Total Minutes: 17 minutes/box * 25 boxes = 425 minutes\\n- Total Seconds: 15 seconds/box * 25 boxes = 375 seconds\\n\\nStep 3: Convert the calculated time into the standard H:M:S format by carrying over excess seconds and minutes.\\n- Convert seconds to minutes: 375 seconds is equal to 6 minutes and 15 seconds (since 375 / 60 = 6 with a remainder of 15). We add the 6 minutes to our minutes total.\\n- New total: 50 hours, (425 + 6) minutes, 15 seconds -> 50 hours, 431 minutes, 15 seconds.\\n- Convert minutes to hours: 431 minutes is equal to 7 hours and 11 minutes (since 431 / 60 = 7 with a remainder of 11). We add the 7 hours to our hours total.\\n- New total: (50 + 7) hours, 11 minutes, 15 seconds -> 57 hours, 11 minutes, 15 seconds.\\n\\nThe final time is 57 hours, 11 minutes, and 15 seconds.",\n "H": 57,\n "M": 11,\n "S": 15\n}\n```',
|
| 14 |
'{"explanation": "some explanation", "age": "3319"}',
|
|
|
|
|
|
|
| 15 |
],
|
| 16 |
"references": [
|
| 17 |
'{"answer": "352 BC"}',
|
|
@@ -22,9 +24,10 @@ arithmetic_test_cases = {
|
|
| 22 |
'{"answer": "2005-04-07"}',
|
| 23 |
'{"H": 57.0, "M": 11.0, "S": 15.0}',
|
| 24 |
'{"answer": 3319}',
|
|
|
|
| 25 |
],
|
| 26 |
-
"result": {"accuracy":
|
| 27 |
-
"per_item_accuracy": [True, True, True, False,False, False, True, True],
|
| 28 |
}
|
| 29 |
|
| 30 |
|
|
|
|
| 12 |
' "To find the date of the second most important game, we need to subtract 7 days from the date of the most important game. We can do this by counting back 7 days from April 14, 2005. April 14 - 7 days = April 7, 2005", "answer": "2005-04-07"}',
|
| 13 |
'\n```json\n{\n "explanation": "Step 1: Determine the time it takes the robot to carry a single box. The robot takes 4 hours, 34 minutes, and 30 seconds to carry 2 boxes. We divide this time by 2 to find the time per box.\\n- Hours: 4 / 2 = 2 hours\\n- Minutes: 34 / 2 = 17 minutes\\n- Seconds: 30 / 2 = 15 seconds\\nSo, it takes the robot 2 hours, 17 minutes, and 15 seconds to carry one box.\\n\\nStep 2: Calculate the total time to carry 25 boxes. We multiply the time per box by the total number of boxes (25).\\n- Total Hours: 2 hours/box * 25 boxes = 50 hours\\n- Total Minutes: 17 minutes/box * 25 boxes = 425 minutes\\n- Total Seconds: 15 seconds/box * 25 boxes = 375 seconds\\n\\nStep 3: Convert the calculated time into the standard H:M:S format by carrying over excess seconds and minutes.\\n- Convert seconds to minutes: 375 seconds is equal to 6 minutes and 15 seconds (since 375 / 60 = 6 with a remainder of 15). We add the 6 minutes to our minutes total.\\n- New total: 50 hours, (425 + 6) minutes, 15 seconds -> 50 hours, 431 minutes, 15 seconds.\\n- Convert minutes to hours: 431 minutes is equal to 7 hours and 11 minutes (since 431 / 60 = 7 with a remainder of 11). We add the 7 hours to our hours total.\\n- New total: (50 + 7) hours, 11 minutes, 15 seconds -> 57 hours, 11 minutes, 15 seconds.\\n\\nThe final time is 57 hours, 11 minutes, and 15 seconds.",\n "H": 57,\n "M": 11,\n "S": 15\n}\n```',
|
| 14 |
'{"explanation": "some explanation", "age": "3319"}',
|
| 15 |
+
'JSON = {"explanation": "Let\'s analyze the available time slots for a 30-minute meeting, considering the constraints and schedules of Clara, William, and Alice.\n\n**1. Define Working Hours:** 9:00 AM to 5:00 PM\n\n**2. Identify Booked Time Slots:**\n\n* Clara: 9:30 AM - 12:30 PM\n* William: 1:30 PM - 3:30 PM\n* Alice: 9:00 AM - 11:00 AM (flexible, but this constraint impacts the meeting time)\n\n**3. Determine Available Time Slots for Clara and William:**\n\n* Clara is available: 9:00 AM - 9:30 AM and 12:30 PM - 5:00 PM\n* William is available: 9:00 AM - 1:30 PM and 3:30 PM - 5:00 PM\n\n**4. Find the Intersection of Clara and William\'s Availability:**\n\n* From 9:00 AM to 9:30 AM: Both are available.\n* From 9:30 AM to 12:30 PM: Clara is booked.\n* From 12:30 PM to 1:30 PM: Both are available.\n* From 1:30 PM to 3:30 PM: William is booked.\n* From 3:30 PM to 5:00 PM: Both are available.\n\nSo, the available time slots for both Clara and William are:\n* 9:00 AM - 9:30 AM\n* 12:30 PM - 1:30 PM\n* 3:30 PM - 5:00 PM\n\n**5. Consider Alice\'s Flexibility:** Since Alice is flexible and can shift her meetings, we don\'t need to consider her booked time slot (9:00 AM - 11:00 AM) when determining the possibilities.\n\n**6. Calculate Possible Meeting Start Times:**\n\n* **9:00 AM - 9:30 AM:** Possible start time: 9:00 AM. (1 possibility)\n* **12:30 PM - 1:30 PM:** Possible start times: 12:30 PM, 1:00 PM. (2 possibilities)\n* **3:30 PM - 5:00 PM:** Possible start times: 3:30 PM, 4:00 PM, 4:30 PM. (3 possibilities)\n\n**7. Total Possible Meeting Times:** 1 + 2 + 3 = 6\n\n", "answer": 6}'
|
| 16 |
+
|
| 17 |
],
|
| 18 |
"references": [
|
| 19 |
'{"answer": "352 BC"}',
|
|
|
|
| 24 |
'{"answer": "2005-04-07"}',
|
| 25 |
'{"H": 57.0, "M": 11.0, "S": 15.0}',
|
| 26 |
'{"answer": 3319}',
|
| 27 |
+
'{"answer": 6}',
|
| 28 |
],
|
| 29 |
+
"result": {"accuracy": 6 / 9},
|
| 30 |
+
"per_item_accuracy": [True, True, True, False,False, False, True, True, True],
|
| 31 |
}
|
| 32 |
|
| 33 |
|