Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,17 +21,24 @@ def preprocess_image_for_ocr(image):
|
|
| 21 |
|
| 22 |
|
| 23 |
|
|
|
|
|
|
|
| 24 |
def extract_medication_lines(text):
|
| 25 |
"""
|
| 26 |
-
Extracts
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
"""
|
| 28 |
-
form
|
|
|
|
| 29 |
name = r"([A-Z0-9\-/]+(?:\s+[A-Z0-9\-/]+){0,4})"
|
| 30 |
-
opt_form = fr"(?:\s+{form})?"
|
| 31 |
-
#
|
| 32 |
-
opt_dose = r"(?:\s*\d{1,4}(?:\.\d+)?(?:/\d{1,4}(?:\.\d+)?)?\s*(mg|ml|mcg|g|kg|units|
|
| 33 |
|
| 34 |
-
|
| 35 |
fr"\b{form}\s+{name}{opt_form}{opt_dose}\b",
|
| 36 |
re.IGNORECASE
|
| 37 |
)
|
|
@@ -40,7 +47,7 @@ def extract_medication_lines(text):
|
|
| 40 |
matches = set()
|
| 41 |
for line in lines:
|
| 42 |
line = line.strip()
|
| 43 |
-
for m in
|
| 44 |
out = m.group(0)
|
| 45 |
out = re.sub(r"\s+", " ", out).strip()
|
| 46 |
matches.add(out.upper())
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
|
| 24 |
+
import re
|
| 25 |
+
|
| 26 |
def extract_medication_lines(text):
|
| 27 |
"""
|
| 28 |
+
Extracts medication lines robustly:
|
| 29 |
+
- Matches form as T./TAB./TAB/TABLET/TABLETS, C./CAP./CAP/CAPSULE/CAPSULES, etc.
|
| 30 |
+
- Floating/slash doses (e.g., 2.5MG, 10/20MG)
|
| 31 |
+
- Optional second form (prefix/suffix/mid)
|
| 32 |
+
- Any case
|
| 33 |
"""
|
| 34 |
+
# Comprehensive form pattern (optional . or plural S)
|
| 35 |
+
form = r"(T\.?|TAB\.?|TABLET(S)?|C\.?|CAP\.?|CAPSULE(S)?|SYRUP(S)?|SYP|DROP(S)?|INJ\.?|INJECTION(S)?|OINTMENT(S)?|CREAM(S)?|GEL(S)?|PATCH(ES)?|SOL\.?|SOLUTION(S)?|ORAL)"
|
| 36 |
name = r"([A-Z0-9\-/]+(?:\s+[A-Z0-9\-/]+){0,4})"
|
| 37 |
+
opt_form = fr"(?:\s+{form})?" # allow form at end as well
|
| 38 |
+
# Dose: decimal numbers, slash combos, unit, or blank
|
| 39 |
+
opt_dose = r"(?:\s*\d{1,4}(?:\.\d+)?(?:/\d{1,4}(?:\.\d+)?)?\s*(mg|ml|mcg|g|kg|units|iu|%|))?"
|
| 40 |
|
| 41 |
+
pattern = re.compile(
|
| 42 |
fr"\b{form}\s+{name}{opt_form}{opt_dose}\b",
|
| 43 |
re.IGNORECASE
|
| 44 |
)
|
|
|
|
| 47 |
matches = set()
|
| 48 |
for line in lines:
|
| 49 |
line = line.strip()
|
| 50 |
+
for m in pattern.finditer(line):
|
| 51 |
out = m.group(0)
|
| 52 |
out = re.sub(r"\s+", " ", out).strip()
|
| 53 |
matches.add(out.upper())
|