Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,18 +18,11 @@ def preprocess_image_for_ocr(image):
|
|
| 18 |
preprocessed_pil = Image.fromarray(adaptive_threshold)
|
| 19 |
return preprocessed_pil
|
| 20 |
|
|
|
|
| 21 |
def extract_medication_lines(text):
|
| 22 |
-
"""
|
| 23 |
-
Flexible regex: Find lines with [form], [name], [dose] anywhere.
|
| 24 |
-
Handles free text/table/mixed layouts.
|
| 25 |
-
"""
|
| 26 |
-
# Medicine forms
|
| 27 |
form_pattern = r"(TAB(L?ET)?|CAP(SULE)?|SYRUP|SYP|DROP(S)?|INJ(CTION)?|OINTMENT|CREAM|GEL|PATCH|SOL(UTION)?|ORAL)"
|
| 28 |
-
# Name: 1 to 5 whitespace-separated tokens (each made of letters, digits, hyphens or slashes); mixed-case matching depends on the regex flags
|
| 29 |
name_pattern = r"([A-Z0-9\-/]+(?:\s+[A-Z0-9\-/]+){0,4})"
|
| 30 |
-
# Dose/concentration: 1-4 digits, optional space, units
|
| 31 |
dose_pattern = r"(\d{1,4}\s*(mg|ml|mcg|g|kg|units|IU)|\d{1,2}\s*%(\s*w\/w|\s*w\/v|\s*v\/v)?)"
|
| 32 |
-
# Allow any order: form+name+dose/mid/suffix/prefix
|
| 33 |
main_pattern = (
|
| 34 |
r"(?<!\w)(" + form_pattern + r")[\s\-]+"
|
| 35 |
r"" + name_pattern + r"" # name after form
|
|
@@ -42,11 +35,13 @@ def extract_medication_lines(text):
|
|
| 42 |
line_stripped = line.strip()
|
| 43 |
match = med_regex.search(line_stripped)
|
| 44 |
if match:
|
| 45 |
-
#
|
| 46 |
-
|
| 47 |
-
meds.append(cleaned.strip())
|
| 48 |
return '\n'.join(meds)
|
| 49 |
|
|
|
|
|
|
|
|
|
|
| 50 |
def clinical_ner_extract(text, use_gpu=False):
|
| 51 |
"""
|
| 52 |
Uses ClinicalNER for medicine name, then finds form/dose in source sentence.
|
|
|
|
| 18 |
preprocessed_pil = Image.fromarray(adaptive_threshold)
|
| 19 |
return preprocessed_pil
|
| 20 |
|
| 21 |
+
|
| 22 |
def extract_medication_lines(text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
form_pattern = r"(TAB(L?ET)?|CAP(SULE)?|SYRUP|SYP|DROP(S)?|INJ(CTION)?|OINTMENT|CREAM|GEL|PATCH|SOL(UTION)?|ORAL)"
|
|
|
|
| 24 |
name_pattern = r"([A-Z0-9\-/]+(?:\s+[A-Z0-9\-/]+){0,4})"
|
|
|
|
| 25 |
dose_pattern = r"(\d{1,4}\s*(mg|ml|mcg|g|kg|units|IU)|\d{1,2}\s*%(\s*w\/w|\s*w\/v|\s*v\/v)?)"
|
|
|
|
| 26 |
main_pattern = (
|
| 27 |
r"(?<!\w)(" + form_pattern + r")[\s\-]+"
|
| 28 |
r"" + name_pattern + r"" # name after form
|
|
|
|
| 35 |
line_stripped = line.strip()
|
| 36 |
match = med_regex.search(line_stripped)
|
| 37 |
if match:
|
| 38 |
+
# Do not rely on capture-group indices; append the full matched text via match.group(0)
|
| 39 |
+
meds.append(match.group(0).strip())
|
|
|
|
| 40 |
return '\n'.join(meds)
|
| 41 |
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
|
| 45 |
def clinical_ner_extract(text, use_gpu=False):
|
| 46 |
"""
|
| 47 |
Uses ClinicalNER for medicine name, then finds form/dose in source sentence.
|