IFMedTechdemo committed on
Commit
a071219
·
verified ·
1 Parent(s): cb81381

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -7
app.py CHANGED
@@ -21,17 +21,24 @@ def preprocess_image_for_ocr(image):
21
 
22
 
23
 
 
 
24
  def extract_medication_lines(text):
25
  """
26
- Extracts medicine lines with possible decimals in dose (e.g., 2.5MG), with or without explicit dose.
 
 
 
 
27
  """
28
- form = r"(TAB(L?ET)?|CAP(SULE)?|SYRUP|SYP|DROP(S)?|INJ(CTION)?|OINTMENT|CREAM|GEL|PATCH|SOL(UTION)?|ORAL)"
 
29
  name = r"([A-Z0-9\-/]+(?:\s+[A-Z0-9\-/]+){0,4})"
30
- opt_form = fr"(?:\s+{form})?" # form can repeat at end
31
- # Now dose accepts decimal e.g. 2.5MG and number/slash (e.g. 20/50MG)
32
- opt_dose = r"(?:\s*\d{1,4}(?:\.\d+)?(?:/\d{1,4}(?:\.\d+)?)?\s*(mg|ml|mcg|g|kg|units|IU|%|))?"
33
 
34
- pat = re.compile(
35
  fr"\b{form}\s+{name}{opt_form}{opt_dose}\b",
36
  re.IGNORECASE
37
  )
@@ -40,7 +47,7 @@ def extract_medication_lines(text):
40
  matches = set()
41
  for line in lines:
42
  line = line.strip()
43
- for m in pat.finditer(line):
44
  out = m.group(0)
45
  out = re.sub(r"\s+", " ", out).strip()
46
  matches.add(out.upper())
 
21
 
22
 
23
 
24
+ import re
25
+
26
  def extract_medication_lines(text):
27
  """
28
+ Extracts medication lines robustly:
29
+ - Matches form as T./TAB./TAB/TABLET/TABLETS, C./CAP./CAP/CAPSULE/CAPSULES, etc.
30
+ - Floating/slash doses (e.g., 2.5MG, 10/20MG)
31
+ - Optional second form (prefix/suffix/mid)
32
+ - Any case
33
  """
34
+ # Comprehensive form pattern (optional . or plural S)
35
+ form = r"(T\.?|TAB\.?|TABLET(S)?|C\.?|CAP\.?|CAPSULE(S)?|SYRUP(S)?|SYP|DROP(S)?|INJ\.?|INJECTION(S)?|OINTMENT(S)?|CREAM(S)?|GEL(S)?|PATCH(ES)?|SOL\.?|SOLUTION(S)?|ORAL)"
36
  name = r"([A-Z0-9\-/]+(?:\s+[A-Z0-9\-/]+){0,4})"
37
+ opt_form = fr"(?:\s+{form})?" # allow form at end as well
38
+ # Dose: decimal numbers, slash combos, unit, or blank
39
+ opt_dose = r"(?:\s*\d{1,4}(?:\.\d+)?(?:/\d{1,4}(?:\.\d+)?)?\s*(mg|ml|mcg|g|kg|units|iu|%|))?"
40
 
41
+ pattern = re.compile(
42
  fr"\b{form}\s+{name}{opt_form}{opt_dose}\b",
43
  re.IGNORECASE
44
  )
 
47
  matches = set()
48
  for line in lines:
49
  line = line.strip()
50
+ for m in pattern.finditer(line):
51
  out = m.group(0)
52
  out = re.sub(r"\s+", " ", out).strip()
53
  matches.add(out.upper())