Spaces:
Sleeping
Sleeping
Upload model0.py
Browse files
model0.py
CHANGED
|
@@ -3,6 +3,7 @@ import openai
|
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
import os
|
| 5 |
import json
|
|
|
|
| 6 |
|
| 7 |
def model0(path):
|
| 8 |
ocr = CnOcr(rec_model_name='en_PP-OCRv3')
|
|
@@ -19,25 +20,29 @@ def model0(path):
|
|
| 19 |
if item['text'] not in invalid_list:
|
| 20 |
data_set_1.append(item['text'])
|
| 21 |
|
|
|
|
|
|
|
| 22 |
completion = openai.ChatCompletion.create(
|
| 23 |
model = "gpt-3.5-turbo",
|
| 24 |
temperature = 0,
|
| 25 |
messages = [
|
| 26 |
{"role": "system", "content": "You are an AI assistant for extracting data from HKID card with following information \
|
| 27 |
-
(name,
|
| 28 |
dictionary format"},
|
| 29 |
{"role": "user", "content": f"Extract data from the following set of text: {data_set_1}. \
|
| 30 |
You have three types of data to extract. \
|
| 31 |
1. id card holder full name (it noramlly is a chinese name, including surname and family \
|
| 32 |
name in English spelling, and it may be separate in different fields in the data set for surname and family name \
|
| 33 |
sometimes) \
|
| 34 |
-
2.
|
| 35 |
-
because date of
|
| 36 |
-
3.
|
|
|
|
| 37 |
(a) @ represents any one or two capital letters of the alphabet. \
|
| 38 |
(b) # is the check digit which has 11 possible values from 0 to 9 and A.) \
|
| 39 |
Remember to include the check digit with () \
|
| 40 |
Only reply a dictionary. No need to add other words or explanation. Use double quote for dictionary."},
|
|
|
|
| 41 |
]
|
| 42 |
)
|
| 43 |
|
|
@@ -46,9 +51,17 @@ def model0(path):
|
|
| 46 |
print(data)
|
| 47 |
|
| 48 |
id_data = json.loads(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
print(id_data)
|
| 51 |
-
return
|
| 52 |
# return [name, valid_hkid, hkid, issuedate]
|
| 53 |
-
|
| 54 |
-
model0('dontTouchMe/IMG_4499.jpg')
|
|
|
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
import os
|
| 5 |
import json
|
| 6 |
+
import checkTool
|
| 7 |
|
| 8 |
def model0(path):
|
| 9 |
ocr = CnOcr(rec_model_name='en_PP-OCRv3')
|
|
|
|
| 20 |
if item['text'] not in invalid_list:
|
| 21 |
data_set_1.append(item['text'])
|
| 22 |
|
| 23 |
+
print(f'All data here: {data_set_1}')
|
| 24 |
+
|
| 25 |
completion = openai.ChatCompletion.create(
|
| 26 |
model = "gpt-3.5-turbo",
|
| 27 |
temperature = 0,
|
| 28 |
messages = [
|
| 29 |
{"role": "system", "content": "You are an AI assistant for extracting data from HKID card with following information \
|
| 30 |
+
(name, date of birth, date of issue, HKID number) from HKID card. Uppercase and lowercase letters are the same. Store the results in \
|
| 31 |
dictionary format"},
|
| 32 |
{"role": "user", "content": f"Extract data from the following set of text: {data_set_1}. \
|
| 33 |
You have three types of data to extract. \
|
| 34 |
1. id card holder full name (it noramlly is a chinese name, including surname and family \
|
| 35 |
name in English spelling, and it may be separate in different fields in the data set for surname and family name \
|
| 36 |
sometimes) \
|
| 37 |
+
2. date of birth (should be a date with year, month and day, e.g. 23-02-2003 is the required format, but 26-11 is not \
|
| 38 |
+
because date of birth should have 10 characters) Only choose valid format!!!\
|
| 39 |
+
3. date of issue (a string with format xx-xx) \
|
| 40 |
+
4. HKID number (The standard format of HKID number is @123456(#) e.g. A123456(7) is a valid HKID number. \
|
| 41 |
(a) @ represents any one or two capital letters of the alphabet. \
|
| 42 |
(b) # is the check digit which has 11 possible values from 0 to 9 and A.) \
|
| 43 |
Remember to include the check digit with () \
|
| 44 |
Only reply a dictionary. No need to add other words or explanation. Use double quote for dictionary."},
|
| 45 |
+
|
| 46 |
]
|
| 47 |
)
|
| 48 |
|
|
|
|
| 51 |
print(data)
|
| 52 |
|
| 53 |
id_data = json.loads(data)
|
| 54 |
+
|
| 55 |
+
name = id_data["name"]
|
| 56 |
+
dateofbirth = id_data["date of birth"]
|
| 57 |
+
issuedate = id_data["date of issue"]
|
| 58 |
+
hkid = id_data["HKID number"]
|
| 59 |
+
if checkTool.validate_hkid(hkid=hkid):
|
| 60 |
+
valid_hkid = 'True'
|
| 61 |
+
else:
|
| 62 |
+
valid_hkid = 'False'
|
| 63 |
+
name = checkTool.seperate_name(name)
|
| 64 |
|
| 65 |
print(id_data)
|
| 66 |
+
return [name, valid_hkid, hkid, issuedate, dateofbirth]
|
| 67 |
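# Previous four-field return shape, kept for reference (the active return above also includes dateofbirth): return [name, valid_hkid, hkid, issuedate]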
|
|
|
|
|
|