Spaces:
Sleeping
Sleeping
| from cnocr import CnOcr | |
| import openai | |
| from dotenv import load_dotenv | |
| import os | |
| import json | |
def _parse_json_reply(reply):
    """Decode the JSON payload contained in a chat-model reply.

    The reply is parsed as-is first. If that fails, the outermost
    ``{...}`` span is parsed instead, so replies wrapped in a Markdown
    code fence or surrounded by extra prose are still accepted.

    Raises:
        json.JSONDecodeError: if neither attempt yields valid JSON.
    """
    try:
        return json.loads(reply)
    except json.JSONDecodeError:
        start = reply.find("{")
        end = reply.rfind("}")
        if start == -1 or end < start:
            # Nothing that looks like a JSON object: surface the original error.
            raise
        return json.loads(reply[start:end + 1])


def get_chiname(path):
    """Extract the Chinese name from an HKID card image.

    The image is run through CnOcr, the recognised text fragments are
    sent to the OpenAI chat completion API, and the model's reply is
    decoded as JSON.

    Args:
        path: Image to recognise; passed unchanged to ``CnOcr.ocr``.

    Returns:
        The decoded JSON reply. The prompt asks for a dictionary holding
        the Chinese name; the key names are chosen by the model and are
        not enforced here.

    Raises:
        json.JSONDecodeError: if the model reply contains no valid JSON.
    """
    ocr = CnOcr(rec_model_name='chinese_cht_PP-OCRv3')
    ocr_items = ocr.ocr(path)
    # The OCR output is deliberately not printed: it holds the card
    # holder's personal data.

    load_dotenv()
    openai.api_key = os.environ.get("data-extraction-api")

    # Drop fragments that are only a blank or a comma; they carry no information.
    invalid_texts = (' ', ',')
    data_set_1 = [
        item['text'] for item in ocr_items if item['text'] not in invalid_texts
    ]

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {
                "role": "system",
                "content": "You are an AI assistant for extracting Chinese name from HKID card.",
            },
            {
                "role": "user",
                # Adjacent literals instead of a backslash continuation, so the
                # source indentation does not leak into the prompt text.
                "content": (
                    f"Extract data from the following set of text: {data_set_1}. "
                    "You only need to return a dictonary with Chinese name in Chinese. "
                    "Use double quote!"
                ),
            },
        ],
    )
    reply = completion['choices'][0]['message']['content']
    return _parse_json_reply(reply)