DSatishchandra committed on
Commit
a0b47f9
·
verified ·
1 Parent(s): 8dfad87

Update modules/kyc_processor.py

Browse files
Files changed (1) hide show
  1. modules/kyc_processor.py +14 -6
modules/kyc_processor.py CHANGED
@@ -1,8 +1,16 @@
1
- import pytesseract
2
- import cv2
 
 
 
 
 
3
 
4
  def extract_text_from_id(image_path):
5
- image = cv2.imread(image_path)
6
- if image is None:
7
- raise ValueError(f"Invalid image file: {image_path}")
8
- return pytesseract.image_to_string(image)
 
 
 
 
1
+ from PIL import Image
2
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
3
+ import torch
4
+
5
+ # Load once at startup
6
+ processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-stage1")
7
+ model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-stage1")
8
 
9
def extract_text_from_id(image_path):
    """Run the module-level TrOCR model on an ID image and return its text.

    Args:
        image_path: Path (or file-like object) accepted by ``PIL.Image.open``.

    Returns:
        The first decoded sequence produced by the model, as a string with
        special tokens stripped.

    Raises:
        Whatever ``PIL.Image.open`` raises for a missing or undecodable file
        (e.g. ``FileNotFoundError``, ``PIL.UnidentifiedImageError``); these
        are deliberately propagated unchanged.
    """
    # Image.open is lazy and keeps the file handle open; the context manager
    # guarantees it is released once the RGB copy has been materialised.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    # Keep the input on the same device as the model so the function keeps
    # working if the module-level model is ever moved off the CPU.
    pixel_values = pixel_values.to(model.device)

    # NOTE(review): TrOCR checkpoints are presumably trained on single
    # text-line crops; a full ID card may need line segmentation first —
    # confirm against the model card.
    generated_ids = model.generate(pixel_values)

    # A single image was passed in, so only the first decoded entry is used.
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0]