imflash217 committed
Commit a6ce242 · 1 Parent(s): 4a89cb7

Update app.py

Files changed (1)
  1. app.py +20 -66
app.py CHANGED
@@ -1,86 +1,40 @@
 import gradio as gr
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+import requests
 from PIL import Image
-from transformers import TrOCRProcessor
-from transformers import VisionEncoderDecoderModel
-import cv2
-import matplotlib.pyplot as plt
-import numpy as np
-import warnings
-
-warnings.filterwarnings("ignore")

 processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
 model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

-def extract_text(image):
-    # calling the processor is equivalent to calling the feature extractor
-    pixel_values = processor(image, return_tensors="pt").pixel_values
-    generated_ids = model.generate(pixel_values)
-    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return generated_text
-
-def hand_written(image_raw):
-    image_raw = np.array(image_raw)
-    image = cv2.cvtColor(image_raw,cv2.COLOR_BGR2GRAY)
-    image = cv2.GaussianBlur(image,(5,5),0)
-    image = cv2.threshold(image,200,255,cv2.THRESH_BINARY_INV)[1]
-    kernal = cv2.getStructuringElement(cv2.MORPH_RECT,(10,1))
-    image = cv2.dilate(image,kernal,iterations=5)
-    contours,hier = cv2.findContours(image,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
-    all_box = []
-    for i in contours:
-        bbox = cv2.boundingRect(i)
-        all_box.append(bbox)
-
-    # Calculate maximum rectangle height
-    c = np.array(all_box)
-    max_height = np.max(c[::, 3])
-
-    # Sort the contours by y-value
-    by_y = sorted(all_box, key=lambda x: x[1])  # y values
-
-    line_y = by_y[0][1]  # first y
-    line = 1
-    by_line = []
-
-    # Assign a line number to each contour
-    for x, y, w, h in by_y:
-        if y > line_y + max_height:
-            line_y = y
-            line += 1
-        by_line.append((line, x, y, w, h))
-
-    # This will now sort automatically by line then by x
-    contours_sorted = [(x, y, w, h) for line, x, y, w, h in sorted(by_line)]
-
-    text = ""
-
-    for line in contours_sorted:
-        x,y,w,h = line
-        cropped_image = image_raw[y:y+h,x:x+w]
-        try:
-            extracted = extract_text(cropped_image)
-            if not extracted == "0 0" and not extracted == "0 1":
-                text = "\n".join([text,extracted])
-        except:
-            print("skiping")
-            pass
-    return text
-
-## gradio app
-
-title = "TrOCR + EN_ICR demo"
-description = "TrOCR Handwritten Recognizer"
+# load image examples
+urls = ['https://fki.tic.heia-fr.ch/static/img/a01-122-02.jpg', 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSoolxi9yWGAT5SLZShv8vVd0bz47UWRzQC19fDTeE8GmGv_Rn-PCF1pP1rrUx8kOjA4gg&usqp=CAU',
+        'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRNYtTuSBpZPV_nkBYPMFwVVD9asZOPgHww4epu9EqWgDmXW--sE2o8og40ZfDGo87j5w&usqp=CAU']
+for idx, url in enumerate(urls):
+    image = Image.open(requests.get(url, stream=True).raw)
+    image.save(f"image_{idx}.png")
+
+def process_image(image):
+    # prepare image
+    pixel_values = processor(image, return_tensors="pt").pixel_values
+
+    # generate (no beam search)
+    generated_ids = model.generate(pixel_values)
+
+    # decode
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+    return generated_text
+
+title = "TrOCR + EN_ICR"
+description = "Demo for handwritten TrOCR"
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2109.10282'>TrOCR: Transformer-based Optical Character Recognition with Pre-trained Models</a> | <a href='https://github.com/microsoft/unilm/tree/master/trocr'>Github Repo</a></p>"
-examples =[["img_hw_0.png"]]
+examples =[["img_hw_0.png"], ["img_hw_1.png"]]

-iface = gr.Interface(fn=hand_written,
+iface = gr.Interface(fn=process_image,
                      inputs=gr.inputs.Image(type="pil"),
                      outputs=gr.outputs.Textbox(),
                      title=title,
                      description=description,
                      article=article,
                      examples=examples)
-
-iface.launch(debug=True,share=True)
+iface.launch(debug=True)
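
Note: the committed app.py still targets Gradio's legacy gr.inputs / gr.outputs namespaces, which Gradio 3.x deprecated and later releases removed in favor of top-level components. A minimal sketch of the same demo against the newer component API, assuming gradio >= 3 is installed (the OCR logic is copied unchanged from the commit above):

import gradio as gr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

def process_image(image):
    # preprocess the PIL image into a batch of pixel values
    pixel_values = processor(image, return_tensors="pt").pixel_values
    # greedy decoding, as in the committed app (no beam search)
    generated_ids = model.generate(pixel_values)
    # strip special tokens and return the single decoded sequence
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

# gr.Image / gr.Textbox replace the legacy gr.inputs.* / gr.outputs.* classes
iface = gr.Interface(fn=process_image,
                     inputs=gr.Image(type="pil"),
                     outputs=gr.Textbox())
iface.launch()

Only the interface construction differs; the model loading and decoding calls are exactly those in the committed file.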