Keemoz0 committed
Commit fa8646f · 1 Parent(s): 02ce220

RevertToOriginal

Files changed (1)
  1. app.py +6 -53
app.py CHANGED
@@ -2,81 +2,34 @@ import gradio as gr
 from huggingface_hub import hf_hub_download
 from PIL import Image
 import torch
-import pytesseract
 from transformers import AutoImageProcessor, AutoModelForObjectDetection
-import os
 
-os.system('chmod 777 /tmp')
-os.system('apt-get update -y')
-os.system('apt-get install tesseract-ocr -y')
-os.system('pip install -q pytesseract')
 # Load the processor and model for table structure recognition
 processor = AutoImageProcessor.from_pretrained("microsoft/table-transformer-structure-recognition")
 model = AutoModelForObjectDetection.from_pretrained("microsoft/table-transformer-structure-recognition")
 
-# Check if GPU is available and use it; otherwise, use CPU
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model.to(device)
-
-# Define the inference and OCR function
+# Define the inference function
 def predict(image):
     # Preprocess the input image
-    inputs = processor(images=image, return_tensors="pt").to(device)
+    inputs = processor(images=image, return_tensors="pt")
 
     # Perform object detection using the model
     with torch.no_grad():
         outputs = model(**inputs)
 
-    # Extract bounding boxes and filter for columns
+    # Extract bounding boxes and class labels
     predicted_boxes = outputs.pred_boxes[0].cpu().numpy()  # First image
     predicted_classes = outputs.logits.argmax(-1).cpu().numpy()  # Class predictions
 
-    # Prepare OCR results
-    ocr_results = []
-
-    image_width, image_height = image.size  # Get original image dimensions
-
-    # Iterate over detected boxes and perform OCR on columns
-    for box in predicted_boxes:
-        # Unpack the normalized bounding box (x_min, y_min, x_max, y_max)
-        x_min, y_min, x_max, y_max = box
-
-        # Ensure the coordinates are valid (x_max > x_min, y_max > y_min)
-        if x_min >= x_max or y_min >= y_max:
-            continue
-
-        # Convert normalized coordinates to pixel values
-        left = max(int(x_min * image_width), 0)
-        top = max(int(y_min * image_height), 0)
-        right = min(int(x_max * image_width), image_width)
-        bottom = min(int(y_max * image_height), image_height)
-
-        # Double-check that the coordinates are valid after conversion
-        if right <= left or bottom <= top:
-            continue
-
-        # Crop the image to the bounding box area
-        cropped_image = image.crop((left, top, right, bottom))
-
-        # Perform OCR on the cropped image
-        ocr_text = pytesseract.image_to_string(cropped_image)
-
-        # Append OCR result for this box
-        ocr_results.append({
-            "box": [left, top, right, bottom],
-            "text": ocr_text
-        })
+    # Return the bounding boxes for display
+    return {"boxes": predicted_boxes.tolist(), "classes": predicted_classes.tolist()}
 
-    # Return OCR results
-    return {"ocr_results": ocr_results}
-    #relaunch
 # Set up the Gradio interface
 interface = gr.Interface(
     fn=predict,  # The function that gets called when an image is uploaded
     inputs=gr.Image(type="pil"),  # Image input (as PIL image)
-    outputs="json",  # Outputting a JSON with the OCR results
+    outputs="json",  # Outputting a JSON with the boxes and classes
 )
 
 # Launch the Gradio app
 interface.launch()
-#recheck gradio bugging
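
For reference, after this revert predict() returns the model's raw detections rather than OCR text: pred_boxes holds one normalized box per detection query, and the argmax over the logits gives a class index per query (with the batch dimension still present). A minimal sketch for exercising the reverted function locally, e.g. appended temporarily below the definitions in app.py, could look like the following; the sample image path is a placeholder and not part of the commit:

from PIL import Image  # already imported in app.py; repeated here for clarity

# Hypothetical sanity check (not part of this commit): run the reverted
# predict() on a local table image and inspect the raw output.
sample = Image.open("table.png").convert("RGB")  # placeholder path
result = predict(sample)

# "boxes": one normalized (center_x, center_y, width, height) box per query,
# as produced by DETR-style models such as Table Transformer.
# "classes": class indices per query; the batch dimension is kept, hence [0].
print(len(result["boxes"]), "detection queries")
print(result["classes"][0][:10])

If pixel-space boxes are needed again, the image processor's post_process_object_detection() method (available on DETR-style processors) converts the normalized boxes back to original image coordinates given a confidence threshold and the image size.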