Tonic committed on
Commit
8bebde5
·
verified ·
1 Parent(s): a5d7d17

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -16
app.py CHANGED
@@ -43,22 +43,6 @@ def image_to_base64(image):
43
 
44
  @spaces.GPU()
45
  def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
46
- """
47
- Process an input image using the OCR model based on the specified task.
48
-
49
- Args:
50
- image (Union[dict, np.ndarray, str, PIL.Image]): Input image in various formats
51
- task (str): Type of OCR task to perform
52
- ocr_type (str, optional): Type of OCR processing ('ocr' or 'format')
53
- ocr_box (str, optional): Bounding box coordinates for fine-grained OCR
54
- ocr_color (str, optional): Color specification for fine-grained OCR
55
-
56
- Returns:
57
- tuple: (result_text, html_content, unique_id)
58
- - result_text (str): OCR processing result or error message
59
- - html_content (str): HTML content for visualization if applicable
60
- - unique_id (str): Unique identifier for the processed image
61
- """
62
  if image is None:
63
  return "Error: No image provided", None, None
64
 
@@ -191,6 +175,39 @@ def parse_latex_output(res):
191
 
192
 
193
  def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
195
 
196
  if isinstance(res, str) and res.startswith("Error:"):
 
43
 
44
  @spaces.GPU()
45
  def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  if image is None:
47
  return "Error: No image provided", None, None
48
 
 
175
 
176
 
177
  def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
178
+ """
179
+ Main OCR demonstration function that processes images and returns results.
180
+
181
+ Args:
182
+ image (Union[dict, np.ndarray, str, PIL.Image]): Input image in one of these formats:
183
+ - dict: Image component state with keys:
184
+ - path: str | None (Path to local file)
185
+ - url: str | None (Public URL or base64 image)
186
+ - size: int | None (Image size in bytes)
187
+ - orig_name: str | None (Original filename)
188
+ - mime_type: str | None (Image MIME type)
189
+ - is_stream: bool (Always False)
190
+ - meta: dict(str, Any)
191
+ - dict: ImageEditor component state with keys:
192
+ - background: filepath | None
193
+ - layers: list[filepath]
194
+ - composite: filepath | None
195
+ - id: str | None
196
+ - np.ndarray: Raw image array
197
+ - str: Path to image file
198
+ - PIL.Image: PIL Image object
199
+ task (Literal['Plain Text OCR', 'Format Text OCR', 'Fine-grained OCR (Box)',
200
+ 'Fine-grained OCR (Color)', 'Multi-crop OCR', 'Render Formatted OCR']):
201
+ Selected OCR task type
202
+ ocr_type (Literal['ocr', 'format']): Type of OCR processing
203
+ ocr_box (str): Bounding box coordinates in format "x1,y1,x2,y2"
204
+ ocr_color (Literal['red', 'green', 'blue']): Color specification for fine-grained OCR
205
+
206
+ Returns:
207
+ tuple: (formatted_result, html_output)
208
+ - formatted_result (str): Formatted OCR result text
209
+ - html_output (str): HTML visualization if applicable
210
+ """
211
  res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
212
 
213
  if isinstance(res, str) and res.startswith("Error:"):