Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -43,22 +43,6 @@ def image_to_base64(image):
|
|
43 |
|
44 |
@spaces.GPU()
|
45 |
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
|
46 |
-
"""
|
47 |
-
Process an input image using the OCR model based on the specified task.
|
48 |
-
|
49 |
-
Args:
|
50 |
-
image (Union[dict, np.ndarray, str, PIL.Image]): Input image in various formats
|
51 |
-
task (str): Type of OCR task to perform
|
52 |
-
ocr_type (str, optional): Type of OCR processing ('ocr' or 'format')
|
53 |
-
ocr_box (str, optional): Bounding box coordinates for fine-grained OCR
|
54 |
-
ocr_color (str, optional): Color specification for fine-grained OCR
|
55 |
-
|
56 |
-
Returns:
|
57 |
-
tuple: (result_text, html_content, unique_id)
|
58 |
-
- result_text (str): OCR processing result or error message
|
59 |
-
- html_content (str): HTML content for visualization if applicable
|
60 |
-
- unique_id (str): Unique identifier for the processed image
|
61 |
-
"""
|
62 |
if image is None:
|
63 |
return "Error: No image provided", None, None
|
64 |
|
@@ -191,6 +175,39 @@ def parse_latex_output(res):
|
|
191 |
|
192 |
|
193 |
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
|
195 |
|
196 |
if isinstance(res, str) and res.startswith("Error:"):
|
|
|
43 |
|
44 |
@spaces.GPU()
|
45 |
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
if image is None:
|
47 |
return "Error: No image provided", None, None
|
48 |
|
|
|
175 |
|
176 |
|
177 |
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
|
178 |
+
"""
|
179 |
+
Main OCR demonstration function that processes images and returns results.
|
180 |
+
|
181 |
+
Args:
|
182 |
+
image (Union[dict, np.ndarray, str, PIL.Image]): Input image in one of these formats:
|
183 |
+
- dict: Image component state with keys:
|
184 |
+
- path: str | None (Path to local file)
|
185 |
+
- url: str | None (Public URL or base64 image)
|
186 |
+
- size: int | None (Image size in bytes)
|
187 |
+
- orig_name: str | None (Original filename)
|
188 |
+
- mime_type: str | None (Image MIME type)
|
189 |
+
- is_stream: bool (Always False)
|
190 |
+
- meta: dict[str, Any]
|
191 |
+
- dict: ImageEditor component state with keys:
|
192 |
+
- background: filepath | None
|
193 |
+
- layers: list[filepath]
|
194 |
+
- composite: filepath | None
|
195 |
+
- id: str | None
|
196 |
+
- np.ndarray: Raw image array
|
197 |
+
- str: Path to image file
|
198 |
+
- PIL.Image: PIL Image object
|
199 |
+
task (Literal['Plain Text OCR', 'Format Text OCR', 'Fine-grained OCR (Box)',
|
200 |
+
'Fine-grained OCR (Color)', 'Multi-crop OCR', 'Render Formatted OCR']):
|
201 |
+
Selected OCR task type
|
202 |
+
ocr_type (Literal['ocr', 'format']): Type of OCR processing
|
203 |
+
ocr_box (str): Bounding box coordinates in format "x1,y1,x2,y2"
|
204 |
+
ocr_color (Literal['red', 'green', 'blue']): Color specification for fine-grained OCR
|
205 |
+
|
206 |
+
Returns:
|
207 |
+
tuple: (formatted_result, html_output)
|
208 |
+
- formatted_result (str): Formatted OCR result text
|
209 |
+
- html_output (str): HTML visualization if applicable
|
210 |
+
"""
|
211 |
res, html_content, unique_id = process_image(image, task, ocr_type, ocr_box, ocr_color)
|
212 |
|
213 |
if isinstance(res, str) and res.startswith("Error:"):
|