Spaces:
Running
on
Zero
Running
on
Zero
wjm55
committed on
Commit
·
0f72e2b
1
Parent(s):
cf99179
fixed ner and text output
Browse files
app.py
CHANGED
@@ -257,11 +257,7 @@ with gr.Blocks(css=css) as demo:
|
|
257 |
|
258 |
def create_zip(image, text_data, fname):
|
259 |
# Validate inputs
|
260 |
-
if not fname:
|
261 |
-
return None
|
262 |
-
if not text_data:
|
263 |
-
return None
|
264 |
-
if not isinstance(image, (Image.Image, np.ndarray)):
|
265 |
return None
|
266 |
|
267 |
try:
|
@@ -271,27 +267,35 @@ with gr.Blocks(css=css) as demo:
|
|
271 |
|
272 |
# Create a temporary directory
|
273 |
with tempfile.TemporaryDirectory() as temp_dir:
|
274 |
-
# Save image
|
275 |
img_path = os.path.join(temp_dir, f"{fname}.png")
|
276 |
image.save(img_path)
|
277 |
|
278 |
-
# Extract text
|
279 |
-
|
280 |
entities = []
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
|
287 |
# Save text
|
288 |
txt_path = os.path.join(temp_dir, f"{fname}.txt")
|
289 |
with open(txt_path, 'w', encoding='utf-8') as f:
|
290 |
-
f.write(
|
291 |
|
292 |
# Create JSON with text and entities
|
293 |
json_data = {
|
294 |
-
"text":
|
295 |
"entities": entities,
|
296 |
"image_file": f"{fname}.png"
|
297 |
}
|
@@ -301,7 +305,7 @@ with gr.Blocks(css=css) as demo:
|
|
301 |
with open(json_path, 'w', encoding='utf-8') as f:
|
302 |
json.dump(json_data, f, indent=2, ensure_ascii=False)
|
303 |
|
304 |
-
# Create zip file
|
305 |
output_dir = "downloads"
|
306 |
os.makedirs(output_dir, exist_ok=True)
|
307 |
zip_path = os.path.join(output_dir, f"{fname}.zip")
|
@@ -311,7 +315,7 @@ with gr.Blocks(css=css) as demo:
|
|
311 |
zipf.write(txt_path, os.path.basename(txt_path))
|
312 |
zipf.write(json_path, os.path.basename(json_path))
|
313 |
|
314 |
-
return zip_path
|
315 |
|
316 |
except Exception as e:
|
317 |
print(f"Error creating zip: {str(e)}")
|
|
|
257 |
|
258 |
def create_zip(image, text_data, fname):
|
259 |
# Validate inputs
|
260 |
+
if not fname or not text_data or not isinstance(image, (Image.Image, np.ndarray)):
|
|
|
|
|
|
|
|
|
261 |
return None
|
262 |
|
263 |
try:
|
|
|
267 |
|
268 |
# Create a temporary directory
|
269 |
with tempfile.TemporaryDirectory() as temp_dir:
|
270 |
+
# Save image
|
271 |
img_path = os.path.join(temp_dir, f"{fname}.png")
|
272 |
image.save(img_path)
|
273 |
|
274 |
+
# Extract text and entities from text_data
|
275 |
+
full_text = ""
|
276 |
entities = []
|
277 |
+
current_pos = 0
|
278 |
+
|
279 |
+
# Process the highlighted text data
|
280 |
+
for segment, label in text_data:
|
281 |
+
full_text += segment
|
282 |
+
if label: # If this segment has a label (is an entity)
|
283 |
+
entities.append({
|
284 |
+
"text": segment,
|
285 |
+
"label": label,
|
286 |
+
"start": current_pos,
|
287 |
+
"end": current_pos + len(segment)
|
288 |
+
})
|
289 |
+
current_pos += len(segment)
|
290 |
|
291 |
# Save text
|
292 |
txt_path = os.path.join(temp_dir, f"{fname}.txt")
|
293 |
with open(txt_path, 'w', encoding='utf-8') as f:
|
294 |
+
f.write(full_text)
|
295 |
|
296 |
# Create JSON with text and entities
|
297 |
json_data = {
|
298 |
+
"text": full_text,
|
299 |
"entities": entities,
|
300 |
"image_file": f"{fname}.png"
|
301 |
}
|
|
|
305 |
with open(json_path, 'w', encoding='utf-8') as f:
|
306 |
json.dump(json_data, f, indent=2, ensure_ascii=False)
|
307 |
|
308 |
+
# Create zip file
|
309 |
output_dir = "downloads"
|
310 |
os.makedirs(output_dir, exist_ok=True)
|
311 |
zip_path = os.path.join(output_dir, f"{fname}.zip")
|
|
|
315 |
zipf.write(txt_path, os.path.basename(txt_path))
|
316 |
zipf.write(json_path, os.path.basename(json_path))
|
317 |
|
318 |
+
return zip_path
|
319 |
|
320 |
except Exception as e:
|
321 |
print(f"Error creating zip: {str(e)}")
|