wjm55 committed on
Commit
71e27e4
·
1 Parent(s): 529282d

fixed zip and ner outputs

Browse files
Files changed (1) hide show
  1. app.py +43 -22
app.py CHANGED
@@ -9,6 +9,7 @@ from datetime import datetime
9
  import numpy as np
10
  import os
11
  from gliner import GLiNER
 
12
 
13
  # Initialize GLiNER model
14
  gliner_model = GLiNER.from_pretrained("knowledgator/modern-gliner-bi-large-v1.0")
@@ -113,20 +114,18 @@ def run_example(image, model_id="Qwen/Qwen2-VL-7B-Instruct", run_ner=False, ner_
113
  threshold=0.3
114
  )
115
 
116
- # Format the text with entity annotations
117
- annotated_text = ocr_text
118
- for entity in sorted(ner_results, key=lambda x: x["start"], reverse=True):
119
- entity_text = entity["text"]
120
- entity_label = entity["label"]
121
- annotated_text = (
122
- annotated_text[:entity["start"]] +
123
- f"[{entity_text}]({entity_label})" +
124
- annotated_text[entity["end"]:]
125
- )
126
 
127
- return f"OCR Text:\n{ocr_text}\n\nAnnotated Entities:\n{annotated_text}"
128
 
129
- return ocr_text
130
 
131
  css = """
132
  /* Overall app styling */
@@ -218,7 +217,7 @@ with gr.Blocks(css=css) as demo:
218
 
219
  submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
220
  with gr.Column(elem_classes="output-container"):
221
- output_text = gr.Textbox(label="Output Text", elem_id="output")
222
 
223
  # Show/hide NER labels based on checkbox
224
  ner_checkbox.change(
@@ -237,35 +236,57 @@ with gr.Blocks(css=css) as demo:
237
  filename = gr.Textbox(label="Save filename (without extension)", placeholder="Enter filename to save")
238
  download_btn = gr.Button("Download Image & Text", elem_classes="submit-btn")
239
 
240
- def create_zip(image, text, fname):
241
- if not image or not text or not fname:
242
  return None
243
 
 
 
 
 
244
  # Create a temporary directory
245
  with tempfile.TemporaryDirectory() as temp_dir:
246
- # Save image with same extension
247
  img_ext = image.format.lower() if hasattr(image, 'format') else 'png'
248
  img_path = os.path.join(temp_dir, f"{fname}.{img_ext}")
249
  image.save(img_path)
250
 
 
 
 
251
  # Save text
252
  txt_path = os.path.join(temp_dir, f"{fname}.txt")
253
  with open(txt_path, 'w', encoding='utf-8') as f:
254
  f.write(text)
255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  # Create zip file
257
  zip_path = os.path.join(temp_dir, f"{fname}.zip")
258
  with zipfile.ZipFile(zip_path, 'w') as zipf:
259
  zipf.write(img_path, os.path.basename(img_path))
260
  zipf.write(txt_path, os.path.basename(txt_path))
 
261
 
262
  return zip_path
263
 
264
- download_btn.click(
265
- create_zip,
266
- inputs=[input_img, output_text, filename],
267
- outputs=gr.File(label="Download")
268
- )
269
-
270
  demo.queue(api_open=False)
271
  demo.launch(debug=True)
 
9
  import numpy as np
10
  import os
11
  from gliner import GLiNER
12
+ import json
13
 
14
  # Initialize GLiNER model
15
  gliner_model = GLiNER.from_pretrained("knowledgator/modern-gliner-bi-large-v1.0")
 
114
  threshold=0.3
115
  )
116
 
117
+ # Format entities for highlighting
118
+ entities = []
119
+ for entity in ner_results:
120
+ entities.append((
121
+ entity["start"],
122
+ entity["end"],
123
+ entity["label"]
124
+ ))
 
 
125
 
126
+ return [(ocr_text, entities)]
127
 
128
+ return [(ocr_text, [])]
129
 
130
  css = """
131
  /* Overall app styling */
 
217
 
218
  submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
219
  with gr.Column(elem_classes="output-container"):
220
+ output_text = gr.HighlightedText(label="Output Text", elem_id="output")
221
 
222
  # Show/hide NER labels based on checkbox
223
  ner_checkbox.change(
 
236
  filename = gr.Textbox(label="Save filename (without extension)", placeholder="Enter filename to save")
237
  download_btn = gr.Button("Download Image & Text", elem_classes="submit-btn")
238
 
239
+ def create_zip(image, text_data, fname):
240
+ if not isinstance(image, (Image.Image, np.ndarray)) or not text_data or not fname:
241
  return None
242
 
243
+ # Convert numpy array to PIL Image if needed
244
+ if isinstance(image, np.ndarray):
245
+ image = Image.fromarray(image)
246
+
247
  # Create a temporary directory
248
  with tempfile.TemporaryDirectory() as temp_dir:
249
+ # Save image
250
  img_ext = image.format.lower() if hasattr(image, 'format') else 'png'
251
  img_path = os.path.join(temp_dir, f"{fname}.{img_ext}")
252
  image.save(img_path)
253
 
254
+ # Extract text and entities from the HighlightedText output
255
+ text, entities = text_data[0] if isinstance(text_data, list) else (text_data, [])
256
+
257
  # Save text
258
  txt_path = os.path.join(temp_dir, f"{fname}.txt")
259
  with open(txt_path, 'w', encoding='utf-8') as f:
260
  f.write(text)
261
 
262
+ # Create JSON with text, entities, and image info
263
+ json_data = {
264
+ "text": text,
265
+ "entities": [
266
+ {
267
+ "start": start,
268
+ "end": end,
269
+ "label": label,
270
+ "text": text[start:end]
271
+ }
272
+ for start, end, label in entities
273
+ ],
274
+ "image_file": f"{fname}.{img_ext}"
275
+ }
276
+
277
+ # Save JSON
278
+ json_path = os.path.join(temp_dir, f"{fname}.json")
279
+ with open(json_path, 'w', encoding='utf-8') as f:
280
+ json.dump(json_data, f, indent=2, ensure_ascii=False)
281
+
282
  # Create zip file
283
  zip_path = os.path.join(temp_dir, f"{fname}.zip")
284
  with zipfile.ZipFile(zip_path, 'w') as zipf:
285
  zipf.write(img_path, os.path.basename(img_path))
286
  zipf.write(txt_path, os.path.basename(txt_path))
287
+ zipf.write(json_path, os.path.basename(json_path))
288
 
289
  return zip_path
290
 
 
 
 
 
 
 
291
  demo.queue(api_open=False)
292
  demo.launch(debug=True)