Spaces:
Running
on
Zero
Running
on
Zero
wjm55
committed on
Commit
·
71e27e4
1
Parent(s):
529282d
fixed zip and ner outputs
Browse files
app.py
CHANGED
@@ -9,6 +9,7 @@ from datetime import datetime
|
|
9 |
import numpy as np
|
10 |
import os
|
11 |
from gliner import GLiNER
|
|
|
12 |
|
13 |
# Initialize GLiNER model
|
14 |
gliner_model = GLiNER.from_pretrained("knowledgator/modern-gliner-bi-large-v1.0")
|
@@ -113,20 +114,18 @@ def run_example(image, model_id="Qwen/Qwen2-VL-7B-Instruct", run_ner=False, ner_
|
|
113 |
threshold=0.3
|
114 |
)
|
115 |
|
116 |
-
# Format
|
117 |
-
|
118 |
-
for entity in
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
annotated_text[entity["end"]:]
|
125 |
-
)
|
126 |
|
127 |
-
return
|
128 |
|
129 |
-
return ocr_text
|
130 |
|
131 |
css = """
|
132 |
/* Overall app styling */
|
@@ -218,7 +217,7 @@ with gr.Blocks(css=css) as demo:
|
|
218 |
|
219 |
submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
|
220 |
with gr.Column(elem_classes="output-container"):
|
221 |
-
output_text = gr.
|
222 |
|
223 |
# Show/hide NER labels based on checkbox
|
224 |
ner_checkbox.change(
|
@@ -237,35 +236,57 @@ with gr.Blocks(css=css) as demo:
|
|
237 |
filename = gr.Textbox(label="Save filename (without extension)", placeholder="Enter filename to save")
|
238 |
download_btn = gr.Button("Download Image & Text", elem_classes="submit-btn")
|
239 |
|
240 |
-
def create_zip(image,
|
241 |
-
if not image or not
|
242 |
return None
|
243 |
|
|
|
|
|
|
|
|
|
244 |
# Create a temporary directory
|
245 |
with tempfile.TemporaryDirectory() as temp_dir:
|
246 |
-
# Save image
|
247 |
img_ext = image.format.lower() if hasattr(image, 'format') else 'png'
|
248 |
img_path = os.path.join(temp_dir, f"{fname}.{img_ext}")
|
249 |
image.save(img_path)
|
250 |
|
|
|
|
|
|
|
251 |
# Save text
|
252 |
txt_path = os.path.join(temp_dir, f"{fname}.txt")
|
253 |
with open(txt_path, 'w', encoding='utf-8') as f:
|
254 |
f.write(text)
|
255 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
# Create zip file
|
257 |
zip_path = os.path.join(temp_dir, f"{fname}.zip")
|
258 |
with zipfile.ZipFile(zip_path, 'w') as zipf:
|
259 |
zipf.write(img_path, os.path.basename(img_path))
|
260 |
zipf.write(txt_path, os.path.basename(txt_path))
|
|
|
261 |
|
262 |
return zip_path
|
263 |
|
264 |
-
download_btn.click(
|
265 |
-
create_zip,
|
266 |
-
inputs=[input_img, output_text, filename],
|
267 |
-
outputs=gr.File(label="Download")
|
268 |
-
)
|
269 |
-
|
270 |
demo.queue(api_open=False)
|
271 |
demo.launch(debug=True)
|
|
|
9 |
import numpy as np
|
10 |
import os
|
11 |
from gliner import GLiNER
|
12 |
+
import json
|
13 |
|
14 |
# Initialize GLiNER model
|
15 |
gliner_model = GLiNER.from_pretrained("knowledgator/modern-gliner-bi-large-v1.0")
|
|
|
114 |
threshold=0.3
|
115 |
)
|
116 |
|
117 |
+
# Format entities for highlighting
|
118 |
+
entities = []
|
119 |
+
for entity in ner_results:
|
120 |
+
entities.append((
|
121 |
+
entity["start"],
|
122 |
+
entity["end"],
|
123 |
+
entity["label"]
|
124 |
+
))
|
|
|
|
|
125 |
|
126 |
+
return [(ocr_text, entities)]
|
127 |
|
128 |
+
return [(ocr_text, [])]
|
129 |
|
130 |
css = """
|
131 |
/* Overall app styling */
|
|
|
217 |
|
218 |
submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
|
219 |
with gr.Column(elem_classes="output-container"):
|
220 |
+
output_text = gr.HighlightedText(label="Output Text", elem_id="output")
|
221 |
|
222 |
# Show/hide NER labels based on checkbox
|
223 |
ner_checkbox.change(
|
|
|
236 |
filename = gr.Textbox(label="Save filename (without extension)", placeholder="Enter filename to save")
|
237 |
download_btn = gr.Button("Download Image & Text", elem_classes="submit-btn")
|
238 |
|
239 |
+
def create_zip(image, text_data, fname):
    """Bundle the image, the OCR text and an entity JSON file into a zip archive.

    Args:
        image: A PIL image or a numpy array (converted with ``Image.fromarray``).
        text_data: Either a plain string, or a list whose first element is a
            ``(text, entities)`` pair where ``entities`` is an iterable of
            ``(start, end, label)`` tuples.
            NOTE(review): confirm this matches what the installed Gradio
            version passes when a HighlightedText component is used as input.
        fname: Base name (without extension) for the files inside the archive
            and for the archive itself.

    Returns:
        Path to the created ``.zip`` file, or ``None`` when the image is not a
        supported type, or the text or file name is empty.
    """
    if not isinstance(image, (Image.Image, np.ndarray)) or not text_data or not fname:
        return None

    # The name comes from a free-text field: drop any directory components so
    # it cannot point outside the directories created below.
    fname = os.path.basename(str(fname).strip())
    if not fname:
        return None

    # Convert numpy array to PIL Image if needed
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    # PIL images always have a ``format`` attribute, but it is None for images
    # that were not loaded from a file (e.g. built by ``Image.fromarray``), so
    # a ``hasattr`` check is not enough; fall back to PNG in that case.
    img_ext = (getattr(image, 'format', None) or 'png').lower()

    # Extract text and entities from the HighlightedText output
    text, entities = text_data[0] if isinstance(text_data, list) else (text_data, [])

    # JSON payload with text, entities, and image info
    json_data = {
        "text": text,
        "entities": [
            {
                "start": start,
                "end": end,
                "label": label,
                "text": text[start:end],
            }
            for start, end, label in entities
        ],
        "image_file": f"{fname}.{img_ext}",
    }

    # The archive has to outlive this function because its path is returned to
    # the caller, so it lives in its own directory that is NOT removed here.
    # Only the intermediate files go into the self-cleaning temporary directory.
    zip_path = os.path.join(tempfile.mkdtemp(), f"{fname}.zip")

    with tempfile.TemporaryDirectory() as temp_dir:
        # Save image
        img_path = os.path.join(temp_dir, f"{fname}.{img_ext}")
        image.save(img_path)

        # Save text
        txt_path = os.path.join(temp_dir, f"{fname}.txt")
        with open(txt_path, 'w', encoding='utf-8') as f:
            f.write(text)

        # Save JSON
        json_path = os.path.join(temp_dir, f"{fname}.json")
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, indent=2, ensure_ascii=False)

        # Create zip file
        with zipfile.ZipFile(zip_path, 'w') as zipf:
            for member in (img_path, txt_path, json_path):
                zipf.write(member, os.path.basename(member))

    return zip_path
|
290 |
|
|
|
|
|
|
|
|
|
|
|
|
|
291 |
demo.queue(api_open=False)
|
292 |
demo.launch(debug=True)
|