Spaces:
Running
on
Zero
Running
on
Zero
wjm55
committed on
Commit
·
71e27e4
1
Parent(s):
529282d
fixed zip and ner outputs
Browse files
app.py
CHANGED
|
@@ -9,6 +9,7 @@ from datetime import datetime
|
|
| 9 |
import numpy as np
|
| 10 |
import os
|
| 11 |
from gliner import GLiNER
|
|
|
|
| 12 |
|
| 13 |
# Initialize GLiNER model
|
| 14 |
gliner_model = GLiNER.from_pretrained("knowledgator/modern-gliner-bi-large-v1.0")
|
|
@@ -113,20 +114,18 @@ def run_example(image, model_id="Qwen/Qwen2-VL-7B-Instruct", run_ner=False, ner_
|
|
| 113 |
threshold=0.3
|
| 114 |
)
|
| 115 |
|
| 116 |
-
# Format
|
| 117 |
-
|
| 118 |
-
for entity in
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
annotated_text[entity["end"]:]
|
| 125 |
-
)
|
| 126 |
|
| 127 |
-
return
|
| 128 |
|
| 129 |
-
return ocr_text
|
| 130 |
|
| 131 |
css = """
|
| 132 |
/* Overall app styling */
|
|
@@ -218,7 +217,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 218 |
|
| 219 |
submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
|
| 220 |
with gr.Column(elem_classes="output-container"):
|
| 221 |
-
output_text = gr.
|
| 222 |
|
| 223 |
# Show/hide NER labels based on checkbox
|
| 224 |
ner_checkbox.change(
|
|
@@ -237,35 +236,57 @@ with gr.Blocks(css=css) as demo:
|
|
| 237 |
filename = gr.Textbox(label="Save filename (without extension)", placeholder="Enter filename to save")
|
| 238 |
download_btn = gr.Button("Download Image & Text", elem_classes="submit-btn")
|
| 239 |
|
| 240 |
-
def create_zip(image,
|
| 241 |
-
if not image or not
|
| 242 |
return None
|
| 243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
# Create a temporary directory
|
| 245 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 246 |
-
# Save image
|
| 247 |
img_ext = image.format.lower() if hasattr(image, 'format') else 'png'
|
| 248 |
img_path = os.path.join(temp_dir, f"{fname}.{img_ext}")
|
| 249 |
image.save(img_path)
|
| 250 |
|
|
|
|
|
|
|
|
|
|
| 251 |
# Save text
|
| 252 |
txt_path = os.path.join(temp_dir, f"{fname}.txt")
|
| 253 |
with open(txt_path, 'w', encoding='utf-8') as f:
|
| 254 |
f.write(text)
|
| 255 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
# Create zip file
|
| 257 |
zip_path = os.path.join(temp_dir, f"{fname}.zip")
|
| 258 |
with zipfile.ZipFile(zip_path, 'w') as zipf:
|
| 259 |
zipf.write(img_path, os.path.basename(img_path))
|
| 260 |
zipf.write(txt_path, os.path.basename(txt_path))
|
|
|
|
| 261 |
|
| 262 |
return zip_path
|
| 263 |
|
| 264 |
-
download_btn.click(
|
| 265 |
-
create_zip,
|
| 266 |
-
inputs=[input_img, output_text, filename],
|
| 267 |
-
outputs=gr.File(label="Download")
|
| 268 |
-
)
|
| 269 |
-
|
| 270 |
demo.queue(api_open=False)
|
| 271 |
demo.launch(debug=True)
|
|
|
|
| 9 |
import numpy as np
|
| 10 |
import os
|
| 11 |
from gliner import GLiNER
|
| 12 |
+
import json
|
| 13 |
|
| 14 |
# Initialize GLiNER model
|
| 15 |
gliner_model = GLiNER.from_pretrained("knowledgator/modern-gliner-bi-large-v1.0")
|
|
|
|
| 114 |
threshold=0.3
|
| 115 |
)
|
| 116 |
|
| 117 |
+
# Format entities for highlighting
|
| 118 |
+
entities = []
|
| 119 |
+
for entity in ner_results:
|
| 120 |
+
entities.append((
|
| 121 |
+
entity["start"],
|
| 122 |
+
entity["end"],
|
| 123 |
+
entity["label"]
|
| 124 |
+
))
|
|
|
|
|
|
|
| 125 |
|
| 126 |
+
return [(ocr_text, entities)]
|
| 127 |
|
| 128 |
+
return [(ocr_text, [])]
|
| 129 |
|
| 130 |
css = """
|
| 131 |
/* Overall app styling */
|
|
|
|
| 217 |
|
| 218 |
submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
|
| 219 |
with gr.Column(elem_classes="output-container"):
|
| 220 |
+
output_text = gr.HighlightedText(label="Output Text", elem_id="output")
|
| 221 |
|
| 222 |
# Show/hide NER labels based on checkbox
|
| 223 |
ner_checkbox.change(
|
|
|
|
| 236 |
filename = gr.Textbox(label="Save filename (without extension)", placeholder="Enter filename to save")
|
| 237 |
download_btn = gr.Button("Download Image & Text", elem_classes="submit-btn")
|
| 238 |
|
| 239 |
+
def create_zip(image, text_data, fname):
    """Bundle the image, its text and an entity JSON file into one zip.

    Args:
        image: A PIL image, or a numpy array (converted with
            ``Image.fromarray``). Any other value yields ``None``.
        text_data: Either a list whose first item is a ``(text, entities)``
            pair, or a plain text value (then no entities are recorded).
            ``entities`` is iterated as ``(start, end, label)`` triples.
        fname: Base filename (without extension) shared by every file in
            the archive and by the archive itself.

    Returns:
        The path of the created ``.zip`` file, or ``None`` when an input is
        missing or unusable.
    """
    # Cheap guards first; all of them just return None and have no side effects.
    if not text_data or not fname:
        return None

    # The name is typed by the user: keep only the final path component so it
    # cannot address anything outside the directories created below.
    base = os.path.basename(str(fname).strip())
    if not base:
        return None

    if not isinstance(image, (Image.Image, np.ndarray)):
        return None

    # Convert numpy array to PIL Image if needed
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    # Extract text and entities from the HighlightedText output
    if isinstance(text_data, list):
        text, entities = text_data[0]
    else:
        text, entities = text_data, []
    # NOTE(review): the second item may come back empty/None depending on how
    # the component round-trips its value - treat that as "no entities".
    entities = entities or []

    # `format` exists on every PIL image but is None for images that were not
    # loaded from a file (e.g. built by Image.fromarray), so fall back to png.
    img_ext = (getattr(image, "format", None) or "png").lower()
    img_name = f"{base}.{img_ext}"

    # Intermediate files live in a self-cleaning staging directory.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Save image
        img_path = os.path.join(temp_dir, img_name)
        image.save(img_path)

        # Save text
        txt_path = os.path.join(temp_dir, f"{base}.txt")
        with open(txt_path, 'w', encoding='utf-8') as f:
            f.write(text)

        # Create JSON with text, entities, and image info
        json_data = {
            "text": text,
            "entities": [
                {
                    "start": start,
                    "end": end,
                    "label": label,
                    "text": text[start:end],
                }
                for start, end, label in entities
            ],
            "image_file": img_name,
        }

        # Save JSON
        json_path = os.path.join(temp_dir, f"{base}.json")
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, indent=2, ensure_ascii=False)

        # Create zip file. It is written to a directory that is NOT removed
        # on return: the caller still has to read the file after this function
        # exits, which a path inside `temp_dir` would not survive.
        out_dir = tempfile.mkdtemp()
        zip_path = os.path.join(out_dir, f"{base}.zip")
        with zipfile.ZipFile(zip_path, 'w') as zipf:
            zipf.write(img_path, os.path.basename(img_path))
            zipf.write(txt_path, os.path.basename(txt_path))
            zipf.write(json_path, os.path.basename(json_path))

    return zip_path
|
| 290 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
demo.queue(api_open=False)
|
| 292 |
demo.launch(debug=True)
|