ARCQUB committed on
Commit
97fc69c
·
verified ·
1 Parent(s): 8981ad9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -35
app.py CHANGED
@@ -2,20 +2,10 @@ import gradio as gr
2
  import importlib
3
  from PIL import Image
4
  import json
5
- import spaces
6
 
7
- # === Model Mapping ===
8
- MODEL_MAP = {
9
- #"Qwen": "models.qwen",
10
- #"Pixtral": "models.pixtral",
11
- #"Aya Vision": "models.aya_vision",
12
- "GPT-4o": "models.gpt4o"
13
- }
14
-
15
- # === Load Model
16
- def load_model_runner(model_name):
17
- module = importlib.import_module(MODEL_MAP[model_name])
18
- return module.run_model
19
 
20
  # === Format Raw JSON Output
21
  def format_result_json(output):
@@ -138,15 +128,12 @@ def format_pretty_view(output):
138
 
139
  return "\n".join(lines).strip()
140
 
141
- # === Main Inference Handler
142
- def process_single_image(model_name, image_file):
143
- runner = load_model_runner(model_name)
144
  image = Image.open(image_file.name).convert("RGB")
145
-
146
- result = runner(image)
147
-
148
  parsed_json = result.get("json")
149
- raw_text = result.get("raw", "")
150
 
151
  if parsed_json:
152
  json_output = format_result_json(parsed_json)
@@ -157,27 +144,23 @@ def process_single_image(model_name, image_file):
157
 
158
  return image, json_output, pretty_output
159
 
160
- # === Gradio UI
161
  iface = gr.Interface(
162
- fn=process_single_image,
163
- inputs=[
164
- gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Select Vision Model"),
165
- gr.File(file_types=["image"], label="Upload a BPMN Image")
166
- ],
167
  outputs=[
168
- gr.Image(label="Input Image"),
169
- gr.Textbox(label="Raw JSON Output (Technical)", lines=20),
170
- gr.Textbox(label="Prettified View (User-Friendly)", lines=25)
171
  ],
172
- title="🖼️ Vision Model Extractor - JSON + Pretty View",
173
- description="Upload a BPMN image and select a vision model to extract structured output. GPT-4o uses an API key from your Hugging Face Space Secret.",
174
- flagging_mode="never"
175
  )
176
 
177
- # === Enable GPU mode and launch
178
- #@spaces.GPU
179
  def main():
180
- iface.launch()
181
 
182
  if __name__ == "__main__":
183
  main()
 
2
  import importlib
3
  from PIL import Image
4
  import json
5
+ import os
6
 
7
+ # === Load the GPT-4o module only
8
+ from models import gpt4o_pix2struct_ocr
 
 
 
 
 
 
 
 
 
 
9
 
10
  # === Format Raw JSON Output
11
  def format_result_json(output):
 
128
 
129
  return "\n".join(lines).strip()
130
 
131
+ # === Inference Handler (GPT-4o only)
132
+ def process_image(image_file):
 
133
  image = Image.open(image_file.name).convert("RGB")
134
+ result = gpt4o_pix2struct_ocr.run_model(image)
 
 
135
  parsed_json = result.get("json")
136
+ raw_text = result.get("raw")
137
 
138
  if parsed_json:
139
  json_output = format_result_json(parsed_json)
 
144
 
145
  return image, json_output, pretty_output
146
 
147
+ # === Gradio Interface
148
  iface = gr.Interface(
149
+ fn=process_image,
150
+ inputs=[gr.File(file_types=["image"], label="Upload a BPMN Diagram Image")],
 
 
 
151
  outputs=[
152
+ gr.Image(label="📷 Input Image"),
153
+ gr.Textbox(label="🧠 Raw JSON Output", lines=20),
154
+ gr.Textbox(label="📋 Prettified View", lines=25)
155
  ],
156
+ title="🧩 BPMN Extractor using GPT-4o + OCR",
157
+ description="Upload a BPMN diagram image. Extracts structured JSON using GPT-4o and Pix2Struct OCR. Runs on CPU-only Space.",
158
+ allow_flagging="never"
159
  )
160
 
161
+ # === Launch without GPU
 
162
  def main():
163
+ iface.launch(ssr=False)
164
 
165
  if __name__ == "__main__":
166
  main()