Spaces:
Running
on
L40S
Running
on
L40S
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,8 @@ import base64
|
|
4 |
from PIL import Image
|
5 |
from io import BytesIO
|
6 |
|
|
|
|
|
7 |
def encode_image_to_base64(image: Image.Image) -> str:
|
8 |
buffered = BytesIO()
|
9 |
image.save(buffered, format="JPEG")
|
@@ -11,8 +13,13 @@ def encode_image_to_base64(image: Image.Image) -> str:
|
|
11 |
return f"data:image/jpeg;base64,{img_str}"
|
12 |
|
13 |
def query_vllm_api(image, temperature, max_tokens=12_000):
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
16 |
# Optional: Resize image if needed (to avoid huge uploads)
|
17 |
max_size = 1024
|
18 |
if max(image.size) > max_size:
|
@@ -28,14 +35,14 @@ def query_vllm_api(image, temperature, max_tokens=12_000):
|
|
28 |
]
|
29 |
})
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
|
38 |
-
|
39 |
response = requests.post(
|
40 |
"http://localhost:8000/v1/chat/completions",
|
41 |
json=payload,
|
@@ -45,32 +52,35 @@ def query_vllm_api(image, temperature, max_tokens=12_000):
|
|
45 |
data = response.json()
|
46 |
|
47 |
result = data["choices"][0]["message"]["content"]
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
return reasoning, answer, answer
|
|
|
52 |
except requests.exceptions.RequestException as e:
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
with gr.Blocks(title="NuMarkdown-8B-Thinking", theme=gr.themes.Soft()) as demo:
|
56 |
-
# Clean banner with centered content
|
57 |
gr.HTML("""
|
58 |
<div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
|
59 |
<h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">๐๏ธ NuMarkdown-8B-Thinking</h1>
|
60 |
<p style="color: rgba(255,255,255,0.9); margin: 10px 0; font-size: 1.2em;">Upload an image to convert to Markdown!</p>
|
61 |
-
<div style="margin-top: 15px;">
|
62 |
-
<a href="https://nuextract.ai/" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🖥️ API / Platform</a>
|
63 |
-
<span style="color: rgba(255,255,255,0.7);">|</span>
|
64 |
-
<a href="https://discord.gg/3tsEtJNCDe" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🗣️ Discord</a>
|
65 |
-
<span style="color: rgba(255,255,255,0.7);">|</span>
|
66 |
-
<a href="https://github.com/numindai/NuMarkdown" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">๐ GitHub</a>
|
67 |
-
<span style="color: rgba(255,255,255,0.7);">|</span>
|
68 |
-
<a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🤗 Model</a>
|
69 |
-
</div>
|
70 |
</div>
|
71 |
-
|
72 |
-
<p>NuMarkdown-8B-Thinking is the first reasoning OCR VLM. It is specifically trained to convert documents into clean Markdown files, well suited for RAG applications. It generates thinking tokens to figure out the layout of the document before generating the Markdown file. It is particularly good at understanding documents with weird layouts and complex tables.</p>
|
73 |
-
<p>NOTE: In this space we downsize large images and restrict the maximum output of the model, so performance could improve if you run the model yourself.</p>
|
74 |
""")
|
75 |
|
76 |
with gr.Row():
|
@@ -89,6 +99,13 @@ with gr.Blocks(title="NuMarkdown-8B-Thinking", theme=gr.themes.Soft()) as demo:
|
|
89 |
outputs=[thinking, raw_answer, output],
|
90 |
)
|
91 |
|
|
|
|
|
92 |
if __name__ == "__main__":
|
93 |
-
print("
|
94 |
-
demo.launch(
|
|
|
|
|
|
|
|
|
|
|
|
4 |
from PIL import Image
|
5 |
from io import BytesIO
|
6 |
|
7 |
+
print("=== DEBUG: Starting app.py ===")
|
8 |
+
|
9 |
def encode_image_to_base64(image: Image.Image) -> str:
|
10 |
buffered = BytesIO()
|
11 |
image.save(buffered, format="JPEG")
|
|
|
13 |
return f"data:image/jpeg;base64,{img_str}"
|
14 |
|
15 |
def query_vllm_api(image, temperature, max_tokens=12_000):
|
16 |
+
print(f"=== DEBUG: query_vllm_api called with image={image is not None}, temp={temperature} ===")
|
17 |
+
|
18 |
+
if image is None:
|
19 |
+
return "No image provided", "No image provided", "Please upload an image first."
|
20 |
+
|
21 |
+
try:
|
22 |
+
messages = []
|
23 |
# Optional: Resize image if needed (to avoid huge uploads)
|
24 |
max_size = 1024
|
25 |
if max(image.size) > max_size:
|
|
|
35 |
]
|
36 |
})
|
37 |
|
38 |
+
payload = {
|
39 |
+
"model": "numind/NuMarkdown-8B-Thinking",
|
40 |
+
"messages": messages,
|
41 |
+
"max_tokens": max_tokens,
|
42 |
+
"temperature": temperature
|
43 |
+
}
|
44 |
|
45 |
+
print("=== DEBUG: About to make vLLM API request ===")
|
46 |
response = requests.post(
|
47 |
"http://localhost:8000/v1/chat/completions",
|
48 |
json=payload,
|
|
|
52 |
data = response.json()
|
53 |
|
54 |
result = data["choices"][0]["message"]["content"]
|
55 |
+
|
56 |
+
# Handle the thinking/answer parsing
|
57 |
+
try:
|
58 |
+
reasoning = result.split("<think>")[1].split("</think>")[0]
|
59 |
+
answer = result.split("<answer>")[1].split("</answer>")[0]
|
60 |
+
except IndexError:
|
61 |
+
# If no thinking tags, return the full result
|
62 |
+
reasoning = "No thinking trace found"
|
63 |
+
answer = result
|
64 |
|
65 |
return reasoning, answer, answer
|
66 |
+
|
67 |
except requests.exceptions.RequestException as e:
|
68 |
+
error_msg = f"API request failed: {e}"
|
69 |
+
print(f"=== DEBUG: Request error: {error_msg} ===")
|
70 |
+
return error_msg, error_msg, error_msg
|
71 |
+
except Exception as e:
|
72 |
+
error_msg = f"Unexpected error: {e}"
|
73 |
+
print(f"=== DEBUG: Unexpected error: {error_msg} ===")
|
74 |
+
return error_msg, error_msg, error_msg
|
75 |
+
|
76 |
+
print("=== DEBUG: Creating Gradio interface ===")
|
77 |
|
78 |
with gr.Blocks(title="NuMarkdown-8B-Thinking", theme=gr.themes.Soft()) as demo:
|
|
|
79 |
gr.HTML("""
|
80 |
<div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
|
81 |
<h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">๐๏ธ NuMarkdown-8B-Thinking</h1>
|
82 |
<p style="color: rgba(255,255,255,0.9); margin: 10px 0; font-size: 1.2em;">Upload an image to convert to Markdown!</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
</div>
|
|
|
|
|
|
|
84 |
""")
|
85 |
|
86 |
with gr.Row():
|
|
|
99 |
outputs=[thinking, raw_answer, output],
|
100 |
)
|
101 |
|
102 |
+
print("=== DEBUG: Gradio interface created ===")
|
103 |
+
|
104 |
if __name__ == "__main__":
|
105 |
+
print("=== DEBUG: About to launch Gradio ===")
|
106 |
+
demo.launch(
|
107 |
+
server_name="0.0.0.0",
|
108 |
+
server_port=7860,
|
109 |
+
share=False
|
110 |
+
)
|
111 |
+
print("=== DEBUG: Gradio launched ===")
|