liamcripwell committed on
Commit
152508e
·
verified ·
1 Parent(s): b9ef50d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -38
app.py CHANGED
@@ -1,42 +1,93 @@
1
  import gradio as gr
2
- import os
3
- import sys
4
-
5
- print("=== DEBUG: Starting minimal Gradio test ===")
6
- print(f"Python version: {sys.version}")
7
- print(f"Gradio version: {gr.__version__}")
8
- print(f"Working directory: {os.getcwd()}")
9
- print(f"PORT env var: {os.environ.get('PORT', 'Not set')}")
10
-
11
- def simple_function(text):
12
- return f"You entered: {text}"
13
-
14
- # Minimal interface
15
- with gr.Blocks(title="Debug Test") as demo:
16
- gr.Markdown("# Debug Test - If you see this, Gradio is working!")
17
-
18
- with gr.Row():
19
- text_input = gr.Textbox(label="Test Input")
20
- text_output = gr.Textbox(label="Test Output")
21
- btn = gr.Button("Test")
22
-
23
- btn.click(simple_function, inputs=text_input, outputs=text_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- if __name__ == "__main__":
26
- port = int(os.environ.get("PORT", 7860))
27
- print(f"=== Attempting to launch on 0.0.0.0:{port} ===")
28
-
29
  try:
30
- demo.launch(
31
- server_name="0.0.0.0",
32
- server_port=port,
33
- share=False,
34
- show_error=True
35
  )
36
- print("=== Gradio launched successfully ===")
37
- except Exception as e:
38
- print(f"=== ERROR: Gradio failed to launch ===")
39
- print(f"Error: {e}")
40
- import traceback
41
- traceback.print_exc()
42
- sys.exit(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import requests
3
+ import base64
4
+ from PIL import Image
5
+ from io import BytesIO
6
+
7
def encode_image_to_base64(image: "Image.Image") -> str:
    """Encode an image as a base64 JPEG data URL.

    Args:
        image: The image to encode. It only needs to expose ``mode``,
            ``convert`` and ``save`` the way a PIL image does.

    Returns:
        A string of the form ``data:image/jpeg;base64,<payload>``.
    """
    # JPEG cannot store alpha channels or palettes; saving such an image
    # (e.g. an RGBA or P-mode PNG) raises OSError, so normalise to RGB first.
    if image.mode not in ("RGB", "L", "CMYK"):
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/jpeg;base64,{img_str}"
12
+
13
def _extract_tagged(text, tag):
    """Return the text between ``<tag>`` and ``</tag>``, or None if ``<tag>`` is absent.

    When the closing tag is missing, everything after the opening tag is
    returned.
    """
    _, opener, rest = text.partition(f"<{tag}>")
    if not opener:
        return None
    return rest.partition(f"</{tag}>")[0]


def query_vllm_api(image, temperature, max_tokens=12_000):
    """Send an image to the local vLLM server and split the reply for the UI.

    Args:
        image: PIL image to transcribe, or None when nothing was uploaded.
        temperature: Sampling temperature forwarded in the request payload.
        max_tokens: Generation cap forwarded in the request payload.

    Returns:
        A 3-tuple ``(reasoning, answer, answer)``: the text inside
        ``<think>...</think>`` followed by the text inside
        ``<answer>...</answer>`` twice. On any failure (no image, request
        error, unexpected response shape) the tuple is
        ``("", message, message)`` so the shape is always the same.
    """
    if image is None:
        # Without an image the message list would be empty; skip the request.
        notice = "Please upload an image first."
        return "", notice, notice

    # Optional: Resize image if needed (to avoid huge uploads)
    max_size = 1024
    if max(image.size) > max_size:
        ratio = max_size / max(image.size)
        # Clamp to 1px so an extreme aspect ratio cannot round a side to 0.
        new_size = tuple(max(1, int(dim * ratio)) for dim in image.size)
        image = image.resize(new_size, Image.Resampling.LANCZOS)

    image_b64 = encode_image_to_base64(image)
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_b64}},
            ],
        }
    ]

    payload = {
        "model": "numind/NuMarkdown-8B-Thinking",
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }

    try:
        response = requests.post(
            "http://localhost:8000/v1/chat/completions",
            json=payload,
            timeout=60,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        failure = f"API request failed: {e}"
        return "", failure, failure

    try:
        result = data["choices"][0]["message"]["content"] or ""
    except (KeyError, IndexError, TypeError):
        failure = "API returned an unexpected response format."
        return "", failure, failure

    reasoning = _extract_tagged(result, "think")
    answer = _extract_tagged(result, "answer")
    if answer is None:
        # No <answer> tag: show whatever follows the thinking trace, or the
        # raw text when the reply carries no tags at all.
        if reasoning is None:
            answer = result
        else:
            answer = result.partition("</think>")[2].strip()

    return reasoning or "", answer, answer
54
+
55
# Banner markup shown at the top of the page: gradient header with project
# links, followed by a short description of the model and a usage note.
BANNER_HTML = """
<div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
<h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">👁️ NuMarkdown-8B-Thinking</h1>
<p style="color: rgba(255,255,255,0.9); margin: 10px 0; font-size: 1.2em;">Upload an image to convert to Markdown!</p>
<div style="margin-top: 15px;">
<a href="https://nuextract.ai/" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🖥️ API / Platform</a>
<span style="color: rgba(255,255,255,0.7);">|</span>
<a href="https://discord.gg/3tsEtJNCDe" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🗣️ Discord</a>
<span style="color: rgba(255,255,255,0.7);">|</span>
<a href="https://github.com/numindai/NuMarkdown" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🔗 GitHub</a>
<span style="color: rgba(255,255,255,0.7);">|</span>
<a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🤗 Model</a>
</div>
</div>

<p>NuMarkdown-8B-Thinking is the first reasoning OCR VLM. It is specifically trained to convert documents into clean Markdown files, well suited for RAG applications. It generates thinking tokens to figure out the layout of the document before generating the Markdown file. It is particularly good at understanding documents with weird layouts and complex tables.</p>
<p>NOTE: In this space we downsize large images and restrict the maximum output of the model, so performance could improve if you run the model yourself.</p>
"""

with gr.Blocks(title="NuMarkdown-8B-Thinking", theme=gr.themes.Soft()) as demo:
    # Clean banner with centered content
    gr.HTML(BANNER_HTML)

    with gr.Row():
        # Left column: sampling control, image upload and the trigger button.
        with gr.Column():
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.5,
                value=0.6,
                step=0.1,
                label="Temperature",
            )
            img_in = gr.Image(type="pil", label="Upload Image")
            btn = gr.Button("Generate Response")
        # Right column: the three outputs filled by the click handler.
        with gr.Column():
            thinking = gr.Textbox(label="Thinking Trace", lines=10)
            raw_answer = gr.Textbox(label="Raw Output", lines=5)
            output = gr.Markdown(label="Response")

    # The handler receives (image, temperature) and fills the three outputs
    # in the order listed.
    btn.click(
        fn=query_vllm_api,
        inputs=[img_in, temperature],
        outputs=[thinking, raw_answer, output],
    )

if __name__ == "__main__":
    demo.launch(share=True)