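"""Gradio demo for NuMarkdown-8B-Thinking.

Sends an uploaded image to an OpenAI-compatible vLLM server, assumed to be
running locally on port 8000 (e.g. started with something like
``vllm serve numind/NuMarkdown-8B-Thinking --port 8000``; exact flags depend
on your vLLM version), and displays the model's <think> reasoning trace
alongside the extracted Markdown answer.
"""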
import gradio as gr
import requests
import base64
from PIL import Image
from io import BytesIO
print("=== DEBUG: Starting app.py ===")

def encode_image_to_base64(image: Image.Image) -> str:
    """Encode a PIL image as a base64 JPEG data URL for the chat API."""
    if image.mode != "RGB":
        # JPEG cannot encode alpha/palette modes, so normalise RGBA/P uploads first
        image = image.convert("RGB")
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/jpeg;base64,{img_str}"

def query_vllm_api(image, temperature, max_tokens=12_000):
    print(f"=== DEBUG: query_vllm_api called with image={image is not None}, temp={temperature} ===")
    if image is None:
        return "No image provided", "No image provided", "Please upload an image first."
    try:
        # Resize large images so the base64 payload stays reasonably small
        max_size = 1024
        if max(image.size) > max_size:
            ratio = max_size / max(image.size)
            new_size = tuple(int(dim * ratio) for dim in image.size)
            image = image.resize(new_size, Image.Resampling.LANCZOS)

        image_b64 = encode_image_to_base64(image)
        messages = [{
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_b64}}
            ],
        }]

        payload = {
            "model": "numind/NuMarkdown-8B-Thinking",
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
        }

        print("=== DEBUG: About to make vLLM API request ===")
        response = requests.post(
            "http://localhost:8000/v1/chat/completions",
            json=payload,
            timeout=600,  # reasoning traces can be long; 60 s is easily exceeded
        )
        response.raise_for_status()
        data = response.json()
        result = data["choices"][0]["message"]["content"]

        # Split the response into its <think> reasoning trace and <answer> Markdown
        try:
            reasoning = result.split("<think>")[1].split("</think>")[0]
            answer = result.split("<answer>")[1].split("</answer>")[0]
        except IndexError:
            # If the tags are missing, surface the full result as the answer
            reasoning = "No thinking trace found"
            answer = result
        return reasoning, answer, answer

    except requests.exceptions.RequestException as e:
        error_msg = f"API request failed: {e}"
        print(f"=== DEBUG: Request error: {error_msg} ===")
        return error_msg, error_msg, error_msg
    except Exception as e:
        error_msg = f"Unexpected error: {e}"
        print(f"=== DEBUG: Unexpected error: {error_msg} ===")
        return error_msg, error_msg, error_msg
print("=== DEBUG: Creating Gradio interface ===")
with gr.Blocks(title="NuMarkdown-8B-Thinking", theme=gr.themes.Soft()) as demo:
gr.HTML("""
<div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
<h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">๐Ÿ‘๏ธ NuMarkdown-8B-Thinking</h1>
<p style="color: rgba(255,255,255,0.9); margin: 10px 0; font-size: 1.2em;">Upload an image to convert to Markdown!</p>
</div>
""")
with gr.Row():
with gr.Column():
temperature = gr.Slider(0.1, 1.5, value=0.6, step=0.1, label="Temperature")
img_in = gr.Image(type="pil", label="Upload Image")
btn = gr.Button("Generate Response")
with gr.Column():
thinking = gr.Textbox(label="Thinking Trace", lines=10)
raw_answer = gr.Textbox(label="Raw Output", lines=5)
output = gr.Markdown(label="Response")
btn.click(
query_vllm_api,
inputs=[img_in, temperature],
outputs=[thinking, raw_answer, output],
)
print("=== DEBUG: Gradio interface created ===")
if __name__ == "__main__":
print("=== DEBUG: About to launch Gradio ===")
demo.launch(
server_name="0.0.0.0",
server_port=7860,
share=False
)
print("=== DEBUG: Gradio launched ===")