# FoodIdentifier / app.py
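"""Gradio demo that answers questions about an uploaded food image using the
Moondream2 vision-language model, streaming the answer as it is generated."""
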
from threading import Thread

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, TextIteratorStreamer

# Moondream does not support the HuggingFace pipeline system, so we have to do it manually
moondream_id = "vikhyatk/moondream2"
moondream_revision = "2024-04-02"
moondream_tokenizer = AutoTokenizer.from_pretrained(moondream_id, revision=moondream_revision)
moondream: PreTrainedModel = AutoModelForCausalLM.from_pretrained(
    moondream_id, trust_remote_code=True, revision=moondream_revision, torch_dtype="auto"
)
moondream.eval()  # inference only; disables dropout and other training-time behaviour


def answer_question(_img, _prompt):
    """Stream Moondream's answer to `_prompt` about the PIL image `_img`."""
    image_embeds = moondream.encode_image(_img)

    # Run generation on a background thread and stream tokens back as they arrive
    streamer = TextIteratorStreamer(moondream_tokenizer, skip_special_tokens=True)
    thread = Thread(
        target=moondream.answer_question,
        kwargs={
            "image_embeds": image_embeds,
            "question": _prompt,
            "tokenizer": moondream_tokenizer,
            "streamer": streamer,
        },
    )
    thread.start()

    # Yield the growing answer so Gradio updates the output box incrementally
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer.strip()


if __name__ == "__main__":
    with gr.Blocks() as app:
        gr.Markdown(
            """
            # Food Identifier
            Final project for IAT 481 at Simon Fraser University, Spring 2024.
            """
        )

        with gr.Row():
            prompt = gr.Textbox(label="Input", value="Describe this image.")
            submit = gr.Button("Submit")
        with gr.Row():
            img = gr.Image(label="Image", type="pil")
            output = gr.TextArea(label="Output")

        # answer_question is a generator, so its partial answers stream into the output box
        submit.click(answer_question, [img, prompt], output)

    # queue() is required for streaming (generator) outputs
    app.queue().launch()