import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
import re
import time
from PIL import Image
import torch
import spaces
import subprocess

# Install flash-attn at startup; the env flag skips the slow CUDA build
# and falls back to the prebuilt wheel.
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
tokenizer = AutoTokenizer.from_pretrained(
    'qnguyen3/nanoLLaVA',
    trust_remote_code=True)
# device_map='auto' already dispatches the model to the GPU, so the
# original explicit model.to("cuda:0") call is not needed.
model = AutoModelForCausalLM.from_pretrained(
    'qnguyen3/nanoLLaVA',
    torch_dtype=torch.float16,
    device_map='auto',
    trust_remote_code=True)
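
# Streaming chat handler for gr.ChatInterface(multimodal=True).
# `message` is a dict with "text" and "files"; `history` holds earlier
# (user, assistant) pairs, with an uploaded image stored as a tuple in
# the user slot of the turn it was sent in.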
@spaces.GPU
def bot_streaming(message, history):
    chat_history = []
    image = None
    # Use an image attached to the current message if there is one;
    # otherwise fall back to the most recent image in the history.
    if message["files"]:
        image = message["files"][-1]["path"]
    else:
        for i, hist in enumerate(history):
            if type(hist[0]) == tuple:
                image = hist[0][0]
                image_turn = i
    if len(history) > 0 and image is not None:
        chat_history.append({"role": "user", "content": f'<image>\n{history[1][0]}'})
        chat_history.append({"role": "assistant", "content": history[1][1]})
        for human, assistant in history[2:]:
            chat_history.append({"role": "user", "content": human})
            chat_history.append({"role": "assistant", "content": assistant})
        chat_history.append({"role": "user", "content": message['text']})
    elif len(history) > 0 and image is None:
        for human, assistant in history:
            chat_history.append({"role": "user", "content": human})
            chat_history.append({"role": "assistant", "content": assistant})
        chat_history.append({"role": "user", "content": message['text']})
    elif len(history) == 0 and image is not None:
        chat_history.append({"role": "user", "content": f"<image>\n{message['text']}"})
    elif len(history) == 0 and image is None:
        chat_history.append({"role": "user", "content": message['text']})
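    # The model expects an image token in the prompt, so fail with a
    # visible Gradio error if no image was provided anywhere.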
    if image is None:
        raise gr.Error("You need to upload an image for the model to work.")
    image = Image.open(image).convert("RGB")
    text = tokenizer.apply_chat_template(
        chat_history,
        tokenize=False,
        add_generation_prompt=True)
    # Splice nanoLLaVA's image placeholder token id (-200) in between the text chunks.
    text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype, device=model.device)
    generation_kwargs = dict(input_ids=input_ids, images=image_tensor, streamer=streamer, max_new_tokens=100)
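    # Run generate() on a background thread so tokens can be streamed
    # back to the chat UI as the streamer yields them.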
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        time.sleep(0.04)
        yield buffer
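
# Multimodal chat UI; the example prompts assume bee.jpg and baklava.png
# are checked into the Space repository alongside this file.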
demo = gr.ChatInterface(fn=bot_streaming, title="nanoLLaVA",
                        examples=[{"text": "What is on the flower?", "files": ["./bee.jpg"]},
                                  {"text": "How to make this pastry?", "files": ["./baklava.png"]}],
                        description="Try [nanoLLaVA](https://huggingface.co/qnguyen3/nanoLLaVA) in this demo. Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
                        stop_btn="Stop Generation", multimodal=True)
demo.launch(debug=True)