import torch
import gradio as gr
from transformers import AutoModel, pipeline, AutoTokenizer
# from issue: https://discuss.huggingface.co/t/how-to-install-flash-attention-on-hf-gradio-space/70698/2
import os
import subprocess

# InternVL2's flash_attn dependency can only be installed at runtime, like this.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    # Merge into os.environ so PATH etc. survive (the forum snippet passes only the
    # one variable); FLASH_ATTENTION_SKIP_CUDA_BUILD tells flash-attn's setup.py to
    # skip compiling the CUDA extension at install time.
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)
model_name = "OpenGVLab/InternVL2-8B"
model = (
    AutoModel.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        # low_cpu_mem_usage=True,
        trust_remote_code=True,
    )
    .eval()
    .cuda()
)
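# Note: `.cuda()` assumes the Space runs on GPU hardware; on CPU-only hardware it
# raises, so guarding with `torch.cuda.is_available()` may be safer.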
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    inference = pipeline(
        task="visual-question-answering", model=model, tokenizer=tokenizer
    )
except Exception as error:
    raise gr.Error("❌" + str(error), duration=30)
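# Caveat: the generic "visual-question-answering" pipeline may not accept this
# remote-code model. InternVL2's model card documents a native `model.chat()` API
# instead; a rough fallback sketch (the image -> pixel_values preprocessing from
# the model card is omitted here):
#
#   generation_config = dict(max_new_tokens=512)
#   response = model.chat(tokenizer, pixel_values, question, generation_config)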
def predict(input_img, questions):
    try:
        predictions = inference(question=questions, image=input_img)
        return str(predictions)
    except Exception as e:
        # Catch the exception and stringify the error message.
        error_message = "❌" + str(e)
        # Raise gradio.Error to show an error popup in the UI.
        raise gr.Error(error_message, duration=25)
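# Hypothetical direct test of predict(), bypassing the UI:
#   from PIL import Image
#   print(predict(Image.open("example.jpg"), "What is in this image?"))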
gradio_app = gr.Interface(
    predict,
    inputs=[
        gr.Image(label="Select an Image", sources=["upload", "webcam"], type="pil"),
        "text",
    ],
    outputs="text",
    title="Ask me anything",
)
if __name__ == "__main__":
    gradio_app.launch(show_error=True, debug=True)
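# Optional smoke test from another process (hypothetical; requires `gradio_client`):
#   from gradio_client import Client, handle_file
#   client = Client("http://127.0.0.1:7860")
#   print(client.predict(handle_file("example.jpg"), "Describe the image.", api_name="/predict"))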