import torch
import gradio as gr
from transformers import AutoModel, pipeline, AutoTokenizer

# From this issue: https://discuss.huggingface.co/t/how-to-install-flash-attention-on-hf-gradio-space/70698/2
import subprocess

# flash_attn, which InternVL2 depends on, can only be installed at runtime like this
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)
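# Note: env= replaces the subprocess's entire environment, so pip runs without
# the usual PATH. If the install fails for that reason, merging os.environ is a
# safer variant (an assumption, not part of the original snippet):
#
# import os
# subprocess.run(
#     "pip install flash-attn --no-build-isolation",
#     env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
#     shell=True,
# )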
model_name = "OpenGVLab/InternVL2-8B"
# Load InternVL2-8B in bfloat16 and move it to the GPU for inference;
# trust_remote_code pulls in the model's custom architecture code.
model = (
    AutoModel.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        # low_cpu_mem_usage=True,
        trust_remote_code=True,
    )
    .eval()
    .cuda()
)
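# Optional guard for local debugging outside the Space (an assumption, not in
# the original): fall back to CPU/float32 when no GPU is available.
#
# if not torch.cuda.is_available():
#     model = AutoModel.from_pretrained(
#         model_name, torch_dtype=torch.float32, trust_remote_code=True
#     ).eval()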
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    inference = pipeline(
        task="visual-question-answering", model=model, tokenizer=tokenizer
    )
except Exception as error:
    # Surface setup failures as a Gradio error popup
    raise gr.Error("❌" + str(error), duration=30)
def predict(input_img, questions):
    try:
        predictions = inference(question=questions, image=input_img)
        return str(predictions)
    except Exception as e:
        # Catch the exception and convert the error message to a string
        error_message = "❌" + str(e)
        # Raise gradio.Error to show an error popup
        raise gr.Error(error_message, duration=25)
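# Quick local smoke test (assumption: Pillow installed and an example.jpg on
# disk; not part of the Space itself):
#
# from PIL import Image
# print(predict(Image.open("example.jpg"), "What is in this image?"))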
gradio_app = gr.Interface(
    predict,
    inputs=[
        gr.Image(label="Select an Image", sources=["upload", "webcam"], type="pil"),
        "text",
    ],
    outputs="text",
    title="Ask me anything",
)

if __name__ == "__main__":
    gradio_app.launch(show_error=True, debug=True)
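# For long-running inference on Spaces, enabling Gradio's request queue is a
# common option (not in the original):
#
# gradio_app.queue().launch(show_error=True, debug=True)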