Update app.py
app.py CHANGED
@@ -1,6 +1,6 @@
 import gradio as gr
 import spaces
-from transformers import AutoModelForCausalLM, AutoProcessor
+from transformers import AutoModelForCausalLM, AutoProcessor, GPT2LMHeadModel, GPT2Tokenizer
 import torch
 from PIL import Image
 import subprocess
@@ -8,23 +8,45 @@ import subprocess
 # Install flash-attn with no CUDA build isolation
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
-#
+# Define models and processors with pinning to a stable revision
 models = {
-    "microsoft/Phi-3.5-vision-instruct": AutoModelForCausalLM.from_pretrained(
+    "microsoft/Phi-3.5-vision-instruct": AutoModelForCausalLM.from_pretrained(
+        "microsoft/Phi-3.5-vision-instruct",
+        revision="specific-revision-hash",  # Pinning to a specific revision for stability
+        trust_remote_code=True,
+        torch_dtype="auto",
+        _attn_implementation="flash_attention_2"
+    ).cuda().eval()
 }
+
 processors = {
-    "microsoft/Phi-3.5-vision-instruct": AutoProcessor.from_pretrained(
+    "microsoft/Phi-3.5-vision-instruct": AutoProcessor.from_pretrained(
+        "microsoft/Phi-3.5-vision-instruct",
+        revision="specific-revision-hash",  # Pinning to a specific revision for stability
+        trust_remote_code=True
+    )
 }
 
+# Fallback to GPT-2 for testing
+def load_fallback_model():
+    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+    model = GPT2LMHeadModel.from_pretrained("gpt2").cuda().eval()
+    return model, tokenizer
+
 # Default description and prompt
-DESCRIPTION = ""
+DESCRIPTION = "[Phi-3.5-vision Demo](https://huggingface.co/microsoft/Phi-3.5-vision-instruct)"
 default_question = "You are an image to prompt converter. Your work is to observe each and every detail of the image and craft a detailed prompt under 100 words."
 
-# Gradio function for generating output from image input
+# Gradio function for generating output from image input with error handling
 @spaces.GPU
 def run_example(image, text_input=default_question, model_id="microsoft/Phi-3.5-vision-instruct"):
-
-
+    try:
+        model = models[model_id]
+        processor = processors[model_id]
+    except KeyError as e:
+        print(f"Error loading model: {e}. Falling back to GPT-2.")
+        model, processor = load_fallback_model()
+
     user_prompt = '<|user|>\n'
     assistant_prompt = '<|assistant|>\n'
     prompt_suffix = "<|end|>\n"
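A note on the pinned revision: this commit replaces the truncated `from_pretrained(` calls with full, pinned ones, but `revision="specific-revision-hash"` is a literal placeholder, so both calls will fail to resolve at startup. Pinning only takes effect with a real commit SHA (or branch/tag) from the model repo. A minimal sketch of resolving the current commit and pinning to it, using `huggingface_hub` (installed alongside `transformers`):

```python
from huggingface_hub import HfApi
from transformers import AutoModelForCausalLM, AutoProcessor

MODEL_ID = "microsoft/Phi-3.5-vision-instruct"

# Resolve the commit the repo's main branch currently points to.
# In practice, hard-code the SHA once a known-good revision is verified.
pinned_sha = HfApi().model_info(MODEL_ID).sha

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    revision=pinned_sha,  # a real 40-character commit hash, not a placeholder
    trust_remote_code=True,
    torch_dtype="auto",
    _attn_implementation="flash_attention_2",
).cuda().eval()

processor = AutoProcessor.from_pretrained(
    MODEL_ID, revision=pinned_sha, trust_remote_code=True
)
```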
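On the fallback path: `load_fallback_model` returns a `GPT2Tokenizer` where the main path returns an `AutoProcessor`, and GPT-2 is text-only, so downstream code that passes the image through `processor` would break against the fallback. Note also that `except KeyError` only fires for an unknown `model_id`; a failed download raises when the `models` dict is built at import time, before `run_example` is ever called. A sketch of what a text-only fallback generation could look like, assuming the image is simply dropped (`fallback_generate` is illustrative, not part of the commit):

```python
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

def fallback_generate(text_input: str, max_new_tokens: int = 100) -> str:
    """Text-only fallback: GPT-2 cannot attend to the image, so it is ignored."""
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model = GPT2LMHeadModel.from_pretrained("gpt2").cuda().eval()

    inputs = tokenizer(text_input, return_tensors="pt").to("cuda")
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,  # GPT-2 defines no pad token
        )
    # Decode only the newly generated tokens, not the echoed prompt.
    new_tokens = output_ids[0, inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
```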
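The hunk ends right after the prompt markers, so the rest of `run_example` is not shown here. For context, the Phi-3.5-vision model card combines these pieces into a single prompt with an `<|image_1|>` placeholder and decodes only the tokens generated after the prompt; a sketch along those lines (the 256-token budget is illustrative):

```python
def generate_from_image(model, processor, image, text_input):
    user_prompt = '<|user|>\n'
    assistant_prompt = '<|assistant|>\n'
    prompt_suffix = "<|end|>\n"

    # <|image_1|> marks where the image is attached in the chat template.
    prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
    inputs = processor(prompt, image, return_tensors="pt").to("cuda")

    generate_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        eos_token_id=processor.tokenizer.eos_token_id,
    )
    # Trim the echoed prompt tokens before decoding.
    generate_ids = generate_ids[:, inputs["input_ids"].shape[1]:]
    return processor.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]
```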
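The UI wiring is also outside this hunk. Since the file imports `gradio` and exposes `run_example` under `@spaces.GPU`, a minimal interface could look like the following (the actual Space may use `gr.Blocks` instead; this sketch is an assumption):

```python
import gradio as gr

demo = gr.Interface(
    fn=run_example,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Textbox(value=default_question, label="Question"),
    ],
    outputs=gr.Textbox(label="Generated Prompt"),
    description=DESCRIPTION,  # rendered as markdown above the interface
)
demo.launch()
```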