mrbeliever committed on
Commit ebb3eb4 · verified · 1 Parent(s): d5899cf

Update app.py

Files changed (1)
app.py +30 -8
app.py CHANGED
@@ -1,6 +1,6 @@
 import gradio as gr
 import spaces
-from transformers import AutoModelForCausalLM, AutoProcessor
+from transformers import AutoModelForCausalLM, AutoProcessor, GPT2LMHeadModel, GPT2Tokenizer
 import torch
 from PIL import Image
 import subprocess
@@ -8,23 +8,45 @@ import subprocess
 # Install flash-attn with no CUDA build isolation
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
-# Load model and processor
+# Define models and processors with pinning to a stable revision
 models = {
-    "microsoft/Phi-3.5-vision-instruct": AutoModelForCausalLM.from_pretrained("microsoft/Phi-3.5-vision-instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
+    "microsoft/Phi-3.5-vision-instruct": AutoModelForCausalLM.from_pretrained(
+        "microsoft/Phi-3.5-vision-instruct",
+        revision="specific-revision-hash",  # Pinning to a specific revision for stability
+        trust_remote_code=True,
+        torch_dtype="auto",
+        _attn_implementation="flash_attention_2"
+    ).cuda().eval()
 }
+
 processors = {
-    "microsoft/Phi-3.5-vision-instruct": AutoProcessor.from_pretrained("microsoft/Phi-3.5-vision-instruct", trust_remote_code=True)
+    "microsoft/Phi-3.5-vision-instruct": AutoProcessor.from_pretrained(
+        "microsoft/Phi-3.5-vision-instruct",
+        revision="specific-revision-hash",  # Pinning to a specific revision for stability
+        trust_remote_code=True
+    )
 }
 
+# Fallback to GPT-2 for testing
+def load_fallback_model():
+    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+    model = GPT2LMHeadModel.from_pretrained("gpt2").cuda().eval()
+    return model, tokenizer
+
 # Default description and prompt
-DESCRIPTION = ""
+DESCRIPTION = "[Phi-3.5-vision Demo](https://huggingface.co/microsoft/Phi-3.5-vision-instruct)"
 default_question = "You are an image to prompt converter. Your work is to observe each and every detail of the image and craft a detailed prompt under 100 words."
 
-# Gradio function for generating output from image input
+# Gradio function for generating output from image input with error handling
 @spaces.GPU
 def run_example(image, text_input=default_question, model_id="microsoft/Phi-3.5-vision-instruct"):
-    model = models[model_id]
-    processor = processors[model_id]
+    try:
+        model = models[model_id]
+        processor = processors[model_id]
+    except KeyError as e:
+        print(f"Error loading model: {e}. Falling back to GPT-2.")
+        model, processor = load_fallback_model()
+
     user_prompt = '<|user|>\n'
     assistant_prompt = '<|assistant|>\n'
     prompt_suffix = "<|end|>\n"
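
The hunk ends before the rest of run_example, which consumes these prompt tokens. For reference only, a minimal sketch of the usual Phi-3.5-vision generate flow that typically follows; this is not part of the commit, and the image handling and max_new_tokens value are assumptions:

# Sketch only; assumes `image` is already a PIL.Image and the standard Phi-3.5-vision processor/generate API.
prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
inputs = processor(prompt, [image], return_tensors="pt").to("cuda:0")
generate_ids = model.generate(**inputs, max_new_tokens=1000, eos_token_id=processor.tokenizer.eos_token_id)
generate_ids = generate_ids[:, inputs["input_ids"].shape[1]:]  # strip the echoed prompt tokens
response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]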