tjw committed · Commit b61f54c · 1 Parent(s): e5315ee
Files changed (1): app.py (+8 -6)
app.py CHANGED
@@ -14,10 +14,12 @@ import numpy as np
 import spaces
 
 
-adapter_id = "merve/paligemma2-3b-vqav2"
-model_id = "google/paligemma2-3b-pt-448"
+#adapter_id = "merve/paligemma2-3b-vqav2"
+adapter_id = "google/paligemma2-10b-pt-448"
+model_id = "google/paligemma2-10b-pt-448"
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = PaliGemmaForConditionalGeneration.from_pretrained(adapter_id).eval().to(device)
+dtype = torch.bfloat16
+model = PaliGemmaForConditionalGeneration.from_pretrained(adapter_id, device_map='cuda', torch_dtype=dtype).eval()
 processor = PaliGemmaProcessor.from_pretrained(model_id)
 
 ###### Transformers Inference
@@ -28,7 +30,7 @@ def infer(
     max_new_tokens: int
 ) -> str:
     text = "answer en " + text
-    inputs = processor(text=text, images=image, return_tensors="pt").to(device)
+    inputs = processor(text=text, images=image, return_tensors="pt").to(device=device, dtype=dtype)
     with torch.inference_mode():
         generated_ids = model.generate(
             **inputs,
@@ -71,8 +73,8 @@ with gr.Blocks(css="style.css") as demo:
         label="Max New Tokens",
         info="Set to larger for longer generation.",
         minimum=20,
-        maximum=160,
-        value=80,
+        maximum=1600,
+        value=256,
         step=10,
     )
 
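
Net effect of the commit: the Space drops the fine-tuned merve/paligemma2-3b-vqav2 adapter in favor of the base google/paligemma2-10b-pt-448 checkpoint, loads it directly onto the GPU in bfloat16 (so the separate .to(device) after from_pretrained() goes away), casts the processor outputs to the model dtype, and widens the Max New Tokens slider. For reference, below is a minimal sketch of how the touched portion of app.py would read after this commit; the imports, the image/text parameters of infer(), and the generate/decode tail are reconstructions from the visible context lines, not part of the diff itself.

import torch
import spaces  # Hugging Face Spaces GPU helper; present in the original file
from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor

# Base 10B checkpoint replaces the fine-tuned 3B VQAv2 adapter.
#adapter_id = "merve/paligemma2-3b-vqav2"
adapter_id = "google/paligemma2-10b-pt-448"
model_id = "google/paligemma2-10b-pt-448"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.bfloat16

# device_map places the weights on the GPU at load time (requires the
# accelerate package), so the old .to(device) call is no longer needed.
model = PaliGemmaForConditionalGeneration.from_pretrained(
    adapter_id, device_map="cuda", torch_dtype=dtype
).eval()
processor = PaliGemmaProcessor.from_pretrained(model_id)

def infer(image, text: str, max_new_tokens: int) -> str:  # image/text params assumed from the processor call
    text = "answer en " + text
    # BatchFeature.to(device=..., dtype=...) casts only floating-point
    # tensors (pixel_values) to bfloat16; integer input_ids keep their dtype.
    inputs = processor(text=text, images=image, return_tensors="pt").to(device=device, dtype=dtype)
    with torch.inference_mode():
        generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # Decode step assumed; the diff does not show the function tail.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

One detail worth noting: passing torch_dtype at load time only converts the weights, while the processor still emits float32 pixel values. That is why the commit also routes dtype into the .to() call on the inputs; without that cast the bfloat16 model would receive float32 pixels and PyTorch would raise a dtype mismatch.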