breadlicker45 committed on
Commit
c580f5e
·
verified ·
1 Parent(s): 33262af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -13
app.py CHANGED
@@ -1,5 +1,8 @@
1
  import gradio as gr
2
- from transformers import AutoProcessor, AutoModelForImageTextToText
 
 
 
3
  from PIL import Image
4
  import torch
5
  import os
@@ -17,16 +20,12 @@ def load_model():
17
  )
18
 
19
  # Load the processor and model using the correct identifier
20
- processor = AutoProcessor.from_pretrained(
21
- "google/paligemma2-28b-pt-896", use_auth_token=token
22
- )
23
- model = AutoModelForImageTextToText.from_pretrained(
24
- "google/paligemma2-28b-pt-896", use_auth_token=token, torch_dtype=torch.bfloat16
25
- )
26
-
27
- # Move model to GPU if available
28
- if torch.cuda.is_available():
29
- model = model.to("cuda")
30
 
31
  return processor, model
32
 
@@ -35,10 +34,10 @@ def load_model():
35
  def process_image_and_text(image, text_input):
36
  """Extract text from image using PaliGemma2."""
37
  processor, model = load_model()
38
-
39
  # Preprocess the image and text
40
  inputs = processor(text=text_input, images=image, return_tensors="pt").to(
41
- "cuda" if torch.cuda.is_available() else "cpu", dtype=torch.bfloat16
42
  )
43
 
44
  # Generate predictions
 
1
  import gradio as gr
2
+ from transformers import (
3
+ PaliGemmaProcessor,
4
+ PaliGemmaForConditionalGeneration,
5
+ )
6
  from PIL import Image
7
  import torch
8
  import os
 
20
  )
21
 
22
  # Load the processor and model using the correct identifier
23
+ model_id = "google/paligemma2-28b-pt-896"
24
+ processor = PaliGemmaProcessor.from_pretrained(model_id, use_auth_token=token)
25
+ device = "cuda" if torch.cuda.is_available() else "cpu"
26
+ model = PaliGemmaForConditionalGeneration.from_pretrained(
27
+ model_id, use_auth_token=token, torch_dtype=torch.bfloat16
28
+ ).to(device)
 
 
 
 
29
 
30
  return processor, model
31
 
 
34
  def process_image_and_text(image, text_input):
35
  """Extract text from image using PaliGemma2."""
36
  processor, model = load_model()
37
+ device = "cuda" if torch.cuda.is_available() else "cpu"
38
  # Preprocess the image and text
39
  inputs = processor(text=text_input, images=image, return_tensors="pt").to(
40
+ device, dtype=torch.bfloat16
41
  )
42
 
43
  # Generate predictions