vision-rag-sourced-docker-image

Paused

App Files Files Community

manu committed on Jul 9, 2024

Commit

9c66171

verified ·

1 Parent(s): 01531d8

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -12

app.py CHANGED Viewed

@@ -15,13 +15,32 @@ from torch.utils.data import DataLoader
 from tqdm import tqdm
 from transformers import AutoProcessor
 @spaces.GPU
 def search(query: str, ds, images, k):
     qs = []
     with torch.no_grad():
         batch_query = process_queries(processor, [query], mock_image)
-        batch_query = {k: v.to(model.device) for k, v in batch_query.items()}
         embeddings_query = model(**batch_query)
         qs.extend(list(torch.unbind(embeddings_query.to("cpu"))))
@@ -55,29 +74,24 @@ def index(files, ds):
         collate_fn=lambda x: process_images(processor, x),
     )
     print(f"model device: {model.device}")
-    model = model.to(model.device)
     for batch_doc in tqdm(dataloader):
         with torch.no_grad():
-            batch_doc = {k: v.to(model.device) for k, v in batch_doc.items()}
             print(f"model device: {model.device}")
             print(f"model device: {batch_doc['input_ids']}")
             embeddings_doc = model(**batch_doc)
         ds.extend(list(torch.unbind(embeddings_doc.to("cpu"))))
     return f"Uploaded and converted {len(images)} pages", ds, images
-# Load model
-model_name = "vidore/colpali"
-token = os.environ.get("HF_TOKEN")
-model = ColPali.from_pretrained(
-    "google/paligemma-3b-mix-448", torch_dtype=torch.bfloat16, device_map="cuda", token = token).eval()
-model.load_adapter(model_name)
-processor = AutoProcessor.from_pretrained(model_name, token = token)
-mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# ColPali: Efficient Document Retrieval with Vision Language Models 📚")

 from tqdm import tqdm
 from transformers import AutoProcessor
+# Load model
+model_name = "vidore/colpali"
+token = os.environ.get("HF_TOKEN")
+model = ColPali.from_pretrained(
+    "google/paligemma-3b-mix-448", torch_dtype=torch.bfloat16, device_map="cuda", token = token).eval()
+model.load_adapter(model_name)
+processor = AutoProcessor.from_pretrained(model_name, token = token)
+mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
 @spaces.GPU
 def search(query: str, ds, images, k):
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    if device != model.device:
+        model.to(device)
+    print(f"model device: {model.device}")
     qs = []
     with torch.no_grad():
         batch_query = process_queries(processor, [query], mock_image)
+        batch_query = {k: v.to(device) for k, v in batch_query.items()}
         embeddings_query = model(**batch_query)
         qs.extend(list(torch.unbind(embeddings_query.to("cpu"))))
         collate_fn=lambda x: process_images(processor, x),
     )
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
+    if device != model.device:
+        model.to(device)
     print(f"model device: {model.device}")
     for batch_doc in tqdm(dataloader):
         with torch.no_grad():
+            batch_doc = {k: v.to(device) for k, v in batch_doc.items()}
             print(f"model device: {model.device}")
             print(f"model device: {batch_doc['input_ids']}")
             embeddings_doc = model(**batch_doc)
         ds.extend(list(torch.unbind(embeddings_doc.to("cpu"))))
     return f"Uploaded and converted {len(images)} pages", ds, images
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# ColPali: Efficient Document Retrieval with Vision Language Models 📚")