Spaces:
Running
Running
File size: 1,332 Bytes
abd20d0 7df75ff abd20d0 7df75ff abd20d0 7df75ff abd20d0 7df75ff abd20d0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import os
import wandb
import weave
from byaldi import RAGMultiModalModel
class MultiModalRetriever(weave.Model):
    """Weave model that wraps a byaldi ``RAGMultiModalModel`` for indexing.

    Attributes:
        model_name: Identifier handed to ``RAGMultiModalModel.from_pretrained``.
        _docs_retrieval_model: The loaded byaldi model used to build indexes.
    """

    model_name: str
    _docs_retrieval_model: RAGMultiModalModel

    def __init__(self, model_name: str = "vidore/colpali-v1.2"):
        """Register ``model_name`` on the weave model and load the retriever.

        Args:
            model_name: Pretrained model identifier to load.
        """
        super().__init__(model_name=model_name)
        # Read the name back from the instance so the value stored on the
        # model is the one actually used for loading.
        self._docs_retrieval_model = RAGMultiModalModel.from_pretrained(
            self.model_name
        )

    def index(self, data_artifact_name: str, weave_dataset_name: str, index_name: str):
        """Download a W&B artifact, index its contents, and log the index.

        The artifact is fetched through the active W&B run when there is one
        (via ``wandb.use_artifact``), otherwise through the public
        ``wandb.Api``. The downloaded directory is then indexed with the
        byaldi model. When a run is active after indexing, the directory
        ``.byaldi/<index_name>`` is saved as a ``colpali-index`` artifact.

        Args:
            data_artifact_name: Name of the W&B artifact holding the documents.
            weave_dataset_name: Recorded in the index artifact's metadata under
                the ``weave_dataset_name`` key; not otherwise used here.
            index_name: Name given to the byaldi index and to the logged
                artifact.
        """
        # Inside a run, go through the run-level call; otherwise use the API.
        source_artifact = (
            wandb.use_artifact(data_artifact_name, type="dataset")
            if wandb.run
            else wandb.Api().artifact(data_artifact_name)
        )
        documents_dir = source_artifact.download()

        self._docs_retrieval_model.index(
            input_path=documents_dir,
            index_name=index_name,
            store_collection_with_index=False,
            overwrite=True,
        )

        # Nothing to log without an active run.
        if not wandb.run:
            return

        # NOTE(review): presumably ".byaldi" is where byaldi writes its index
        # by default -- confirm against the byaldi version in use.
        index_location = os.path.join(".byaldi", index_name)
        index_artifact = wandb.Artifact(
            name=index_name,
            type="colpali-index",
            metadata={"weave_dataset_name": weave_dataset_name},
        )
        index_artifact.add_dir(local_path=index_location, name="index")
        index_artifact.save()
|