File size: 1,332 Bytes
abd20d0
 
 
7df75ff
 
 
 
 
 
 
abd20d0
7df75ff
 
 
abd20d0
7df75ff
 
abd20d0
7df75ff
 
 
 
 
abd20d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os

import wandb
import weave
from byaldi import RAGMultiModalModel


class MultiModalRetriever(weave.Model):
    """Weave model that builds a byaldi multi-modal index from a W&B artifact.

    The retrieval model is loaded once at construction time via
    ``RAGMultiModalModel.from_pretrained`` and reused by :meth:`index`.
    """

    # Name handed to ``RAGMultiModalModel.from_pretrained``.
    model_name: str
    # Retrieval model instance; assigned in ``__init__``.
    _docs_retrieval_model: RAGMultiModalModel

    def __init__(self, model_name: str = "vidore/colpali-v1.2"):
        """Initialise the model fields and load the pretrained retriever.

        Args:
            model_name: Name of the pretrained model to load.
        """
        super().__init__(model_name=model_name)
        self._docs_retrieval_model = RAGMultiModalModel.from_pretrained(
            self.model_name
        )

    def index(self, data_artifact_name: str, weave_dataset_name: str, index_name: str):
        """Download a dataset artifact, index it, and optionally upload the index.

        Args:
            data_artifact_name: W&B artifact to download and index.
            weave_dataset_name: Recorded under the ``weave_dataset_name`` key
                in the metadata of the uploaded index artifact.
            index_name: Name given to the byaldi index and to the uploaded
                artifact.

        The index artifact is only created and saved when a W&B run is
        active; otherwise the method stops after indexing.
        """
        # With an active run the artifact is declared as a run input;
        # without one it is fetched through the public API instead.
        if wandb.run:
            source_artifact = wandb.use_artifact(data_artifact_name, type="dataset")
        else:
            source_artifact = wandb.Api().artifact(data_artifact_name)
        documents_dir = source_artifact.download()

        self._docs_retrieval_model.index(
            input_path=documents_dir,
            index_name=index_name,
            store_collection_with_index=False,
            overwrite=True,
        )

        # Nothing to upload to when no run is active.
        if not wandb.run:
            return

        # NOTE(review): assumes byaldi wrote the index under ".byaldi/<index_name>"
        # (its default index root) -- confirm if from_pretrained is ever given
        # a different root.
        index_files = os.path.join(".byaldi", index_name)
        index_artifact = wandb.Artifact(
            name=index_name,
            type="colpali-index",
            metadata={"weave_dataset_name": weave_dataset_name},
        )
        index_artifact.add_dir(local_path=index_files, name="index")
        index_artifact.save()