File size: 679 Bytes
5a09914
 
a20a987
5a09914
279453f
5a09914
 
 
 
545c2d2
91e9e23
5a09914
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
from transformers import AutoTokenizer, AutoModel
import torch

class SimpleEmbedder:
    """Embed text as one vector per input by mean-pooling token states.

    The tokenizer and model are loaded once in ``__init__`` through
    ``AutoTokenizer`` / ``AutoModel`` and reused by every ``embed_text`` call.
    """

    def __init__(self, model_name='sentence-transformers/all-MiniLM-L6-v2'):
        """Load the tokenizer and model.

        Args:
            model_name: Identifier handed to both ``from_pretrained`` calls.
                The default is the checkpoint this class previously
                hard-coded, so ``SimpleEmbedder()`` behaves as before.
        """
        print("Loading tokenizer and model...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        print("Loaded.")

    def embed_text(self, text):
        """Return the mean-pooled embedding(s) for ``text``.

        Args:
            text: Input forwarded to the tokenizer. A single string works as
                before; because padding is enabled, a list of strings of
                different lengths can be tokenized into one batch as well.

        Returns:
            A tensor with the sequence dimension (dim 1) of
            ``last_hidden_state`` averaged away, i.e. one row per input.
            It is computed under ``torch.no_grad()`` and therefore carries
            no autograd graph.
        """
        # padding=True lets a batch of unequal-length texts form one tensor;
        # truncation=True asks the tokenizer to cut over-long inputs
        # (to its configured maximum length) instead of passing them through.
        inputs = self.tokenizer(
            text,
            return_tensors='pt',
            padding=True,
            truncation=True,
        )

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = self.model(**inputs)
            hidden = outputs.last_hidden_state

            mask = inputs.get('attention_mask')
            if mask is None:
                # No mask available: fall back to the plain mean.
                return hidden.mean(dim=1)

            # Masked mean pooling: padded positions get weight 0 so they do
            # not dilute the average. With an all-ones mask (single, unpadded
            # input) this equals hidden.mean(dim=1).
            weights = mask.unsqueeze(-1).to(hidden.dtype)
            summed = (hidden * weights).sum(dim=1)
            # Real token counts are >= 1; the clamp only guards an all-zero row.
            counts = weights.sum(dim=1).clamp(min=1.0)
            return summed / counts

# Module-level demo: constructing the embedder loads the tokenizer and model
# right here, then the embedding of a sample sentence is printed.
embedder = SimpleEmbedder()
print(embedder.embed_text(text="Hello world"))