MARI-posa committed on
Commit
fa0bead
·
1 Parent(s): bbe80a8

Create f.py

Browse files
Files changed (1) hide show
  1. f.py +17 -0
f.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pip install transformers sentencepiece
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModel
4
# Load the tokenizer and the encoder from the "cointegrated/rubert-tiny2"
# checkpoint; both objects are passed to embed_bert_cls() below.
tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
# Optional: uncomment the next line if you have a GPU.
# model.cuda()
7
+
8
def embed_bert_cls(text, model, tokenizer):
    """Return a unit-length embedding taken from the first token position.

    The text is tokenized with padding and truncation enabled, the resulting
    tensors are moved to ``model.device``, and the model is run with gradient
    tracking disabled. The hidden state at token index 0 of every sequence is
    L2-normalised, and the vector for the first sequence in the batch is
    returned.

    Args:
        text: Input forwarded unchanged to ``tokenizer``.
        model: Callable exposing a ``device`` attribute whose output has a
            ``last_hidden_state`` tensor indexable as (batch, token, hidden).
        tokenizer: Callable returning a mapping of keyword name -> tensor
            when invoked with ``return_tensors='pt'``.

    Returns:
        A 1-D NumPy array holding the normalised vector of batch row 0 only;
        any further rows in the batch are discarded.
    """
    encoded = tokenizer(text, padding=True, truncation=True, return_tensors='pt')

    # Inference only: keep everything inside no_grad so no autograd graph is
    # built and the result can be converted to NumPy directly.
    with torch.no_grad():
        model_inputs = {}
        for name, tensor in encoded.items():
            # Inputs must live on the same device as the model.
            model_inputs[name] = tensor.to(model.device)

        hidden_states = model(**model_inputs).last_hidden_state
        first_token = hidden_states[:, 0, :]
        # p=2.0 / dim=1 are normalize()'s defaults, spelled out for clarity.
        unit_vectors = torch.nn.functional.normalize(first_token, p=2.0, dim=1)

    return unit_vectors[0].cpu().numpy()
15
+
16
# Demo: embed a short Russian phrase and print the shape of the result.
print(
    embed_bert_cls('привет мир', model, tokenizer).shape
)
# (312,)