rrg92 committed
Commit 84c4fac · verified · 1 Parent(s): 645dc98

Update app.py

Files changed (1): app.py +24 -12
app.py CHANGED
@@ -4,12 +4,14 @@ from transformers import AutoTokenizer, AutoModel, AutoImageProcessor
 import gradio as gr
 import spaces
 
-processor = AutoImageProcessor.from_pretrained("nomic-ai/nomic-embed-vision-v1.5")
-vision_model = AutoModel.from_pretrained("nomic-ai/nomic-embed-vision-v1.5", trust_remote_code=True)
+model = AutoModel.from_pretrained('neuralmind/bert-base-portuguese-cased')
 
-tokenizer = AutoTokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1.5')
-text_model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
-text_model.eval()
+# processor = AutoImageProcessor.from_pretrained("nomic-ai/nomic-embed-vision-v1.5")
+# vision_model = AutoModel.from_pretrained("nomic-ai/nomic-embed-vision-v1.5", trust_remote_code=True)
+#
+# tokenizer = AutoTokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1.5')
+# text_model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
+# text_model.eval()
 
 def mean_pooling(model_output, attention_mask):
     token_embeddings = model_output[0]
@@ -18,18 +20,28 @@ def mean_pooling(model_output, attention_mask):
 
 @spaces.GPU
 def TxtEmbed(text):
+    import torch
+
 
-    sentences = [text]
-    encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
+    input_ids = tokenizer.encode(text, return_tensors='pt')
 
     with torch.no_grad():
-        model_output = text_model(**encoded_input)
+        outs = model(input_ids)
+        encoded = outs[0][0, 1:-1]  # Ignore [CLS] and [SEP] special tokens
+
+
 
-    text_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
-    text_embeddings = F.layer_norm(text_embeddings, normalized_shape=(text_embeddings.shape[1],))
-    text_embeddings = F.normalize(text_embeddings, p=2, dim=1)
+    # sentences = [text]
+    # encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
+    #
+    # with torch.no_grad():
+    #     model_output = text_model(**encoded_input)
+    #
+    # text_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
+    # text_embeddings = F.layer_norm(text_embeddings, normalized_shape=(text_embeddings.shape[1],))
+    # text_embeddings = F.normalize(text_embeddings, p=2, dim=1)
 
-    return (text_embeddings.tolist())[0];
+    return (encoded.tolist())[0];
 
 
 
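As committed, TxtEmbed still calls tokenizer.encode(...), but the only assignment to tokenizer is now commented out, so the function would raise a NameError unless the tokenizer is defined elsewhere in the file. Below is a minimal sketch of the setup the new code appears to assume; the BERTimbau tokenizer line is an assumption, not part of this diff.

# Minimal sketch of the post-commit setup (not the committed file).
# The tokenizer line is assumed: the diff comments out the nomic tokenizer
# while TxtEmbed keeps calling tokenizer.encode(...).
import torch
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained('neuralmind/bert-base-portuguese-cased')  # assumed
model = AutoModel.from_pretrained('neuralmind/bert-base-portuguese-cased')

def TxtEmbed(text):
    input_ids = tokenizer.encode(text, return_tensors='pt')
    with torch.no_grad():
        outs = model(input_ids)        # outs[0]: last_hidden_state, shape (1, seq_len, 768)
        encoded = outs[0][0, 1:-1]     # drop [CLS] and [SEP]: shape (seq_len - 2, 768)
    # encoded.tolist()[0] is the 768-dim vector of the first real token only;
    # averaging over encoded (roughly what the now-commented mean_pooling
    # path did for the nomic model) would yield one sentence-level vector.
    return encoded.tolist()[0]

If a sentence-level embedding is the goal, encoded.mean(dim=0).tolist() would be a common alternative to the first-token slice returned here.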