Update README.md
Browse files
README.md
CHANGED
@@ -5,12 +5,14 @@ tags:
|
|
5 |
- sentence-transformers
|
6 |
- feature-extraction
|
7 |
- sentence-similarity
|
8 |
-
|
|
|
|
|
9 |
---
|
10 |
|
11 |
-
#
|
12 |
|
13 |
-
|
14 |
|
15 |
<!--- Describe your model here -->
|
16 |
|
@@ -26,13 +28,59 @@ Then you can use the model like this:
|
|
26 |
|
27 |
```python
|
28 |
from sentence_transformers import SentenceTransformer
|
29 |
-
sentences = ["This is an example sentence", "
|
30 |
|
31 |
-
model = SentenceTransformer('
|
32 |
embeddings = model.encode(sentences)
|
33 |
print(embeddings)
|
34 |
```
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
|
38 |
## Evaluation Results
|
@@ -90,4 +138,4 @@ SentenceTransformer(
|
|
90 |
|
91 |
## Citing & Authors
|
92 |
|
93 |
-
|
|
|
5 |
- sentence-transformers
|
6 |
- feature-extraction
|
7 |
- sentence-similarity
|
8 |
+
license: cc-by-nc-sa-4.0
|
9 |
+
language:
|
10 |
+
- krc
|
11 |
---
|
12 |
|
13 |
+
# TSjB/labse-krc
|
14 |
|
15 |
+
This model maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for tasks such as clustering or semantic search.
|
16 |
|
17 |
<!--- Describe your model here -->
|
18 |
|
|
|
28 |
|
29 |
```python
|
30 |
from sentence_transformers import SentenceTransformer
|
31 |
+
sentences = ["This is an example sentence", "Бу айтым юлгюдю"]
|
32 |
|
33 |
+
model = SentenceTransformer('TSjB/labse-krc')
|
34 |
embeddings = model.encode(sentences)
|
35 |
print(embeddings)
|
36 |
```
|
37 |
|
38 |
+
```r
|
39 |
+
library(data.table)
|
40 |
+
library(reticulate)
|
41 |
+
library(ggplot2)
|
42 |
+
library(ggrepel)
|
43 |
+
library(Rtsne)
|
44 |
+
|
45 |
+
py_install("sentence-transformers", pip = TRUE)
|
46 |
+
st <- import("sentence_transformers")
|
47 |
+
|
48 |
+
english_sentences = base::c("dog", "Puppies are nice.", "I enjoy taking long walks along the beach with my dog.")
|
49 |
+
italian_sentences = base::c("cane", "I cuccioli sono carini.", "Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.")
|
50 |
+
qarachay_sentences = base::c("ит", "Итле джагъымлыдыла.", "Джагъа юсю бла итим бла айланыргъа сюеме.")
|
51 |
+
|
52 |
+
model = st$SentenceTransformer('TSjB/labse-krc')
|
53 |
+
|
54 |
+
english_embeddings = model$encode(english_sentences)
|
55 |
+
italian_embeddings = model$encode(italian_sentences)
|
56 |
+
qarachay_embeddings = model$encode(qarachay_sentences)
|
57 |
+
|
58 |
+
m <- rbind(english_embeddings,
|
59 |
+
italian_embeddings,
|
60 |
+
qarachay_embeddings) %>% as.matrix
|
61 |
+
|
62 |
+
tsne <- Rtsne(m, perplexity = floor((nrow(m) - 1) / 3))
|
63 |
+
|
64 |
+
|
65 |
+
tSNE_df <- tsne$Y %>%
|
66 |
+
as.data.table() %>%
|
67 |
+
setnames(old = c("V1", "V2"), new = c("tSNE1", "tSNE2")) %>%
|
68 |
+
.[, `:=`(sentence = c(english_sentences, italian_sentences, qarachay_sentences),
|
69 |
+
language = c(rep("english", length(english_sentences)),
|
70 |
+
rep("italian", length(italian_sentences)),
|
71 |
+
rep("qarachay", length(qarachay_sentences))))]
|
72 |
+
|
73 |
+
|
74 |
+
tSNE_df %>%
|
75 |
+
ggplot(aes(x = tSNE1,
|
76 |
+
y = tSNE2,
|
77 |
+
color = language,
|
78 |
+
label = sentence
|
79 |
+
)
|
80 |
+
) +
|
81 |
+
geom_label_repel() +
|
82 |
+
geom_point()
|
83 |
+
```
|
84 |
|
85 |
|
86 |
## Evaluation Results
|
|
|
138 |
|
139 |
## Citing & Authors
|
140 |
|
141 |
+
[Bogdan Tewunalany](https://t.me/bogdan_tewunalany), [Ali Berberov](https://t.me/ali_bulat1990)
|