ppuva1 committed on
Commit
d975336
·
verified ·
1 Parent(s): 589fe41

Add BERTopic model

Browse files
Files changed (4) hide show
  1. README.md +72 -0
  2. config.json +16 -0
  3. topic_embeddings.safetensors +3 -0
  4. topics.json +363 -0
README.md ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - bertopic
5
+ library_name: bertopic
6
+ pipeline_tag: text-classification
7
+ ---
8
+
9
+ # rag-topic-model
10
+
11
+ This is a [BERTopic](https://github.com/MaartenGr/BERTopic) model.
12
+ BERTopic is a flexible and modular topic modeling framework that allows for the generation of easily interpretable topics from large datasets.
13
+
14
+ ## Usage
15
+
16
+ To use this model, please install BERTopic:
17
+
18
+ ```
19
+ pip install -U bertopic
20
+ ```
21
+
22
+ You can use the model as follows:
23
+
24
+ ```python
25
+ from bertopic import BERTopic
26
+ topic_model = BERTopic.load("ppuva1/rag-topic-model")
27
+
28
+ topic_model.get_topic_info()
29
+ ```
30
+
31
+ ## Topic overview
32
+
33
+ * Number of topics: 3
34
+ * Number of training documents: 201
35
+
36
+ <details>
37
+ <summary>Click here for an overview of all topics.</summary>
38
+
39
+ | Topic ID | Topic Keywords | Topic Frequency | Label |
40
+ |----------|----------------|-----------------|-------|
41
+ | -1 | charge - on - account - seeing - random | 7 | -1_charge_on_account_seeing |
42
+ | 0 | my - to - klarna - the - it | 119 | 0_my_to_klarna_the |
43
+ | 1 | refund - my - nike - for - store | 75 | 1_refund_my_nike_for |
44
+
45
+ </details>
46
+
47
+ ## Training hyperparameters
48
+
49
+ * calculate_probabilities: False
50
+ * language: None
51
+ * low_memory: False
52
+ * min_topic_size: 10
53
+ * n_gram_range: (1, 1)
54
+ * nr_topics: None
55
+ * seed_topic_list: None
56
+ * top_n_words: 10
57
+ * verbose: False
58
+ * zeroshot_min_similarity: 0.7
59
+ * zeroshot_topic_list: None
60
+
61
+ ## Framework versions
62
+
63
+ * Numpy: 2.0.2
64
+ * HDBSCAN: 0.8.40
65
+ * UMAP: 0.5.7
66
+ * Pandas: 2.2.3
67
+ * Scikit-Learn: 1.6.1
68
+ * Sentence-transformers: 3.4.1
69
+ * Transformers: 4.48.2
70
+ * Numba: 0.60.0
71
+ * Plotly: 6.0.0
72
+ * Python: 3.9.21
config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "calculate_probabilities": false,
3
+ "language": null,
4
+ "low_memory": false,
5
+ "min_topic_size": 10,
6
+ "n_gram_range": [
7
+ 1,
8
+ 1
9
+ ],
10
+ "nr_topics": null,
11
+ "seed_topic_list": null,
12
+ "top_n_words": 10,
13
+ "verbose": false,
14
+ "zeroshot_min_similarity": 0.7,
15
+ "zeroshot_topic_list": null
16
+ }
topic_embeddings.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de587e014d6a0af6f1f676efdb47fa5bd26b8d8fed32b5c72dfe32dba295d284
3
+ size 4696
topics.json ADDED
@@ -0,0 +1,363 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "topic_representations": {
3
+ "-1": [
4
+ [
5
+ "charge",
6
+ 0.1437641778909261
7
+ ],
8
+ [
9
+ "on",
10
+ 0.1178154768085835
11
+ ],
12
+ [
13
+ "account",
14
+ 0.11021919978363973
15
+ ],
16
+ [
17
+ "seeing",
18
+ 0.1022161186535828
19
+ ],
20
+ [
21
+ "random",
22
+ 0.1022161186535828
23
+ ],
24
+ [
25
+ "saw",
26
+ 0.1022161186535828
27
+ ],
28
+ [
29
+ "my",
30
+ 0.0984618790345695
31
+ ],
32
+ [
33
+ "buy",
34
+ 0.08782214790690118
35
+ ],
36
+ [
37
+ "charged",
38
+ 0.08782214790690118
39
+ ],
40
+ [
41
+ "im",
42
+ 0.0826643998377298
43
+ ]
44
+ ],
45
+ "0": [
46
+ [
47
+ "my",
48
+ 0.10390739366309902
49
+ ],
50
+ [
51
+ "to",
52
+ 0.09071191064477435
53
+ ],
54
+ [
55
+ "klarna",
56
+ 0.08118481702761375
57
+ ],
58
+ [
59
+ "the",
60
+ 0.06553510864541155
61
+ ],
62
+ [
63
+ "it",
64
+ 0.058823167261863776
65
+ ],
66
+ [
67
+ "and",
68
+ 0.05797918596010623
69
+ ],
70
+ [
71
+ "for",
72
+ 0.04867142710037787
73
+ ],
74
+ [
75
+ "email",
76
+ 0.04547149587226583
77
+ ],
78
+ [
79
+ "but",
80
+ 0.04540059233774568
81
+ ],
82
+ [
83
+ "in",
84
+ 0.04494803747556551
85
+ ]
86
+ ],
87
+ "1": [
88
+ [
89
+ "refund",
90
+ 0.11480022821235052
91
+ ],
92
+ [
93
+ "my",
94
+ 0.08942725570982173
95
+ ],
96
+ [
97
+ "nike",
98
+ 0.08905467107379061
99
+ ],
100
+ [
101
+ "for",
102
+ 0.08460308243237606
103
+ ],
104
+ [
105
+ "store",
106
+ 0.07534184980024404
107
+ ],
108
+ [
109
+ "returned",
110
+ 0.07387449643163027
111
+ ],
112
+ [
113
+ "to",
114
+ 0.07041279581631162
115
+ ],
116
+ [
117
+ "credit",
118
+ 0.06273866511558329
119
+ ],
120
+ [
121
+ "week",
122
+ 0.05911540552381123
123
+ ],
124
+ [
125
+ "but",
126
+ 0.05610871184366852
127
+ ]
128
+ ]
129
+ },
130
+ "topics": [
131
+ 0,
132
+ 0,
133
+ 0,
134
+ 0,
135
+ 0,
136
+ 0,
137
+ 1,
138
+ 1,
139
+ 0,
140
+ 0,
141
+ 0,
142
+ 0,
143
+ 0,
144
+ 1,
145
+ 0,
146
+ 0,
147
+ 0,
148
+ -1,
149
+ 0,
150
+ 1,
151
+ 0,
152
+ 0,
153
+ 1,
154
+ 0,
155
+ 0,
156
+ 0,
157
+ 0,
158
+ 0,
159
+ 1,
160
+ 1,
161
+ 1,
162
+ 0,
163
+ 0,
164
+ 0,
165
+ 0,
166
+ 1,
167
+ 1,
168
+ 1,
169
+ 0,
170
+ 0,
171
+ 0,
172
+ 0,
173
+ 1,
174
+ 1,
175
+ 1,
176
+ 0,
177
+ 0,
178
+ 0,
179
+ 0,
180
+ 1,
181
+ 0,
182
+ 0,
183
+ 0,
184
+ 0,
185
+ 0,
186
+ 1,
187
+ 1,
188
+ 1,
189
+ 1,
190
+ 1,
191
+ -1,
192
+ 1,
193
+ 1,
194
+ 1,
195
+ 1,
196
+ 0,
197
+ 0,
198
+ 1,
199
+ 1,
200
+ 1,
201
+ 1,
202
+ 1,
203
+ 1,
204
+ 1,
205
+ 1,
206
+ 1,
207
+ 1,
208
+ 1,
209
+ 1,
210
+ 1,
211
+ 1,
212
+ 1,
213
+ 1,
214
+ 1,
215
+ 1,
216
+ 1,
217
+ 1,
218
+ 1,
219
+ 1,
220
+ 1,
221
+ 1,
222
+ 1,
223
+ 1,
224
+ 1,
225
+ 1,
226
+ 1,
227
+ 1,
228
+ 1,
229
+ 1,
230
+ 0,
231
+ 0,
232
+ 0,
233
+ 0,
234
+ -1,
235
+ 0,
236
+ 1,
237
+ 0,
238
+ 1,
239
+ 0,
240
+ 0,
241
+ 0,
242
+ 0,
243
+ 0,
244
+ 0,
245
+ 0,
246
+ 0,
247
+ 0,
248
+ 0,
249
+ 1,
250
+ 0,
251
+ 0,
252
+ 0,
253
+ 0,
254
+ 0,
255
+ 0,
256
+ 1,
257
+ 0,
258
+ 1,
259
+ 0,
260
+ 1,
261
+ 0,
262
+ 0,
263
+ 1,
264
+ 0,
265
+ 0,
266
+ 1,
267
+ 1,
268
+ 0,
269
+ 1,
270
+ 0,
271
+ 1,
272
+ 0,
273
+ 0,
274
+ 0,
275
+ -1,
276
+ 0,
277
+ 1,
278
+ 0,
279
+ 0,
280
+ 0,
281
+ 0,
282
+ 0,
283
+ 0,
284
+ 1,
285
+ 0,
286
+ 0,
287
+ 0,
288
+ 0,
289
+ 0,
290
+ 1,
291
+ 0,
292
+ 0,
293
+ -1,
294
+ 1,
295
+ 0,
296
+ 1,
297
+ 1,
298
+ 1,
299
+ 0,
300
+ 0,
301
+ 0,
302
+ 0,
303
+ 0,
304
+ 0,
305
+ 0,
306
+ 0,
307
+ 0,
308
+ 0,
309
+ 0,
310
+ 0,
311
+ 0,
312
+ 0,
313
+ 0,
314
+ 0,
315
+ 0,
316
+ 0,
317
+ 0,
318
+ 0,
319
+ 0,
320
+ 0,
321
+ 0,
322
+ 0,
323
+ 0,
324
+ 1,
325
+ 0,
326
+ -1,
327
+ 0,
328
+ 0,
329
+ 0,
330
+ 0,
331
+ -1
332
+ ],
333
+ "topic_sizes": {
334
+ "0": 119,
335
+ "1": 75,
336
+ "-1": 7
337
+ },
338
+ "topic_mapper": [
339
+ [
340
+ -1,
341
+ -1,
342
+ -1
343
+ ],
344
+ [
345
+ 0,
346
+ 0,
347
+ 0
348
+ ],
349
+ [
350
+ 1,
351
+ 1,
352
+ 1
353
+ ]
354
+ ],
355
+ "topic_labels": {
356
+ "-1": "-1_charge_on_account_seeing",
357
+ "0": "0_my_to_klarna_the",
358
+ "1": "1_refund_my_nike_for"
359
+ },
360
+ "custom_labels": null,
361
+ "_outliers": 1,
362
+ "topic_aspects": {}
363
+ }