Add BERTopic model

Browse files

Files changed (4) hide show

README.md +72 -0
config.json +16 -0
topic_embeddings.safetensors +3 -0
topics.json +363 -0

README.md ADDED Viewed

	@@ -0,0 +1,72 @@

+---
+tags:
+- bertopic
+library_name: bertopic
+pipeline_tag: text-classification
+---
+# rag-topic-model
+This is a [BERTopic](https://github.com/MaartenGr/BERTopic) model.
+BERTopic is a flexible and modular topic modeling framework that allows for the generation of easily interpretable topics from large datasets.
+## Usage
+To use this model, please install BERTopic:
+```
+pip install -U bertopic
+```
+You can use the model as follows:
+```python
+from bertopic import BERTopic
+topic_model = BERTopic.load("ppuva1/rag-topic-model")
+topic_model.get_topic_info()
+```
+## Topic overview
+* Number of topics: 3
+* Number of training documents: 201
+<details>
+  <summary>Click here for an overview of all topics.</summary>
+
+| Topic ID | Topic Keywords | Topic Frequency | Label |
+|----------|----------------|-----------------|-------|
+| -1 | charge - on - account - seeing - random | 7 | -1_charge_on_account_seeing |
+| 0 | my - to - klarna - the - it | 119 | 0_my_to_klarna_the |
+| 1 | refund - my - nike - for - store | 75 | 1_refund_my_nike_for |
+</details>
+## Training hyperparameters
+* calculate_probabilities: False
+* language: None
+* low_memory: False
+* min_topic_size: 10
+* n_gram_range: (1, 1)
+* nr_topics: None
+* seed_topic_list: None
+* top_n_words: 10
+* verbose: False
+* zeroshot_min_similarity: 0.7
+* zeroshot_topic_list: None
+## Framework versions
+* Numpy: 2.0.2
+* HDBSCAN: 0.8.40
+* UMAP: 0.5.7
+* Pandas: 2.2.3
+* Scikit-Learn: 1.6.1
+* Sentence-transformers: 3.4.1
+* Transformers: 4.48.2
+* Numba: 0.60.0
+* Plotly: 6.0.0
+* Python: 3.9.21

config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "calculate_probabilities": false,
+  "language": null,
+  "low_memory": false,
+  "min_topic_size": 10,
+  "n_gram_range": [
+    1,
+    1
+  ],
+  "nr_topics": null,
+  "seed_topic_list": null,
+  "top_n_words": 10,
+  "verbose": false,
+  "zeroshot_min_similarity": 0.7,
+  "zeroshot_topic_list": null
+}

topic_embeddings.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de587e014d6a0af6f1f676efdb47fa5bd26b8d8fed32b5c72dfe32dba295d284
+size 4696

topics.json ADDED Viewed

	@@ -0,0 +1,363 @@

+{
+  "topic_representations": {
+    "-1": [
+      [
+        "charge",
+        0.1437641778909261
+      ],
+      [
+        "on",
+        0.1178154768085835
+      ],
+      [
+        "account",
+        0.11021919978363973
+      ],
+      [
+        "seeing",
+        0.1022161186535828
+      ],
+      [
+        "random",
+        0.1022161186535828
+      ],
+      [
+        "saw",
+        0.1022161186535828
+      ],
+      [
+        "my",
+        0.0984618790345695
+      ],
+      [
+        "buy",
+        0.08782214790690118
+      ],
+      [
+        "charged",
+        0.08782214790690118
+      ],
+      [
+        "im",
+        0.0826643998377298
+      ]
+    ],
+    "0": [
+      [
+        "my",
+        0.10390739366309902
+      ],
+      [
+        "to",
+        0.09071191064477435
+      ],
+      [
+        "klarna",
+        0.08118481702761375
+      ],
+      [
+        "the",
+        0.06553510864541155
+      ],
+      [
+        "it",
+        0.058823167261863776
+      ],
+      [
+        "and",
+        0.05797918596010623
+      ],
+      [
+        "for",
+        0.04867142710037787
+      ],
+      [
+        "email",
+        0.04547149587226583
+      ],
+      [
+        "but",
+        0.04540059233774568
+      ],
+      [
+        "in",
+        0.04494803747556551
+      ]
+    ],
+    "1": [
+      [
+        "refund",
+        0.11480022821235052
+      ],
+      [
+        "my",
+        0.08942725570982173
+      ],
+      [
+        "nike",
+        0.08905467107379061
+      ],
+      [
+        "for",
+        0.08460308243237606
+      ],
+      [
+        "store",
+        0.07534184980024404
+      ],
+      [
+        "returned",
+        0.07387449643163027
+      ],
+      [
+        "to",
+        0.07041279581631162
+      ],
+      [
+        "credit",
+        0.06273866511558329
+      ],
+      [
+        "week",
+        0.05911540552381123
+      ],
+      [
+        "but",
+        0.05610871184366852
+      ]
+    ]
+  },
+  "topics": [
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    1,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0,
+    0,
+    0,
+    -1,
+    0,
+    1,
+    0,
+    0,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    1,
+    1,
+    1,
+    0,
+    0,
+    0,
+    0,
+    1,
+    1,
+    1,
+    0,
+    0,
+    0,
+    0,
+    1,
+    1,
+    1,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    1,
+    1,
+    1,
+    1,
+    1,
+    -1,
+    1,
+    1,
+    1,
+    1,
+    0,
+    0,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    1,
+    0,
+    0,
+    0,
+    0,
+    -1,
+    0,
+    1,
+    0,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0,
+    1,
+    0,
+    1,
+    0,
+    0,
+    1,
+    0,
+    0,
+    1,
+    1,
+    0,
+    1,
+    0,
+    1,
+    0,
+    0,
+    0,
+    -1,
+    0,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0,
+    0,
+    -1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0,
+    -1,
+    0,
+    0,
+    0,
+    0,
+    -1
+  ],
+  "topic_sizes": {
+    "0": 119,
+    "1": 75,
+    "-1": 7
+  },
+  "topic_mapper": [
+    [
+      -1,
+      -1,
+      -1
+    ],
+    [
+      0,
+      0,
+      0
+    ],
+    [
+      1,
+      1,
+      1
+    ]
+  ],
+  "topic_labels": {
+    "-1": "-1_charge_on_account_seeing",
+    "0": "0_my_to_klarna_the",
+    "1": "1_refund_my_nike_for"
+  },
+  "custom_labels": null,
+  "_outliers": 1,
+  "topic_aspects": {}
+}