google_ads_space / Functionalities /TopicClustering.py
zayed-upal
Google ads format download added, topic name rename option added
4c25316
import pandas as pd
from sentence_transformers import SentenceTransformer
from bertopic import BERTopic
from umap import UMAP
from Functionalities import NLP_Helper
# Visualization
import plotly.graph_objects as go
class TopicClustering:
    """Cluster keywords into topics with BERTopic and export them as ad groups.

    Attributes:
        keyword_df: Keyword table. After ``topic_cluster_bert`` it is replaced
            by the topic-info table merged with the keywords (one row per
            keyword, including a ``'Topic Name'`` column).
        text_col: Label of the column in ``keyword_df`` holding the keyword text.
        sentence_model: ``SentenceTransformer`` used to embed the keywords.
        representation_model: Whatever
            ``NLP_Helper.get_bertopic_representation`` returns for the
            requested representation; handed to BERTopic unchanged.
        embeddings: Keyword embeddings, set by ``topic_cluster_bert``.
        topic_model: Fitted ``BERTopic`` instance, set by ``topic_cluster_bert``.
        topic_names: List with one label per topic, set by
            ``topic_cluster_bert`` and kept in sync by ``update_topic_names``.
        topic_name_mapping: Pending renames as ``{current name: new name}``;
            filled by the caller and consumed by ``update_topic_names``.
    """

    def __init__(self, keyword_df, text_col, representation_model, sentence_model):
        """Store the keyword table and load the embedding/representation models.

        Args:
            keyword_df: DataFrame containing the keywords to cluster.
            text_col: Label of the keyword text column in ``keyword_df``.
            representation_model: Value forwarded to
                ``NLP_Helper.get_bertopic_representation``.
            sentence_model: Value forwarded to ``SentenceTransformer(...)``
                (presumably a model name or path -- confirm against callers).
        """
        self.topic_names = None
        self.topic_model = None
        self.embeddings = None
        # Keywords in the exact order they were embedded and fitted; needed
        # because keyword_df gets re-ordered by the merge after clustering.
        self._documents = None
        self.topic_name_mapping = {}
        self.keyword_df, self.text_col = keyword_df, text_col
        self.sentence_model = SentenceTransformer(sentence_model)
        self.representation_model = NLP_Helper.get_bertopic_representation(representation_model)

    def topic_cluster_bert(self) -> None:
        """Embed the keywords, fit BERTopic and attach topic info to ``keyword_df``.

        Side effects: sets ``embeddings``, ``topic_model`` and ``topic_names``
        and rebinds ``keyword_df`` to the merged topic/keyword table. The
        DataFrame object originally passed to the constructor is not modified.
        """
        # A plain list keeps the documents independent of the DataFrame index.
        documents = self.keyword_df[self.text_col].tolist()
        self._documents = documents

        self.embeddings = self.sentence_model.encode(documents, show_progress_bar=True)
        self.topic_model = BERTopic(
            representation_model=self.representation_model,
            embedding_model=self.sentence_model,
            n_gram_range=(1, 3),
            top_n_words=2,
        )
        # Reuse the embeddings computed above instead of encoding every
        # keyword a second time inside fit_transform.
        topics, _ = self.topic_model.fit_transform(documents, embeddings=self.embeddings)

        topic_labels = self.topic_model.generate_topic_labels(nr_words=1, topic_prefix=False)
        # When the first topic is the -1 (outlier) topic, give it a readable label.
        if self.topic_model.get_topic_info()['Topic'].values[0] == -1:
            topic_labels[0] = 'Unknown'
        self.topic_model.set_topic_labels(topic_labels)

        topic_info = self.topic_model.get_topic_info()
        topic_info['Name'] = topic_labels

        # assign() works on a copy, so the caller's DataFrame is left untouched.
        keywords_with_topics = self.keyword_df.assign(Topic=topics)
        merged = pd.merge(topic_info, keywords_with_topics, on=['Topic'])
        merged = merged.rename(columns={'Name': 'Topic Name'})
        # 'CustomName' duplicates the labels already stored in 'Topic Name'.
        merged = merged.drop(columns=['CustomName'], errors='ignore')

        self.keyword_df = merged
        self.topic_names = topic_labels

    def visualize_documents(self, n_neighbors) -> "go.Figure":
        """Return BERTopic's 2-D document scatter plot.

        Args:
            n_neighbors: ``n_neighbors`` value passed to UMAP when reducing
                the stored embeddings to two dimensions.

        Returns:
            The figure produced by ``topic_model.visualize_documents``.
        """
        reduced_embeddings = UMAP(
            n_neighbors=n_neighbors,
            n_components=2,
            min_dist=0.0,
            metric='cosine',
        ).fit_transform(self.embeddings)

        # The documents must line up row-for-row with the embeddings. The
        # merged keyword_df is ordered by topic, so use the fit-time order.
        documents = self._documents
        if documents is None:
            documents = self.keyword_df[self.text_col]

        return self.topic_model.visualize_documents(
            documents,
            reduced_embeddings=reduced_embeddings,
            custom_labels=True,
        )

    def visualize_topic_distribution(self) -> "go.Figure":
        """Return BERTopic's bar chart for the top five topics."""
        return self.topic_model.visualize_barchart(
            custom_labels=True,
            top_n_topics=5,
            n_words=20,
            title='Topic Distribution',
        )

    def update_topic_names(self) -> None:
        """Apply the pending renames in ``topic_name_mapping`` and clear it.

        All renames are applied in one pass, so swapping two names
        (``{'a': 'b', 'b': 'a'}``) works and a rename never cascades into
        another one. Names without an entry in the mapping are kept as they are.
        """
        renames = dict(self.topic_name_mapping)
        if renames:
            def renamed(name):
                return renames.get(name, name)

            # Replace the whole column rather than using chained indexing,
            # which is unreliable and a no-op under pandas Copy-on-Write.
            self.keyword_df['Topic Name'] = self.keyword_df['Topic Name'].map(renamed)
            if self.topic_names is not None:
                self.topic_names = [renamed(name) for name in self.topic_names]
        self.topic_name_mapping = {}

    def get_df_in_google_ads_format(self, campaign_name):
        """Build a keyword table with Google Ads style columns.

        Each topic name becomes the ad group of its keywords; every row gets
        action ``'Add'``, status ``'Enabled'`` and match type ``'Phrase'``.

        Args:
            campaign_name: Value written to the ``'Campaign'`` column of every row.

        Returns:
            DataFrame with the columns ``Action``, ``Keyword status``,
            ``Campaign``, ``Ad group``, ``Keyword`` and ``Match Type``.
        """
        return pd.DataFrame({
            'Action': 'Add',
            'Keyword status': 'Enabled',
            'Campaign': campaign_name,
            'Ad group': self.keyword_df['Topic Name'],
            'Keyword': self.keyword_df[self.text_col],
            'Match Type': 'Phrase',
        })