Spaces:

MarMont
/

MARITESS

Sleeping

MarMont committed on Oct 15, 2023

Commit

802e30e

1 Parent(s): 3d86f74

try bertopic

Files changed (1) hide show

app.py CHANGED Viewed

@@ -449,7 +449,7 @@ def compute_coherence_value_bertopic(topic_model):
     return coherence_score
-def base_bertopic():
     df['lemma_tokens_string'] = df['lemma_tokens'].apply(lambda x: ' '.join(x))
     global id2word
     id2word = Dictionary(df['lemma_tokens'])
@@ -472,7 +472,7 @@ def base_bertopic():
     except:
         print('Unable to generate meaningful topics (Base BERTopic model)')
-def optimized_bertopic():
     vectorizer_model = CountVectorizer(max_features=1_000, stop_words="english")
     optimized_topic_model = BERTopic(umap_model=umap_model,
             language="multilingual",
@@ -505,6 +505,7 @@ def optimized_bertopic():
             tweets.append(df.loc[index, 'original_tweets'])
             print(tweets)
         top_tweets.append(tweets)
 global examples
@@ -536,8 +537,8 @@ def main(dataset, model, progress=gr.Progress(track_tqdm=True)):
         print('done lda')
         place_data = 'test'
     else:
-        base_bertopic()
-        optimized_bertopic()
     print('doing topic summarization')
     headlines = topic_summarization(top_tweets)

     return coherence_score
+def base_bertopic(df):
     df['lemma_tokens_string'] = df['lemma_tokens'].apply(lambda x: ' '.join(x))
     global id2word
     id2word = Dictionary(df['lemma_tokens'])
     except:
         print('Unable to generate meaningful topics (Base BERTopic model)')
+def optimized_bertopic(df):
     vectorizer_model = CountVectorizer(max_features=1_000, stop_words="english")
     optimized_topic_model = BERTopic(umap_model=umap_model,
             language="multilingual",
             tweets.append(df.loc[index, 'original_tweets'])
             print(tweets)
         top_tweets.append(tweets)
+    return top_tweets
 global examples
         print('done lda')
         place_data = 'test'
     else:
+        base_bertopic(df)
+        top_tweets = optimized_bertopic()
     print('doing topic summarization')
     headlines = topic_summarization(top_tweets)