MarMont committed on
Commit
802e30e
·
1 Parent(s): 3d86f74

try bertopic

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -449,7 +449,7 @@ def compute_coherence_value_bertopic(topic_model):
449
 
450
  return coherence_score
451
 
452
- def base_bertopic():
453
  df['lemma_tokens_string'] = df['lemma_tokens'].apply(lambda x: ' '.join(x))
454
  global id2word
455
  id2word = Dictionary(df['lemma_tokens'])
@@ -472,7 +472,7 @@ def base_bertopic():
472
  except:
473
  print('Unable to generate meaningful topics (Base BERTopic model)')
474
 
475
- def optimized_bertopic():
476
  vectorizer_model = CountVectorizer(max_features=1_000, stop_words="english")
477
  optimized_topic_model = BERTopic(umap_model=umap_model,
478
  language="multilingual",
@@ -505,6 +505,7 @@ def optimized_bertopic():
505
  tweets.append(df.loc[index, 'original_tweets'])
506
  print(tweets)
507
  top_tweets.append(tweets)
 
508
 
509
  global examples
510
 
@@ -536,8 +537,8 @@ def main(dataset, model, progress=gr.Progress(track_tqdm=True)):
536
  print('done lda')
537
  place_data = 'test'
538
  else:
539
- base_bertopic()
540
- optimized_bertopic()
541
 
542
  print('doing topic summarization')
543
  headlines = topic_summarization(top_tweets)
 
449
 
450
  return coherence_score
451
 
452
+ def base_bertopic(df):
453
  df['lemma_tokens_string'] = df['lemma_tokens'].apply(lambda x: ' '.join(x))
454
  global id2word
455
  id2word = Dictionary(df['lemma_tokens'])
 
472
  except:
473
  print('Unable to generate meaningful topics (Base BERTopic model)')
474
 
475
+ def optimized_bertopic(df):
476
  vectorizer_model = CountVectorizer(max_features=1_000, stop_words="english")
477
  optimized_topic_model = BERTopic(umap_model=umap_model,
478
  language="multilingual",
 
505
  tweets.append(df.loc[index, 'original_tweets'])
506
  print(tweets)
507
  top_tweets.append(tweets)
508
+ return top_tweets
509
 
510
  global examples
511
 
 
537
  print('done lda')
538
  place_data = 'test'
539
  else:
540
+ base_bertopic(df)
541
+ top_tweets = optimized_bertopic()
542
 
543
  print('doing topic summarization')
544
  headlines = topic_summarization(top_tweets)