openfree committed · Commit 2134133 · verified · Parent(s): 6efc05e

Update app.py

Files changed (1): app.py (+148 −168)

app.py CHANGED
@@ -12,20 +12,31 @@ from nltk.sentiment import SentimentIntensityAnalyzer
 from sklearn.cluster import KMeans
 import torch
 
+# Set GPU if available
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
 # Initialize WBGDocTopic
 clf = wbgtopic.WBGDocTopic(device=device)
 
+# Download NLTK data if needed
 try:
     nltk.download('punkt', quiet=True)
     nltk.download('vader_lexicon', quiet=True)
-except:
-    pass
+except Exception as e:
+    print(f"NLTK data download error: {e}")
 
-SAMPLE_TEXT = """Your sample text here ..."""
+# Sample text for demonstration
+SAMPLE_TEXT = """
+The three reportedly discussed the Stargate Project, a large-scale AI initiative led by OpenAI, SoftBank, and U.S. software giant Oracle. The project aims to invest $500 billion over the next four years in building new AI infrastructure in the U.S. The U.S. government has shown a strong commitment to the initiative, with President Donald Trump personally announcing it at the White House the day after his inauguration last month. If Samsung participates, the project will lead to a Korea-U.S.-Japan AI alliance.
+The AI sector requires massive investments and extensive resources, including advanced models, high-performance AI chips to power the models, and large-scale data centers to operate them. Nvidia and TSMC currently dominate the AI sector, but a partnership between Samsung, SoftBank, and OpenAI could pave the way for a competitive alternative.
+"""
 
 def safe_process(func):
+    """
+    A decorator that catches and logs exceptions inside a function,
+    returning None if an error occurs. This helps ensure that
+    the Gradio interface does not crash from unexpected exceptions.
+    """
     def wrapper(*args, **kwargs):
         try:
             return func(*args, **kwargs)
@@ -34,176 +45,144 @@ def safe_process(func):
             return None
     return wrapper
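A note on the new safe_process docstring: any function wrapped this way returns None on failure, so callers must tolerate None results. A minimal sketch of the behavior (the decorated function here is hypothetical):

@safe_process
def parse_number(s):
    return float(s)  # raises ValueError on non-numeric input

parse_number("3.14")   # -> 3.14
parse_number("oops")   # exception caught inside wrapper -> None, per the docstring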
 
-
-################################################################
-# 1) Convert Raw Results into a Consistent Format              #
-################################################################
-
 @safe_process
 def parse_wbg_results(raw_output):
     """
-    Example: raw_output might be something like:
-    [
-      { 'Innovation and Entrepreneurship': 0.32,
-        'Digital Development': 0.27,
-        ...}
-    ]
-    or it might be [ [ {...}, {...} ] ]
-
-    Adjust logic so we end up with a list of dicts:
-    [
-      {'label': 'Innovation and Entrepreneurship', 'score_mean': 0.32, 'score_std': 0.0},
-      {'label': 'Digital Development', 'score_mean': 0.27, 'score_std': 0.0},
-      ...
-    ]
+    Convert the raw output from WBGDocTopic into a list of dictionaries with
+    'label', 'score_mean', and 'score_std'. Adjust logic according to the
+    actual structure of raw_output.
     """
     if not raw_output:
         return []
 
-    # If the library returns a list with a single dictionary:
-    # raw_output[0] might be a dict of {topic: score}
-    # or it might be a list of dicts with 'label'/'score_mean' keys
+    # Example logic: If raw_output is something like:
+    # [ { "Innovation and Entrepreneurship": 0.74, "Digital Development": 0.65, ... } ]
+    # We'll parse it accordingly.
     first_item = raw_output[0]
 
-    # If it's already a list of dicts with 'label', 'score_mean', etc.
-    if isinstance(first_item, dict) and 'label' in first_item:
-        # Possibly we already have the correct format
+    # If the first item is already a dict with a 'label' key, it might already be in the right format
+    if isinstance(first_item, dict) and "label" in first_item:
         return raw_output
 
-    # If it's a dict of {topic_label: numeric_score}
+    # If it's a dict containing topic -> score
     if isinstance(first_item, dict):
-        # Then let's convert it
         parsed_list = []
         for label, val in first_item.items():
             parsed_list.append({
-                'label': label,
-                'score_mean': float(val),
-                'score_std': 0.0  # If no std is given, default 0
+                "label": label,
+                "score_mean": float(val),
+                "score_std": 0.0  # If std is not provided, default to 0
             })
         return parsed_list
 
-    # If it's something else, handle it
     return []
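To illustrate the normalization path above with the dict-of-scores shape mentioned in the new comments (scores hypothetical):

raw = [{"Innovation and Entrepreneurship": 0.74, "Digital Development": 0.65}]
parse_wbg_results(raw)
# -> [{'label': 'Innovation and Entrepreneurship', 'score_mean': 0.74, 'score_std': 0.0},
#     {'label': 'Digital Development', 'score_mean': 0.65, 'score_std': 0.0}]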
 
-
-################################################################
-# 2) Section-based Analysis                                    #
-################################################################
 
 @safe_process
 def analyze_text_sections(text):
     """
-    Splits text into sections, calls clf.suggest_topics on each,
-    and returns a list-of-lists:
-      section_topics = [
-        [ {'label':'...', 'score_mean':...}, {...} ],
-        [ {'label':'...', 'score_mean':...}, {...} ],
-        ...
-      ]
+    Splits the text into sections and calls clf.suggest_topics for each section.
+    Returns a list of topic lists, where each element is the parsed WBG result
+    for that section.
     """
     sentences = sent_tokenize(text)
-    # e.g. group 3 sentences per section
+    # Example: group every 3 sentences into one section
     sections = [' '.join(sentences[i:i+3]) for i in range(0, len(sentences), 3)]
 
     section_topics = []
     for section in sections:
         raw_sec = clf.suggest_topics(section)
         parsed_sec = parse_wbg_results(raw_sec)
         section_topics.append(parsed_sec)
 
     return section_topics
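The three-sentence grouping is plain list slicing; a self-contained sketch with a made-up text:

from nltk.tokenize import sent_tokenize

sentences = sent_tokenize("One. Two. Three. Four. Five.")
sections = [' '.join(sentences[i:i+3]) for i in range(0, len(sentences), 3)]
# sections -> ['One. Two. Three.', 'Four. Five.']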
 
-
-################################################################
-# 3) Basic Summaries (Correlation, Sentiment, Clusters etc.)   #
-################################################################
 
 @safe_process
 def calculate_topic_correlations(topic_dicts):
     """
-    If we only want a single dimension correlation (like score_mean),
-    we can do a simple correlation across different topics.
-    But typically you'd want multiple texts or some multi-dimensional approach.
+    Calculates correlation between topics based on 'score_mean'.
+    This is usually a single-dimensional correlation across different topics,
+    which can be conceptually limited, but shown here as an example.
+    Returns (corr_matrix, labels).
     """
     if len(topic_dicts) < 2:
-        # Not enough to do correlation
         return np.array([[1.0]]), ["Insufficient topics"]
 
     labels = [d['label'] for d in topic_dicts]
-    scores = [d['score_mean'] for d in topic_dicts]  # single dimension
+    scores = [d['score_mean'] for d in topic_dicts]
 
     if len(scores) < 2:
         return np.array([[1.0]]), ["Insufficient topics"]
 
     corr_matrix = np.corrcoef(scores)
     return corr_matrix, labels
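As the new docstring concedes, correlating a single score_mean vector is conceptually limited: np.corrcoef over one 1-D series yields a 0-dimensional result, which is exactly the case create_correlation_heatmap guards against with its ndim == 0 check. A quick sketch (values hypothetical):

import numpy as np

scores = [0.74, 0.65, 0.31]   # one observation per topic
corr = np.corrcoef(scores)    # a single 1-D series -> 0-d array
corr.ndim, float(corr)        # -> (0, 1.0)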
 
 @safe_process
 def perform_sentiment_analysis(text):
+    """
+    Uses NLTK's VADER sentiment analyzer to produce sentiment scores
+    (neg, neu, pos, compound) for each sentence in the text.
+    Returns a pandas DataFrame of results.
+    """
     sia = SentimentIntensityAnalyzer()
     sents = sent_tokenize(text)
     results = [sia.polarity_scores(s) for s in sents]
     return pd.DataFrame(results)
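Each row of the returned DataFrame holds VADER's four scores for one sentence; a sketch of a single call (exact numbers will vary):

from nltk.sentiment import SentimentIntensityAnalyzer

sia = SentimentIntensityAnalyzer()
sia.polarity_scores("The partnership could pave the way for a competitive alternative.")
# -> {'neg': ..., 'neu': ..., 'pos': ..., 'compound': ...}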
 
 @safe_process
 def create_topic_clusters(topic_dicts):
+    """
+    Applies a KMeans clustering on (score_mean, score_std).
+    If there are fewer than 3 topics, returns trivial cluster assignments.
+    """
     if len(topic_dicts) < 3:
-        return [0]*len(topic_dicts)  # trivial cluster
-
-    # Must have 'score_mean' and 'score_std' or something else
+        return [0] * len(topic_dicts)
+
     X = []
     for t in topic_dicts:
         X.append([t['score_mean'], t.get('score_std', 0.0)])
 
     X = np.array(X)
     if X.shape[0] < 3:
-        return [0]*X.shape[0]
+        return [0] * X.shape[0]
 
     kmeans = KMeans(n_clusters=min(3, X.shape[0]), random_state=42)
     clusters = kmeans.fit_predict(X)
-    return clusters.tolist()  # safe to JSON-encode
+    return clusters.tolist()
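A minimal sketch of the clustering step in isolation, assuming four hypothetical topics whose score_std defaulted to 0.0:

import numpy as np
from sklearn.cluster import KMeans

X = np.array([[0.74, 0.0], [0.65, 0.0], [0.31, 0.0], [0.12, 0.0]])
kmeans = KMeans(n_clusters=min(3, X.shape[0]), random_state=42)
kmeans.fit_predict(X).tolist()  # e.g. [0, 0, 1, 2]; plain ints, safe to JSON-encode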
-
-################################################################
-# 4) Charts (Bar, Radar, Correlation Heatmap, etc.)            #
-################################################################
 
 @safe_process
 def create_main_charts(topic_dicts):
     """
-    Expects a list of dicts with keys: 'label', 'score_mean', ...
-    We'll just use 'score_mean' (or a scaled version).
+    Creates a bar chart and a radar chart for the given list of topics.
+    Uses 'score_mean' as the base score.
     """
     if not topic_dicts:
         return go.Figure(), go.Figure()
 
-    # Bar chart
     labels = [t['label'] for t in topic_dicts]
-    scores = [t['score_mean']*100 for t in topic_dicts]  # convert to %
+    scores = [t['score_mean'] * 100 for t in topic_dicts]  # scale to %
 
+    # Bar chart
     bar_fig = go.Figure(
         data=[go.Bar(x=labels, y=scores, marker_color='rgb(55, 83, 109)')]
     )
     bar_fig.update_layout(
-        title='주제 분석 결과',
-        xaxis_title='주제',
-        yaxis_title='관련도 (%)',
+        title='Topic Analysis Results',
+        xaxis_title='Topics',
+        yaxis_title='Relevance (%)',
         template='plotly_white',
         height=500,
     )
 
     # Radar chart
     radar_fig = go.Figure()
     radar_fig.add_trace(go.Scatterpolar(
         r=scores,
         theta=labels,
         fill='toself',
-        name='주제 분포'
+        name='Topic Distribution'
     ))
     radar_fig.update_layout(
-        title='주제 레이더 차트',
+        title='Topic Radar Chart',
         template='plotly_white',
         height=500,
         polar=dict(radialaxis=dict(visible=True)),
@@ -211,19 +190,22 @@ def create_main_charts(topic_dicts):
     )
     return bar_fig, radar_fig
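The chart builders can be exercised directly with hand-made topic dicts (values hypothetical) to preview the relabeled layout:

topics = [
    {"label": "Digital Development", "score_mean": 0.65, "score_std": 0.05},
    {"label": "Innovation and Entrepreneurship", "score_mean": 0.74, "score_std": 0.02},
]
bar_fig, radar_fig = create_main_charts(topics)
bar_fig.write_html("bar_preview.html")  # or bar_fig.show() in a notebook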
 
 @safe_process
 def create_correlation_heatmap(corr_matrix, labels):
+    """
+    Creates a heatmap figure of the provided correlation matrix.
+    If there's insufficient data, shows a placeholder message.
+    """
     if corr_matrix.ndim == 0:
         # It's a scalar => shape ()
         corr_matrix = np.array([[corr_matrix]])
 
-    if corr_matrix.shape == (1,1):
-        # Usually means not enough data
+    if corr_matrix.shape == (1, 1):
+        # Not enough data for correlation
         fig = go.Figure()
         fig.add_annotation(text="Not enough topics for correlation", showarrow=False)
         return fig
 
     fig = go.Figure(data=go.Heatmap(
         z=corr_matrix,
         x=labels,
@@ -231,78 +213,75 @@ def create_correlation_heatmap(corr_matrix, labels):
         colorscale='Viridis'
     ))
     fig.update_layout(
-        title='주제 상관관계',
+        title='Topic Correlation Heatmap',
         height=500,
         template='plotly_white'
     )
     return fig
 
 @safe_process
 def create_topic_evolution(section_topics):
     """
-    section_topics: list of [ {label:..., score_mean:...}, ...]
-    one element per section
+    Plots topic evolution across sections.
+    section_topics: list of lists, where each inner list
+    is a list of dicts [{'label':..., 'score_mean':...}, ...]
     """
     fig = go.Figure()
     if not section_topics or len(section_topics) == 0:
         return fig
 
-    # Take the first section's list as reference
     if not section_topics[0]:
         return fig
 
-    # For each topic in the first section, gather its evolution
+    # For each topic in the first section, track the score across all sections
     for topic_dict in section_topics[0]:
         label = topic_dict['label']
         score_list = []
         for sec_list in section_topics:
-            # find matching label
             match = next((d for d in sec_list if d['label'] == label), None)
             if match:
                 score_list.append(match['score_mean'])
             else:
                 score_list.append(0.0)
 
         fig.add_trace(go.Scatter(
             x=list(range(len(section_topics))),
             y=score_list,
             name=label,
             mode='lines+markers'
         ))
 
     fig.update_layout(
-        title='주제 변화 추이',
-        xaxis_title='섹션',
-        yaxis_title='score_mean',
+        title='Topic Evolution Across Sections',
+        xaxis_title='Section',
+        yaxis_title='Score Mean',
         height=500,
         template='plotly_white'
     )
     return fig
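Worth noting: the loop tracks only labels present in the first section, so a topic that first appears in a later section is never plotted, and a label missing from a section is drawn as 0.0. Sketch (data hypothetical):

section_topics = [
    [{"label": "AI", "score_mean": 0.8}, {"label": "Chips", "score_mean": 0.4}],  # section 0
    [{"label": "AI", "score_mean": 0.6}],   # section 1: 'Chips' absent -> plotted as 0.0
]
fig = create_topic_evolution(section_topics)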
 
 @safe_process
 def create_confidence_gauge(topic_dicts):
     """
-    If your data doesn't actually have a separate confidence measure,
-    you may skip or adapt. For example, you might define confidence
-    = (1 - score_std)*100
+    Creates individual gauge indicators for each topic's confidence.
+    A simple heuristic: confidence = (1 - score_std) * 100.
     """
     if not topic_dicts:
         return go.Figure()
 
     fig = go.Figure()
     num_topics = len(topic_dicts)
 
     for i, t in enumerate(topic_dicts):
-        confidence_val = 100.0*(1.0 - t.get('score_std', 0.0))  # an example
+        # If score_std not present, default to 0 => confidence = 100%
+        conf_val = 100.0 * (1.0 - t.get("score_std", 0.0))
         fig.add_trace(go.Indicator(
             mode="gauge+number",
-            value=confidence_val,
+            value=conf_val,
             title={'text': t['label']},
             domain={'row': 0, 'column': i}
         ))
 
     fig.update_layout(
         grid={'rows': 1, 'columns': num_topics},
         height=400,
@@ -310,45 +289,43 @@ def create_confidence_gauge(topic_dicts):
     )
     return fig
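One consequence of the (1 - score_std) * 100 heuristic: parse_wbg_results fills score_std with 0.0 when the model reports no spread, so such topics read as full confidence on the gauge:

t = {"label": "Digital Development", "score_mean": 0.65}  # no score_std key
100.0 * (1.0 - t.get("score_std", 0.0))                   # -> 100.0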
 
-
-################################################################
-# 5) Putting Everything into `process_all_analysis`            #
-################################################################
-
 @spaces.GPU()
 def process_all_analysis(text):
+    """
+    Main function that calls all analysis steps and returns
+    structured JSON plus various Plotly figures.
+    """
     try:
-        # 1) Suggest topics on the entire text
+        # 1) Suggest topics for the entire text
         raw_results = clf.suggest_topics(text)
-        all_topics = parse_wbg_results(raw_results)  # keep full list of dicts
-
-        # 2) Top 5 (if you want to highlight them)
-        #    Sort by score_mean descending
+        all_topics = parse_wbg_results(raw_results)
+
+        # 2) Sort by 'score_mean' descending to get top 5
        sorted_topics = sorted(all_topics, key=lambda x: x['score_mean'], reverse=True)
        top_topics = sorted_topics[:5]
 
-        # 3) Section-based
-        section_topics = analyze_text_sections(text)  # list of lists
+        # 3) Analyze by sections
+        section_topics = analyze_text_sections(text)
 
         # 4) Extra analyses
         corr_matrix, corr_labels = calculate_topic_correlations(all_topics)
         sentiments_df = perform_sentiment_analysis(text)
         clusters = create_topic_clusters(all_topics)
 
         # 5) Build charts
-        bar_chart, radar_chart = create_main_charts(top_topics)  # show top 5 on bar
+        bar_chart, radar_chart = create_main_charts(top_topics)
         heatmap = create_correlation_heatmap(corr_matrix, corr_labels)
         evolution_chart = create_topic_evolution(section_topics)
         gauge_chart = create_confidence_gauge(top_topics)
 
-        # 6) Prepare output for the JSON field
-        #    Make sure everything is JSON-serializable with string keys
+        # 6) Prepare JSON output (ensure valid JSON with string keys)
         results = {
             "top_topics": top_topics,    # list of dict
             "clusters": clusters,        # list of ints
-            "sentiments": sentiments_df.to_dict(orient="records"),
+            "sentiments": sentiments_df.to_dict(orient="records")
         }
 
+        # Return JSON + Figures
         return (
             results,        # JSON output
             bar_chart,      # plot1
@@ -356,9 +333,9 @@ def process_all_analysis(text):
             heatmap,        # plot3
             evolution_chart,# plot4
             gauge_chart,    # plot5
-            go.Figure()     # plot6 (placeholder for sentiment plot, or skip)
+            go.Figure()     # plot6 (placeholder for sentiment plot, if desired)
         )
 
     except Exception as e:
         print(f"Analysis error: {str(e)}")
         empty_fig = go.Figure()
@@ -372,40 +349,43 @@ def process_all_analysis(text):
             empty_fig
         )
 
-################################################################
-# 6) Gradio UI                                                 #
-################################################################
+######################################################
+#                Gradio UI Definition                #
+######################################################
 
-with gr.Blocks(title="고급 문서 주제 분석기") as demo:
-    gr.Markdown("## 📊 고급 문서 주제 분석기")
+with gr.Blocks(title="Advanced Document Topic Analyzer") as demo:
+    gr.Markdown("## 📝 Advanced Document Topic Analyzer")
+    gr.Markdown(
+        "Enter text, then click 'Start Analysis' to see topic analysis, correlation, "
+        "confidence gauges, sentiment, and more."
+    )
 
     with gr.Row():
         text_input = gr.Textbox(
             value=SAMPLE_TEXT,
-            label="분석할 텍스트",
+            label="Text to Analyze",
             lines=8
         )
     with gr.Row():
-        submit_btn = gr.Button("분석 시작", variant="primary")
+        submit_btn = gr.Button("Start Analysis", variant="primary")
 
     with gr.Tabs():
-        with gr.TabItem("주요 분석"):
+        with gr.TabItem("Main Analysis"):
             with gr.Row():
-                plot1 = gr.Plot(label="주제 분포")
-                plot2 = gr.Plot(label="레이더 차트")
+                plot1 = gr.Plot(label="Topic Distribution")
+                plot2 = gr.Plot(label="Radar Chart")
-        with gr.TabItem("상세 분석"):
+        with gr.TabItem("Detailed Analysis"):
             with gr.Row():
-                plot3 = gr.Plot(label="상관관계 히트맵")
-                plot4 = gr.Plot(label="주제 변화 추이")
+                plot3 = gr.Plot(label="Correlation Heatmap")
+                plot4 = gr.Plot(label="Topic Evolution")
-        with gr.TabItem("신뢰도 분석"):
-            plot5 = gr.Plot(label="신뢰도 게이지")
-        with gr.TabItem("감성 분석"):
-            plot6 = gr.Plot(label="감성 분석 결과")
+        with gr.TabItem("Confidence Analysis"):
+            plot5 = gr.Plot(label="Confidence Gauge")
+        with gr.TabItem("Sentiment Analysis"):
+            plot6 = gr.Plot(label="Sentiment Results")
 
     with gr.Row():
-        output_json = gr.JSON(label="상세 분석 결과")
+        output_json = gr.JSON(label="Detailed Analysis Output")
 
     submit_btn.click(
         fn=process_all_analysis,
         inputs=[text_input],
@@ -417,6 +397,6 @@ if __name__ == "__main__":
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
-        share=False,
+        share=False,  # Set True if you want a public share link
         debug=True
     )
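The outputs list of submit_btn.click falls outside the diff context, but process_all_analysis returns a 7-tuple, so the wiring is presumably as follows (this exact ordering is an assumption, not shown in the diff):

submit_btn.click(
    fn=process_all_analysis,
    inputs=[text_input],
    outputs=[output_json, plot1, plot2, plot3, plot4, plot5, plot6],  # assumed: matches the returned 7-tuple
)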
 