openfree committed on
Commit 5ad5437 · verified · 1 Parent(s): a13171a

Update app.py

Browse files
Files changed (1)
  1. app.py +136 -95
app.py CHANGED
@@ -12,20 +12,20 @@ from nltk.sentiment import SentimentIntensityAnalyzer
  from sklearn.cluster import KMeans
  import torch

- # Set GPU if available
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

- # Initialize WBGDocTopic
  clf = wbgtopic.WBGDocTopic(device=device)

- # Download NLTK data if needed
  try:
      nltk.download('punkt', quiet=True)
      nltk.download('vader_lexicon', quiet=True)
  except Exception as e:
-     print(f"NLTK data download error: {e}")

- # Sample text for demonstration
  SAMPLE_TEXT = """
  The three reportedly discussed the Stargate Project, a large-scale AI initiative led by OpenAI, SoftBank, and U.S. software giant Oracle. The project aims to invest $500 billion over the next four years in building new AI infrastructure in the U.S. The U.S. government has shown a strong commitment to the initiative, with President Donald Trump personally announcing it at the White House the day after his inauguration last month. If Samsung participates, the project will lead to a Korea-U.S.-Japan AI alliance.
  The AI sector requires massive investments and extensive resources, including advanced models, high-performance AI chips to power the models, and large-scale data centers to operate them. Nvidia and TSMC currently dominate the AI sector, but a partnership between Samsung, SoftBank, and OpenAI could pave the way for a competitive alternative.
@@ -33,9 +33,8 @@ The AI sector requires massive investments and extensive resources, including ad

  def safe_process(func):
      """
-     A decorator that catches and logs exceptions inside a function,
-     returning None if an error occurs. This helps ensure that
-     the Gradio interface does not crash from unexpected exceptions.
      """
      def wrapper(*args, **kwargs):
          try:
@@ -48,44 +47,89 @@ def safe_process(func):
  @safe_process
  def parse_wbg_results(raw_output):
      """
-     Convert the raw output from WBGDocTopic into a list of dictionaries with
-     'label', 'score_mean', and 'score_std'. Adjust logic according to the
-     actual structure of raw_output.
      """
      if not raw_output:
          return []

-     # Example logic: If raw_output is something like:
-     # [ { "Innovation and Entrepreneurship": 0.74, "Digital Development": 0.65, ... } ]
-     # We'll parse it accordingly.
      first_item = raw_output[0]

-     # If the first item is already a dict with a 'label' key, it might already be in the right format
-     if isinstance(first_item, dict) and "label" in first_item:
-         return raw_output

-     # If it's a dict containing topic -> score
      if isinstance(first_item, dict):
          parsed_list = []
-         for label, val in first_item.items():
              parsed_list.append({
                  "label": label,
                  "score_mean": float(val),
-                 "score_std": 0.0  # If std is not provided, default to 0
              })
          return parsed_list

      return []

  @safe_process
  def analyze_text_sections(text):
      """
-     Splits the text into sections and calls clf.suggest_topics for each section.
-     Returns a list of topic lists, where each element is the parsed WBG result
-     for that section.
      """
      sentences = sent_tokenize(text)
-     # Example: group every 3 sentences into one section
      sections = [' '.join(sentences[i:i+3]) for i in range(0, len(sentences), 3)]

      section_topics = []
@@ -99,10 +143,11 @@ def analyze_text_sections(text):
  @safe_process
  def calculate_topic_correlations(topic_dicts):
      """
-     Calculates correlation between topics based on 'score_mean'.
-     This is usually a single-dimensional correlation across different topics,
-     which can be conceptually limited, but shown here as an example.
-     Returns (corr_matrix, labels).
      """
      if len(topic_dicts) < 2:
          return np.array([[1.0]]), ["Insufficient topics"]
@@ -119,9 +164,8 @@
  @safe_process
  def perform_sentiment_analysis(text):
      """
-     Uses NLTK's VADER sentiment analyzer to produce sentiment scores
-     (neg, neu, pos, compound) for each sentence in the text.
-     Returns a pandas DataFrame of results.
      """
      sia = SentimentIntensityAnalyzer()
      sents = sent_tokenize(text)
@@ -131,8 +175,8 @@
  @safe_process
  def create_topic_clusters(topic_dicts):
      """
-     Applies a KMeans clustering on (score_mean, score_std).
-     If there are fewer than 3 topics, returns trivial cluster assignments.
      """
      if len(topic_dicts) < 3:
          return [0] * len(topic_dicts)
@@ -152,37 +196,37 @@
  @safe_process
  def create_main_charts(topic_dicts):
      """
-     Creates a bar chart and a radar chart for the given list of topics.
-     Uses 'score_mean' as the base score.
      """
      if not topic_dicts:
          return go.Figure(), go.Figure()

      labels = [t['label'] for t in topic_dicts]
-     scores = [t['score_mean'] * 100 for t in topic_dicts]  # scale to %

-     # Bar chart
      bar_fig = go.Figure(
          data=[go.Bar(x=labels, y=scores, marker_color='rgb(55, 83, 109)')]
      )
      bar_fig.update_layout(
-         title='Topic Analysis Results',
-         xaxis_title='Topics',
-         yaxis_title='Relevance (%)',
          template='plotly_white',
          height=500,
      )

-     # Radar chart
      radar_fig = go.Figure()
      radar_fig.add_trace(go.Scatterpolar(
          r=scores,
          theta=labels,
          fill='toself',
-         name='Topic Distribution'
      ))
      radar_fig.update_layout(
-         title='Topic Radar Chart',
          template='plotly_white',
          height=500,
          polar=dict(radialaxis=dict(visible=True)),
@@ -193,15 +237,15 @@
  @safe_process
  def create_correlation_heatmap(corr_matrix, labels):
      """
-     Creates a heatmap figure of the provided correlation matrix.
-     If there's insufficient data, shows a placeholder message.
      """
      if corr_matrix.ndim == 0:
-         # It's a scalar => shape ()
          corr_matrix = np.array([[corr_matrix]])

      if corr_matrix.shape == (1, 1):
-         # Not enough data for correlation
          fig = go.Figure()
          fig.add_annotation(text="Not enough topics for correlation", showarrow=False)
          return fig
@@ -213,7 +257,7 @@ def create_correlation_heatmap(corr_matrix, labels):
          colorscale='Viridis'
      ))
      fig.update_layout(
-         title='Topic Correlation Heatmap',
          height=500,
          template='plotly_white'
      )
@@ -222,9 +266,8 @@ def create_correlation_heatmap(corr_matrix, labels):
  @safe_process
  def create_topic_evolution(section_topics):
      """
-     Plots topic evolution across sections.
-     section_topics: list of lists, where each inner list
-     is a list of dicts [{'label':..., 'score_mean':...}, ...]
      """
      fig = go.Figure()
      if not section_topics or len(section_topics) == 0:
@@ -233,7 +276,7 @@
      if not section_topics[0]:
          return fig

-     # For each topic in the first section, track the score across all sections
      for topic_dict in section_topics[0]:
          label = topic_dict['label']
          score_list = []
@@ -252,9 +295,9 @@
      ))

      fig.update_layout(
-         title='Topic Evolution Across Sections',
-         xaxis_title='Section',
-         yaxis_title='Score Mean',
          height=500,
          template='plotly_white'
      )
@@ -263,8 +306,8 @@
  @safe_process
  def create_confidence_gauge(topic_dicts):
      """
-     Creates individual gauge indicators for each topic's confidence.
-     A simple heuristic: confidence = (1 - score_std) * 100.
      """
      if not topic_dicts:
          return go.Figure()
@@ -273,7 +316,6 @@
      num_topics = len(topic_dicts)

      for i, t in enumerate(topic_dicts):
-         # If score_std not present, default to 0 => confidence = 100%
          conf_val = 100.0 * (1.0 - t.get("score_std", 0.0))
          fig.add_trace(go.Indicator(
              mode="gauge+number",
@@ -292,48 +334,47 @@
  @spaces.GPU()
  def process_all_analysis(text):
      """
-     Main function that calls all analysis steps and returns
-     structured JSON plus various Plotly figures.
      """
      try:
-         # 1) Suggest topics for the entire text
          raw_results = clf.suggest_topics(text)
          all_topics = parse_wbg_results(raw_results)

-         # 2) Sort by 'score_mean' descending to get top 5
          sorted_topics = sorted(all_topics, key=lambda x: x['score_mean'], reverse=True)
          top_topics = sorted_topics[:5]

-         # 3) Analyze by sections
          section_topics = analyze_text_sections(text)

-         # 4) Extra analyses
          corr_matrix, corr_labels = calculate_topic_correlations(all_topics)
          sentiments_df = perform_sentiment_analysis(text)
          clusters = create_topic_clusters(all_topics)

-         # 5) Build charts
          bar_chart, radar_chart = create_main_charts(top_topics)
          heatmap = create_correlation_heatmap(corr_matrix, corr_labels)
          evolution_chart = create_topic_evolution(section_topics)
          gauge_chart = create_confidence_gauge(top_topics)

-         # 6) Prepare JSON output (ensure valid JSON with string keys)
          results = {
-             "top_topics": top_topics,   # list of dict
-             "clusters": clusters,       # list of ints
-             "sentiments": sentiments_df.to_dict(orient="records")
          }

-         # Return JSON + Figures
          return (
-             results,          # JSON output
-             bar_chart,        # plot1
-             radar_chart,      # plot2
-             heatmap,          # plot3
-             evolution_chart,  # plot4
-             gauge_chart,      # plot5
-             go.Figure()       # plot6 (placeholder for sentiment plot, if desired)
          )

      except Exception as e:
@@ -353,38 +394,38 @@ def process_all_analysis(text):
  # Gradio UI Definition #
  ######################################################

- with gr.Blocks(title="Advanced Document Topic Analyzer") as demo:
-     gr.Markdown("## 📝 Advanced Document Topic Analyzer")
      gr.Markdown(
-         "Enter text, then click 'Start Analysis' to see topic analysis, correlation, "
-         "confidence gauges, sentiment, and more."
      )

      with gr.Row():
          text_input = gr.Textbox(
              value=SAMPLE_TEXT,
-             label="Text to Analyze",
              lines=8
          )
      with gr.Row():
-         submit_btn = gr.Button("Start Analysis", variant="primary")

      with gr.Tabs():
-         with gr.TabItem("Main Analysis"):
              with gr.Row():
-                 plot1 = gr.Plot(label="Topic Distribution")
-                 plot2 = gr.Plot(label="Radar Chart")
-         with gr.TabItem("Detailed Analysis"):
              with gr.Row():
-                 plot3 = gr.Plot(label="Correlation Heatmap")
-                 plot4 = gr.Plot(label="Topic Evolution")
-         with gr.TabItem("Confidence Analysis"):
-             plot5 = gr.Plot(label="Confidence Gauge")
-         with gr.TabItem("Sentiment Analysis"):
-             plot6 = gr.Plot(label="Sentiment Results")

      with gr.Row():
-         output_json = gr.JSON(label="Detailed Analysis Output")

      submit_btn.click(
          fn=process_all_analysis,
@@ -397,6 +438,6 @@ if __name__ == "__main__":
      demo.launch(
          server_name="0.0.0.0",
          server_port=7860,
-         share=False,  # Set True if you want a public share link
          debug=True
      )
 
  from sklearn.cluster import KMeans
  import torch

+ # Use GPU when available
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

+ # Initialize WBGDocTopic
  clf = wbgtopic.WBGDocTopic(device=device)

+ # Download NLTK data (if needed)
  try:
      nltk.download('punkt', quiet=True)
      nltk.download('vader_lexicon', quiet=True)
  except Exception as e:
+     print(f"NLTK data download error: {e}")

+ # Sample text
  SAMPLE_TEXT = """
  The three reportedly discussed the Stargate Project, a large-scale AI initiative led by OpenAI, SoftBank, and U.S. software giant Oracle. The project aims to invest $500 billion over the next four years in building new AI infrastructure in the U.S. The U.S. government has shown a strong commitment to the initiative, with President Donald Trump personally announcing it at the White House the day after his inauguration last month. If Samsung participates, the project will lead to a Korea-U.S.-Japan AI alliance.
  The AI sector requires massive investments and extensive resources, including advanced models, high-performance AI chips to power the models, and large-scale data centers to operate them. Nvidia and TSMC currently dominate the AI sector, but a partnership between Samsung, SoftBank, and OpenAI could pave the way for a competitive alternative.
 

  def safe_process(func):
      """
+     A decorator that logs any exception and returns None,
+     helping keep the Gradio interface from crashing.
      """
      def wrapper(*args, **kwargs):
          try:
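
The wrapper body is cut off by the diff context here. For reference, a minimal sketch of what a safe_process decorator of this shape typically looks like; the except branch below is an assumption, not the committed code:

    def safe_process(func):
        # Run func; on any exception, log it and return None so the
        # Gradio callback never raises.
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                print(f"Error in {func.__name__}: {e}")
                return None
        return wrapper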
 
  @safe_process
  def parse_wbg_results(raw_output):
      """
+     Normalize the output of wbgtopic.WBGDocTopic's suggest_topics() into a
+     list of dicts with 'label', 'score_mean', and 'score_std'.
+
+     Example return structure:
+     [
+         {
+             "label": "Agriculture",
+             "score_mean": 0.32,
+             "score_std": 0.05
+         },
+         ...
+     ]
      """
+     # Debug: inspect the actual result structure
+     print(">>> DEBUG: raw_output =", raw_output)
+
+     # Return an empty list if the output is empty
      if not raw_output:
          return []

      first_item = raw_output[0]

+     # (1) Items already shaped as dicts with a 'label' key,
+     #     e.g. [{"label": "...", "score": ...}, ...] or {"label": "...", "score_mean": ...}
+     if isinstance(first_item, dict) and ("label" in first_item):
+         parsed_list = []
+         for item in raw_output:
+             label = item.get("label", "")
+             # Use score_mean / score_std if already present;
+             # otherwise infer them from other fields such as 'score'
+             score_mean = item.get("score_mean", None)
+             score_std = item.get("score_std", None)
+
+             # e.g. only a 'score' field is present
+             if score_mean is None and "score" in item:
+                 # The score may be on a 0-1 or a 0-100 scale; this needs checking.
+                 # For now, cast it to float as-is.
+                 score_mean = float(item["score"])
+             if score_mean is None:
+                 score_mean = 0.0
+
+             if score_std is None:
+                 score_std = 0.0

+             parsed_list.append({
+                 "label": label,
+                 "score_mean": float(score_mean),
+                 "score_std": float(score_std)
+             })
+         return parsed_list
+
+     # (2) Items shaped as dicts mapping topic name -> score,
+     #     e.g. [{"Agriculture": 0.22, "Climate Change": 0.55}, ...]
      if isinstance(first_item, dict):
+         # raw_output may hold several such dicts, so we must either merge them
+         # or parse only the first one. Here, merging is demonstrated:
+         merged = {}
+         for d in raw_output:
+             for k, v in d.items():
+                 # On duplicate keys, the last value wins
+                 merged[k] = v
+
          parsed_list = []
+         for label, val in merged.items():
              parsed_list.append({
                  "label": label,
                  "score_mean": float(val),
+                 "score_std": 0.0
              })
          return parsed_list

+     # Unexpected structure
      return []
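
To make the two branches concrete, here is how the normalizer behaves on the two input shapes it anticipates (both inputs are hypothetical; the real suggest_topics() output should be confirmed via the debug print above):

    # Shape (1): items already carry a 'label' key; 'score' is used as score_mean
    parse_wbg_results([{"label": "Agriculture", "score": 0.22}])
    # -> [{"label": "Agriculture", "score_mean": 0.22, "score_std": 0.0}]

    # Shape (2): topic-name -> score dicts are merged (last value wins on duplicates)
    parse_wbg_results([{"Agriculture": 0.22}, {"Climate Change": 0.55}])
    # -> [{"label": "Agriculture", "score_mean": 0.22, "score_std": 0.0},
    #     {"label": "Climate Change", "score_mean": 0.55, "score_std": 0.0}]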

  @safe_process
  def analyze_text_sections(text):
      """
+     Split the text into sections (e.g. three sentences each), run
+     suggest_topics() on each section, and collect the parse_wbg_results()
+     output for every section into a list.
      """
      sentences = sent_tokenize(text)
+     # Group every three sentences into one section
      sections = [' '.join(sentences[i:i+3]) for i in range(0, len(sentences), 3)]

      section_topics = []
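
A quick illustration of the sectioning step (the sentences are hypothetical):

    from nltk.tokenize import sent_tokenize

    sentences = sent_tokenize("One. Two. Three. Four.")
    # -> ['One.', 'Two.', 'Three.', 'Four.']
    sections = [' '.join(sentences[i:i+3]) for i in range(0, len(sentences), 3)]
    # -> ['One. Two. Three.', 'Four.']  (the last section may be shorter)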
 
  @safe_process
  def calculate_topic_correlations(topic_dicts):
      """
+     topic_dicts: [{'label': ..., 'score_mean': ..., 'score_std': ...}, ...]
+
+     Computes correlations using only each topic's score_mean.
+     Strictly speaking, correlations should be computed across different
+     documents; here, as a demonstration, we correlate the scores of the
+     different topics within a single text.
      """
      if len(topic_dicts) < 2:
          return np.array([[1.0]]), ["Insufficient topics"]
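
The computation itself is elided by this hunk. Given the per-topic (score_mean, score_std) pairs available here, one plausible reading is np.corrcoef over a topic-by-feature matrix; the following is an illustrative sketch under that assumption, not the committed implementation:

    import numpy as np

    topic_dicts = [
        {"label": "Agriculture", "score_mean": 0.22, "score_std": 0.05},
        {"label": "Energy",      "score_mean": 0.41, "score_std": 0.02},
        {"label": "Trade",       "score_mean": 0.13, "score_std": 0.07},
    ]
    X = np.array([[t["score_mean"], t["score_std"]] for t in topic_dicts])
    corr_matrix = np.corrcoef(X)                # shape (3, 3), topic by topic
    labels = [t["label"] for t in topic_dicts]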
 
  @safe_process
  def perform_sentiment_analysis(text):
      """
+     Compute per-sentence sentiment scores with NLTK's VADER analyzer.
+     Returns the results as a pandas DataFrame.
      """
      sia = SentimentIntensityAnalyzer()
      sents = sent_tokenize(text)
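
For context, VADER's polarity_scores() produces the four fields consumed downstream; for example:

    from nltk.sentiment import SentimentIntensityAnalyzer

    sia = SentimentIntensityAnalyzer()
    scores = sia.polarity_scores("The partnership could pave the way for a competitive alternative.")
    # -> a dict of the form {'neg': ..., 'neu': ..., 'pos': ..., 'compound': ...},
    #    where 'compound' is normalized to [-1, 1]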
 
  @safe_process
  def create_topic_clusters(topic_dicts):
      """
+     KMeans clustering on the two features (score_mean, score_std).
+     With fewer than 3 topics, trivially assign everything to cluster 0.
      """
      if len(topic_dicts) < 3:
          return [0] * len(topic_dicts)
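
The fitting code is truncated here. On the stated two features it presumably reduces to something like the sketch below; n_clusters=3 is an assumption suggested by the three-topic minimum above, not a value visible in this diff:

    import numpy as np
    from sklearn.cluster import KMeans

    X = np.array([[t["score_mean"], t["score_std"]] for t in topic_dicts])
    clusters = KMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(X)
    # -> one integer cluster id per topic, e.g. [0, 2, 1, ...]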
 
  @safe_process
  def create_main_charts(topic_dicts):
      """
+     Build a bar chart and a radar chart.
+     'score_mean' is treated as a 0-1 value and multiplied by 100
+     so it can be shown as a percentage.
      """
      if not topic_dicts:
          return go.Figure(), go.Figure()

      labels = [t['label'] for t in topic_dicts]
+     scores = [t['score_mean'] * 100 for t in topic_dicts]

+     # Bar chart
      bar_fig = go.Figure(
          data=[go.Bar(x=labels, y=scores, marker_color='rgb(55, 83, 109)')]
      )
      bar_fig.update_layout(
+         title='Topic Analysis Results',
+         xaxis_title='Topics',
+         yaxis_title='Relevance (%)',
          template='plotly_white',
          height=500,
      )

+     # Radar chart
      radar_fig = go.Figure()
      radar_fig.add_trace(go.Scatterpolar(
          r=scores,
          theta=labels,
          fill='toself',
+         name='Topic Distribution'
      ))
      radar_fig.update_layout(
+         title='Topic Radar Chart',
          template='plotly_white',
          height=500,
          polar=dict(radialaxis=dict(visible=True)),
 
  @safe_process
  def create_correlation_heatmap(corr_matrix, labels):
      """
+     Visualize the correlation matrix as a heatmap.
+     If there is not enough data, show an explanatory notice instead.
      """
      if corr_matrix.ndim == 0:
+         # A 0-D scalar: promote it to a 2-D array
          corr_matrix = np.array([[corr_matrix]])

      if corr_matrix.shape == (1, 1):
+         # Not enough data
          fig = go.Figure()
          fig.add_annotation(text="Not enough topics for correlation", showarrow=False)
          return fig

          colorscale='Viridis'
      ))
      fig.update_layout(
+         title='Topic Correlation Heatmap',
          height=500,
          template='plotly_white'
      )
 
  @safe_process
  def create_topic_evolution(section_topics):
      """
+     Plot how each topic's score changes across sections as a line chart.
+     section_topics: [[{'label': ..., 'score_mean': ...}, ...], [...], ...]
      """
      fig = go.Figure()
      if not section_topics or len(section_topics) == 0:

      if not section_topics[0]:
          return fig

+     # Using the first section's topics as the reference, pull the score
+     # for each topic from every section in which it appears
      for topic_dict in section_topics[0]:
          label = topic_dict['label']
          score_list = []

      ))

      fig.update_layout(
+         title='Topic Evolution Across Sections',
+         xaxis_title='Section',
+         yaxis_title='score_mean',
          height=500,
          template='plotly_white'
      )
 
  @safe_process
  def create_confidence_gauge(topic_dicts):
      """
+     Display each topic's confidence as a gauge.
+     Uses the simple formula (1 - score_std) * 100.
      """
      if not topic_dicts:
          return go.Figure()

      num_topics = len(topic_dicts)

      for i, t in enumerate(topic_dicts):
          conf_val = 100.0 * (1.0 - t.get("score_std", 0.0))
          fig.add_trace(go.Indicator(
              mode="gauge+number",
 
  @spaces.GPU()
  def process_all_analysis(text):
      """
+     Run topic analysis over the full text, plus per-section analysis,
+     correlations, sentiment analysis, and clustering, then return the
+     JSON results together with the Plotly charts.
      """
      try:
+         # 1) Topic analysis over the full text
          raw_results = clf.suggest_topics(text)
          all_topics = parse_wbg_results(raw_results)

+         # 2) Sort by score_mean in descending order, keep the top 5
          sorted_topics = sorted(all_topics, key=lambda x: x['score_mean'], reverse=True)
          top_topics = sorted_topics[:5]

+         # 3) Per-section analysis
          section_topics = analyze_text_sections(text)

+         # 4) Extra analyses (correlations, sentiment, clusters)
          corr_matrix, corr_labels = calculate_topic_correlations(all_topics)
          sentiments_df = perform_sentiment_analysis(text)
          clusters = create_topic_clusters(all_topics)

+         # 5) Build the charts
          bar_chart, radar_chart = create_main_charts(top_topics)
          heatmap = create_correlation_heatmap(corr_matrix, corr_labels)
          evolution_chart = create_topic_evolution(section_topics)
          gauge_chart = create_confidence_gauge(top_topics)

+         # 6) Bundle everything as JSON (string keys only)
          results = {
+             "top_topics": top_topics,   # top 5 topics
+             "clusters": clusters,       # cluster assignments
+             "sentiments": sentiments_df.to_dict(orient="records")  # sentiment analysis
          }

          return (
+             results,          # JSON output
+             bar_chart,        # plot1
+             radar_chart,      # plot2
+             heatmap,          # plot3
+             evolution_chart,  # plot4
+             gauge_chart,      # plot5
+             go.Figure()       # plot6 (use for a sentiment plot if desired)
          )

      except Exception as e:
 
  # Gradio UI Definition #
  ######################################################

+ with gr.Blocks(title="Advanced Document Topic Analyzer") as demo:
+     gr.Markdown("## Advanced Document Topic Analyzer")
      gr.Markdown(
+         "Enter text, then press the **Start Analysis** button. "
+         "You can then review the main topic analysis, correlations, confidence gauges, sentiment results, and more."
      )

      with gr.Row():
          text_input = gr.Textbox(
              value=SAMPLE_TEXT,
+             label="Text to Analyze",
              lines=8
          )
      with gr.Row():
+         submit_btn = gr.Button("Start Analysis", variant="primary")

      with gr.Tabs():
+         with gr.TabItem("Main Analysis"):
              with gr.Row():
+                 plot1 = gr.Plot(label="Topic Distribution (Bar Chart)")
+                 plot2 = gr.Plot(label="Radar Chart")
+         with gr.TabItem("Detailed Analysis"):
              with gr.Row():
+                 plot3 = gr.Plot(label="Correlation Heatmap")
+                 plot4 = gr.Plot(label="Topic Evolution by Section")
+         with gr.TabItem("Confidence Analysis"):
+             plot5 = gr.Plot(label="Confidence Gauge")
+         with gr.TabItem("Sentiment Analysis"):
+             plot6 = gr.Plot(label="Sentiment Results")

      with gr.Row():
+         output_json = gr.JSON(label="Detailed Analysis Output (JSON)")

      submit_btn.click(
          fn=process_all_analysis,
 
      demo.launch(
          server_name="0.0.0.0",
          server_port=7860,
+         share=False,  # set True if you need a public share link
          debug=True
      )
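
The submit_btn.click(...) call is truncated in both halves of this diff. Given the seven values returned by process_all_analysis and the components defined above, the wiring presumably takes this form (a sketch, not the committed code):

    submit_btn.click(
        fn=process_all_analysis,
        inputs=[text_input],
        outputs=[output_json, plot1, plot2, plot3, plot4, plot5, plot6]
    )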