Kims12 committed on
Commit
bc1cd74
·
verified ·
1 Parent(s): 0fedf9f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -19
app.py CHANGED
@@ -201,7 +201,7 @@ def fetch_related_keywords(keyword):
201
  CUSTOMER_ID = get_env_variable("NAVER_CUSTOMER_ID")
202
 
203
  if not all([API_KEY, SECRET_KEY, CUSTOMER_ID]):
204
- debug_log(f"๋„ค์ด๋ฒ„ ๊ด‘๊ณ  API ํ‚ค ์ •๋ณด ๋ถ€์กฑ์œผ๋กœ '{keyword}' ์—ฐ๊ด€ ํ‚ค์›Œ๋“œ ์กฐํšŒ๋ฅผ ๊ฑด๋„ˆ<0xEB><0xB5>๋‹ˆ๋‹ค.")
205
  return pd.DataFrame()
206
 
207
  BASE_URL = "https://api.naver.com"
@@ -274,7 +274,7 @@ def fetch_blog_count(keyword):
274
  client_secret = get_env_variable("NAVER_SEARCH_CLIENT_SECRET")
275
 
276
  if not client_id or not client_secret:
277
- debug_log(f"๋„ค์ด๋ฒ„ ๊ฒ€์ƒ‰ API ํ‚ค ์ •๋ณด ๋ถ€์กฑ์œผ๋กœ '{keyword}' ๋ธ”๋กœ๊ทธ ์ˆ˜ ์กฐํšŒ๋ฅผ ๊ฑด๋„ˆ<0xEB><0xB5>๋‹ˆ๋‹ค.")
278
  return 0
279
 
280
  url = "https://openapi.naver.com/v1/search/blog.json"
@@ -536,7 +536,8 @@ def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input:
536
  for col in cols:
537
  if col not in df_morph.columns:
538
  df_morph[col] = "" if col == "์ง์ ‘์ž…๋ ฅ" else (0 if col != "๋‹จ์–ด" else "")
539
- df_morph = df_morph[cols]
 
540
  return df_morph, create_excel_file(df_morph)
541
 
542
  # 직접 입력 키워드에 대한 정보 (빈도수, API 정보) 가져오기
@@ -562,14 +563,6 @@ def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input:
562
  # df_morph์— ์žˆ๋Š” ๋‹จ์–ด๋Š” df_morph ์ •๋ณด๋ฅผ ์šฐ์„  ์‚ฌ์šฉ (์ง์ ‘์ž…๋ ฅ ํ”Œ๋ž˜๊ทธ๋งŒ ์—…๋ฐ์ดํŠธ)
563
  # df_direct_raw์—์„œ df_morph์— ์—†๋Š” ๋‹จ์–ด๋งŒ ๊ณจ๋ผ์„œ ์ถ”๊ฐ€
564
 
565
- # ํ•ฉ์น˜๊ธฐ: df_morph๋ฅผ ๊ธฐ์ค€์œผ๋กœ df_direct_raw์˜ ์ •๋ณด๋ฅผ ์ถ”๊ฐ€/์—…๋ฐ์ดํŠธ
566
- # Pandas 0.25.0 ์ด์ƒ์—์„œ๋Š” combine_first์˜ overwrite ๋™์ž‘์ด ์•ฝ๊ฐ„ ๋‹ค๋ฅผ ์ˆ˜ ์žˆ์œผ๋ฏ€๋กœ merge ์‚ฌ์šฉ ๊ณ ๋ ค
567
-
568
- # 1. df_morph์˜ ๋‹จ์–ด๋“ค์— ๋Œ€ํ•ด df_direct_raw์˜ ์ •๋ณด๋กœ ์—…๋ฐ์ดํŠธ (API ์ •๋ณด ๋“ฑ)
569
- # ๋‹จ, ๋นˆ๋„์ˆ˜๋Š” ๊ฐ์ž ๊ณ„์‚ฐํ•œ ๊ฒƒ์„ ์œ ์ง€ํ• ์ง€, ์•„๋‹ˆ๋ฉด ํ•œ์ชฝ์„ ํƒํ• ์ง€ ๊ฒฐ์ • ํ•„์š”.
570
- # ์—ฌ๊ธฐ์„œ๋Š” df_morph์˜ ๋นˆ๋„์ˆ˜(ํ˜•ํƒœ์†Œ๋ถ„์„ ๊ธฐ๋ฐ˜)์™€ df_direct_raw์˜ ๋นˆ๋„์ˆ˜(๋‹จ์ˆœ count)๊ฐ€ ๋‹ค๋ฅผ ์ˆ˜ ์žˆ์Œ.
571
- # ์ผ๋‹จ์€ df_morph ๊ธฐ์ค€์œผ๋กœ ํ•˜๊ณ , ์—†๋Š” ์ง์ ‘ ํ‚ค์›Œ๋“œ๋งŒ df_direct_raw์—์„œ ์ถ”๊ฐ€ํ•˜๋Š” ๋ฐฉ์‹.
572
-
573
  # df_morph์˜ '์ง์ ‘์ž…๋ ฅ' ์ปฌ๋Ÿผ์€ ์ด๋ฏธ ์œ„์—์„œ ์ฒ˜๋ฆฌ๋จ.
574
  # ์ด์ œ df_direct_raw์—๋งŒ ์žˆ๋Š” ํ‚ค์›Œ๋“œ๋ฅผ df_morph์— ์ถ”๊ฐ€
575
 
@@ -603,15 +596,16 @@ def combined_analysis(blog_text: str, remove_freq1: bool, direct_keyword_input:
603
 
604
  # NA ๊ฐ’๋“ค์„ ์ ์ ˆํžˆ ์ฒ˜๋ฆฌ (์˜ˆ: 0์œผ๋กœ ์ฑ„์šฐ๊ฑฐ๋‚˜ ๊ทธ๋Œ€๋กœ ๋‘๊ธฐ)
605
  # API ๊ฐ’๋“ค์€ ์ˆซ์ž๊ฐ€ ์•„๋‹ ์ˆ˜ ์žˆ์œผ๋ฏ€๋กœ (์˜ˆ: "< 10"), process_keyword์—์„œ ์ฒ˜๋ฆฌ๋จ. ์—ฌ๊ธฐ์„œ๋Š” intํ˜• ๋ณ€ํ™˜ ์ „์ด๋ฏ€๋กœ ๊ทธ๋Œ€๋กœ ๋‘ .
606
- # Gradio Dataframe์€ None์„ ์ž˜ ํ‘œ์‹œํ•จ.
607
  # ๋นˆ๋„์ˆ˜๋Š” ์ •์ˆ˜ํ˜•์ด์–ด์•ผ ํ•จ
608
  if "๋นˆ๋„์ˆ˜" in combined_df.columns:
609
  combined_df["๋นˆ๋„์ˆ˜"] = combined_df["๋นˆ๋„์ˆ˜"].fillna(0).astype(int)
610
 
611
 
612
- combined_df = combined_df[final_cols_combined].drop_duplicates(subset=['๋‹จ์–ด'], keep='first') # ๋งŒ์•ฝ์„ ์œ„ํ•œ ์ค‘๋ณต ์ œ๊ฑฐ
613
- combined_df.sort_values(by=["์ง์ ‘์ž…๋ ฅ", "๋นˆ๋„์ˆ˜"], ascending=[False, False], inplace=True, na_position='last') # ์ง์ ‘์ž…๋ ฅ ์šฐ์„ , ๊ทธ ๋‹ค์Œ ๋นˆ๋„์ˆ˜
614
- combined_df.reset_index(drop=True, inplace=True)
 
615
 
616
  combined_excel = create_excel_file(combined_df)
617
  debug_log("combined_analysis ํ•จ์ˆ˜ ์™„๋ฃŒ")
@@ -781,11 +775,11 @@ with gr.Blocks(title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ํ‚ค์›Œ๋“œ ๋ถ„์„ ์„œ๋น„์Šค", css=custo
781
  with gr.Column(scale=3): # ์˜ค๋ฅธ์ชฝ ์ปฌ๋Ÿผ (๊ฒฐ๊ณผ ์˜์—ญ)
782
  with gr.Group(elem_classes="custom-group custom-result"):
783
  gr.Markdown("### ๋ถ„์„ ๊ฒฐ๊ณผ")
784
- result_df_display = gr.Dataframe(
785
  label="ํ†ตํ•ฉ ๋ถ„์„ ๊ฒฐ๊ณผ (๋‹จ์–ด, ๋นˆ๋„์ˆ˜, ๊ฒ€์ƒ‰๋Ÿ‰, ๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜, ์ง์ ‘์ž…๋ ฅ ์—ฌ๋ถ€)",
786
- interactive=False, # ์‚ฌ์šฉ์ž๊ฐ€ ์ง์ ‘ ์ˆ˜์ • ๋ถˆ๊ฐ€
787
- height=600, # ๋†’์ด ์กฐ์ ˆ
788
- wrap=True # ๊ธด ํ…์ŠคํŠธ ์ค„๋ฐ”๊ฟˆ
789
  )
790
  with gr.Group(elem_classes="custom-group"):
791
  gr.Markdown("### ๊ฒฐ๊ณผ ๋‹ค์šด๋กœ๋“œ")
 
201
  CUSTOMER_ID = get_env_variable("NAVER_CUSTOMER_ID")
202
 
203
  if not all([API_KEY, SECRET_KEY, CUSTOMER_ID]):
204
+ debug_log(f"๋„ค์ด๋ฒ„ ๊ด‘๊ณ  API ํ‚ค ์ •๋ณด ๋ถ€์กฑ์œผ๋กœ '{keyword}' ์—ฐ๊ด€ ํ‚ค์›Œ๋“œ ์กฐํšŒ๋ฅผ ๊ฑด๋„ˆ<0xEB><0x8><0xB5>๋‹ˆ๋‹ค.")
205
  return pd.DataFrame()
206
 
207
  BASE_URL = "https://api.naver.com"
 
274
  client_secret = get_env_variable("NAVER_SEARCH_CLIENT_SECRET")
275
 
276
  if not client_id or not client_secret:
277
+ debug_log(f"๋„ค์ด๋ฒ„ ๊ฒ€์ƒ‰ API ํ‚ค ์ •๋ณด ๋ถ€์กฑ์œผ๋กœ '{keyword}' ๋ธ”๋กœ๊ทธ ์ˆ˜ ์กฐํšŒ๋ฅผ ๊ฑด๋„ˆ<0xEB><0x8><0xB5>๋‹ˆ๋‹ค.")
278
  return 0
279
 
280
  url = "https://openapi.naver.com/v1/search/blog.json"
 
536
  for col in cols:
537
  if col not in df_morph.columns:
538
  df_morph[col] = "" if col == "์ง์ ‘์ž…๋ ฅ" else (0 if col != "๋‹จ์–ด" else "")
539
+ if not df_morph.empty: # df_morph๊ฐ€ ๋น„์–ด์žˆ์ง€ ์•Š์„ ๋•Œ๋งŒ ์ปฌ๋Ÿผ ์ˆœ์„œ ์ ์šฉ
540
+ df_morph = df_morph[cols]
541
  return df_morph, create_excel_file(df_morph)
542
 
543
  # 직접 입력 키워드에 대한 정보 (빈도수, API 정보) 가져오기
 
563
  # df_morph์— ์žˆ๋Š” ๋‹จ์–ด๋Š” df_morph ์ •๋ณด๋ฅผ ์šฐ์„  ์‚ฌ์šฉ (์ง์ ‘์ž…๋ ฅ ํ”Œ๋ž˜๊ทธ๋งŒ ์—…๋ฐ์ดํŠธ)
564
  # df_direct_raw์—์„œ df_morph์— ์—†๋Š” ๋‹จ์–ด๋งŒ ๊ณจ๋ผ์„œ ์ถ”๊ฐ€
565
 
 
 
 
 
 
 
 
 
566
  # df_morph์˜ '์ง์ ‘์ž…๋ ฅ' ์ปฌ๋Ÿผ์€ ์ด๋ฏธ ์œ„์—์„œ ์ฒ˜๋ฆฌ๋จ.
567
  # ์ด์ œ df_direct_raw์—๋งŒ ์žˆ๋Š” ํ‚ค์›Œ๋“œ๋ฅผ df_morph์— ์ถ”๊ฐ€
568
 
 
596
 
597
  # NA ๊ฐ’๋“ค์„ ์ ์ ˆํžˆ ์ฒ˜๋ฆฌ (์˜ˆ: 0์œผ๋กœ ์ฑ„์šฐ๊ฑฐ๋‚˜ ๊ทธ๋Œ€๋กœ ๋‘๊ธฐ)
598
  # API ๊ฐ’๋“ค์€ ์ˆซ์ž๊ฐ€ ์•„๋‹ ์ˆ˜ ์žˆ์œผ๋ฏ€๋กœ (์˜ˆ: "< 10"), process_keyword์—์„œ ์ฒ˜๋ฆฌ๋จ. ์—ฌ๊ธฐ์„œ๋Š” intํ˜• ๋ณ€ํ™˜ ์ „์ด๋ฏ€๋กœ ๊ทธ๋Œ€๋กœ ๋‘ .
599
+ # Gradio DataFrame์€ None์„ ์ž˜ ํ‘œ์‹œํ•จ.
600
  # ๋นˆ๋„์ˆ˜๋Š” ์ •์ˆ˜ํ˜•์ด์–ด์•ผ ํ•จ
601
  if "๋นˆ๋„์ˆ˜" in combined_df.columns:
602
  combined_df["๋นˆ๋„์ˆ˜"] = combined_df["๋นˆ๋„์ˆ˜"].fillna(0).astype(int)
603
 
604
 
605
+ if not combined_df.empty : # ๋น„์–ด์žˆ์ง€ ์•Š์„ ๋•Œ๋งŒ ์ •๋ ฌ ๋ฐ ์ค‘๋ณต ์ œ๊ฑฐ
606
+ combined_df = combined_df[final_cols_combined].drop_duplicates(subset=['๋‹จ์–ด'], keep='first') # ๋งŒ์•ฝ์„ ์œ„ํ•œ ์ค‘๋ณต ์ œ๊ฑฐ
607
+ combined_df.sort_values(by=["์ง์ ‘์ž…๋ ฅ", "๋นˆ๋„์ˆ˜"], ascending=[False, False], inplace=True, na_position='last') # ์ง์ ‘์ž…๋ ฅ ์šฐ์„ , ๊ทธ ๋‹ค์Œ ๋นˆ๋„์ˆ˜
608
+ combined_df.reset_index(drop=True, inplace=True)
609
 
610
  combined_excel = create_excel_file(combined_df)
611
  debug_log("combined_analysis ํ•จ์ˆ˜ ์™„๋ฃŒ")
 
775
  with gr.Column(scale=3): # ์˜ค๋ฅธ์ชฝ ์ปฌ๋Ÿผ (๊ฒฐ๊ณผ ์˜์—ญ)
776
  with gr.Group(elem_classes="custom-group custom-result"):
777
  gr.Markdown("### ๋ถ„์„ ๊ฒฐ๊ณผ")
778
+ result_df_display = gr.DataFrame( # gr.Dataframe -> gr.DataFrame ์œผ๋กœ ๋ณ€๊ฒฝ
779
  label="ํ†ตํ•ฉ ๋ถ„์„ ๊ฒฐ๊ณผ (๋‹จ์–ด, ๋นˆ๋„์ˆ˜, ๊ฒ€์ƒ‰๋Ÿ‰, ๋ธ”๋กœ๊ทธ๋ฌธ์„œ์ˆ˜, ์ง์ ‘์ž…๋ ฅ ์—ฌ๋ถ€)",
780
+ interactive=False,
781
+ # height=600, # Gradio 버전 호환성을 위해 height 파라미터 제거 또는 주석 처리
782
+ wrap=True
783
  )
784
  with gr.Group(elem_classes="custom-group"):
785
  gr.Markdown("### ๊ฒฐ๊ณผ ๋‹ค์šด๋กœ๋“œ")