seawolf2357 committed on
Commit
f3a07fd
·
verified ·
1 Parent(s): c19847c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -13
app.py CHANGED
@@ -5,8 +5,8 @@ import re
5
  import tempfile
6
  from collections.abc import Iterator
7
  from threading import Thread
8
-
9
- import requests # <-- For SERPHouse web search
10
  import cv2
11
  import gradio as gr
12
  import spaces
@@ -22,7 +22,7 @@ import pandas as pd
22
  import PyPDF2
23
 
24
  ##############################################################################
25
- # SERPHouse API key from environment variable
26
  ##############################################################################
27
  SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
28
 
@@ -44,11 +44,12 @@ def extract_keywords(text: str, top_k: int = 5) -> str:
44
  return " ".join(key_tokens)
45
 
46
  ##############################################################################
47
- # SERPHouse Live endpoint ํ˜ธ์ถœ (์ƒ์œ„ 20๊ฐœ์˜ ์ œ๋ชฉ์„ ์–ป์Œ)
 
48
  ##############################################################################
49
  def do_web_search(query: str) -> str:
50
  """
51
- SERPHouse ๋ผ์ด๋ธŒ ๊ฒ€์ƒ‰ ํ˜ธ์ถœ, ์ƒ์œ„ 20๊ฐœ ๊ฒฐ๊ณผ์˜ 'title'๋งŒ ๋ฌถ์–ด์„œ ๋ฐ˜ํ™˜.
52
  """
53
  try:
54
  url = "https://api.serphouse.com/serp/live"
@@ -70,10 +71,12 @@ def do_web_search(query: str) -> str:
70
  if not organic:
71
  return "No web search results found."
72
 
 
73
  summary_lines = []
74
  for idx, item in enumerate(organic[:20], start=1):
75
- title = item.get("title", "No Title")
76
- summary_lines.append(f"{idx}. {title}")
 
77
 
78
  return "\n".join(summary_lines)
79
  except Exception as e:
@@ -144,7 +147,6 @@ def pdf_to_markdown(pdf_path: str) -> str:
144
  page_text = page.extract_text() or ""
145
  page_text = page_text.strip()
146
  if page_text:
147
- # ํŽ˜์ด์ง€๋ณ„ ํ…์ŠคํŠธ ์ œํ•œ
148
  if len(page_text) > MAX_CONTENT_CHARS // max_pages:
149
  page_text = page_text[:MAX_CONTENT_CHARS // max_pages] + "...(truncated)"
150
  text_chunks.append(f"## Page {page_num+1}\n\n{page_text}\n")
@@ -396,18 +398,17 @@ def run(
396
  return
397
 
398
  try:
399
- # web_search๊ฐ€ True๋ฉด => ์‚ฌ์šฉ์ž๊ฐ€ ์ง์ ‘ ์ž…๋ ฅํ•œ web_search_query ๋Œ€์‹ ,
400
- # message["text"]๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ํ‚ค์›Œ๋“œ ์ถ”์ถœํ•˜์—ฌ ๊ฒ€์ƒ‰
401
  history_system_msg = None
402
  if use_web_search:
403
  user_text = message["text"]
404
  # 1) ํ‚ค์›Œ๋“œ ์ถ”์ถœ
405
  ws_query = extract_keywords(user_text, top_k=5)
406
  logger.info(f"[Auto WebSearch Keyword] {ws_query!r}")
407
- # 2) ์ƒ์œ„ 20๊ฐœ ๊ฒฐ๊ณผ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
408
  ws_result = do_web_search(ws_query)
409
  # 3) ์ด๋ฅผ system ๋ฉ”์‹œ์ง€๋กœ ์ถ”๊ฐ€
410
- system_search_content = f"[Search top-20 Titles Based on user prompt]\n{ws_result}\n"
411
  if system_search_content.strip():
412
  history_system_msg = {
413
  "role": "system",
@@ -644,7 +645,6 @@ with gr.Blocks(css=css, title="Vidraft-Gemma-3-27B") as demo:
644
  value=False,
645
  info="Check to enable a SERPHouse web search (auto keywords) before the chat reply"
646
  )
647
- # ์‹ค์ œ๋กœ๋Š” ์ž๋™์ถ”์ถœ. ์•„๋ž˜ textbox๋Š” ๋ฏธ์‚ฌ์šฉ.
648
  web_search_text = gr.Textbox(
649
  lines=1,
650
  label="(Unused) Web Search Query",
 
5
  import tempfile
6
  from collections.abc import Iterator
7
  from threading import Thread
8
+ import json # โ† JSON ๋ณ€ํ™˜์„ ์œ„ํ•ด ์ถ”๊ฐ€
9
+ import requests # SERPHouse web search
10
  import cv2
11
  import gradio as gr
12
  import spaces
 
22
  import PyPDF2
23
 
24
  ##############################################################################
25
+ # SERPHouse API key from environment variable (์‚ฌ์šฉ์ž๊ฐ€ ํ™˜๊ฒฝ๋ณ€์ˆ˜๋กœ ์ง€์ •ํ•ด์•ผ ํ•จ)
26
  ##############################################################################
27
  SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
28
 
 
44
  return " ".join(key_tokens)
45
 
46
  ##############################################################################
47
+ # SERPHouse Live endpoint ํ˜ธ์ถœ
48
+ # - ์ƒ์œ„ 20๊ฐœ ๊ฒฐ๊ณผ ๋ชจ๋‘ "์ „์ฒด item"์„ system msg์— ๋‹ด์•„(=JSON ๊ทธ๋Œ€๋กœ) LLM์ด ์ฐธ์กฐ
49
  ##############################################################################
50
  def do_web_search(query: str) -> str:
51
  """
52
+ SERPHouse ๋ผ์ด๋ธŒ ๊ฒ€์ƒ‰ ํ˜ธ์ถœ, ์ƒ์œ„ 20๊ฐœ 'organic' ๊ฒฐ๊ณผ ์ „์ฒด๋ฅผ JSON ํ˜•ํƒœ๋กœ ๋ฌถ์–ด์„œ ๋ฐ˜ํ™˜.
53
  """
54
  try:
55
  url = "https://api.serphouse.com/serp/live"
 
71
  if not organic:
72
  return "No web search results found."
73
 
74
+ # ๊ฐ item์„ JSON(์ „์ฒด ํ•„๋“œ)์œผ๋กœ ๋ณ€ํ™˜ํ•˜์—ฌ ์ €์žฅ
75
  summary_lines = []
76
  for idx, item in enumerate(organic[:20], start=1):
77
+ # item ์ „์ฒด๋ฅผ JSON ๋ฌธ์ž์—ด๋กœ ๋ณ€ํ™˜
78
+ item_json = json.dumps(item, ensure_ascii=False, indent=2)
79
+ summary_lines.append(f"Result {idx}:\n{item_json}\n")
80
 
81
  return "\n".join(summary_lines)
82
  except Exception as e:
 
147
  page_text = page.extract_text() or ""
148
  page_text = page_text.strip()
149
  if page_text:
 
150
  if len(page_text) > MAX_CONTENT_CHARS // max_pages:
151
  page_text = page_text[:MAX_CONTENT_CHARS // max_pages] + "...(truncated)"
152
  text_chunks.append(f"## Page {page_num+1}\n\n{page_text}\n")
 
398
  return
399
 
400
  try:
401
+ # web_search๊ฐ€ True๋ฉด => message["text"]๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ํ‚ค์›Œ๋“œ ์ถ”์ถœํ•˜์—ฌ SERPHouse ํ˜ธ์ถœ
 
402
  history_system_msg = None
403
  if use_web_search:
404
  user_text = message["text"]
405
  # 1) ํ‚ค์›Œ๋“œ ์ถ”์ถœ
406
  ws_query = extract_keywords(user_text, top_k=5)
407
  logger.info(f"[Auto WebSearch Keyword] {ws_query!r}")
408
+ # 2) ์ƒ์œ„ 20๊ฐœ ๊ฒฐ๊ณผ (item ์ „์ฒด) ๊ฐ€์ ธ์˜ค๊ธฐ
409
  ws_result = do_web_search(ws_query)
410
  # 3) ์ด๋ฅผ system ๋ฉ”์‹œ์ง€๋กœ ์ถ”๊ฐ€
411
+ system_search_content = f"[Search top-20 Full Items Based on user prompt]\n{ws_result}\n"
412
  if system_search_content.strip():
413
  history_system_msg = {
414
  "role": "system",
 
645
  value=False,
646
  info="Check to enable a SERPHouse web search (auto keywords) before the chat reply"
647
  )
 
648
  web_search_text = gr.Textbox(
649
  lines=1,
650
  label="(Unused) Web Search Query",