multimodal-chat-MBTI-ISFP

Running on Zero

App Files Files Community

openfree committed on Mar 17

Commit

70e12bf

verified ·

1 Parent(s): 3a3ac2f

Update app-backup.py

Browse files

Files changed (1) hide show

app-backup.py +124 -170

app-backup.py CHANGED Viewed

@@ -5,8 +5,8 @@ import re
 import tempfile
 from collections.abc import Iterator
 from threading import Thread
-import requests  # <-- For SERPHouse web search
 import cv2
 import gradio as gr
 import spaces
@@ -17,23 +17,36 @@ from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIter
 # CSV/TXT 분석
 import pandas as pd
 # PDF 텍스트 추출
 import PyPDF2
 ##############################################################################
-# SERPHouse API key for web search
 ##############################################################################
-SERPHOUSE_API_KEY = "V38CNn4HXpLtynJQyOeoUensTEYoFy8PBUxKpDqAW1pawT1vfJ2BWtPQ98h6"
 ##############################################################################
-# Simple function to call the SERPHouse Live endpoint
-# https://api.serphouse.com/serp/live
 ##############################################################################
 def do_web_search(query: str) -> str:
     """
-    Calls SERPHouse live endpoint with the given query (q).
-    Returns a simple text summary or error message.
     """
     try:
         url = "https://api.serphouse.com/serp/live"
@@ -43,35 +56,35 @@ def do_web_search(query: str) -> str:
             "lang": "en",
             "device": "desktop",
             "serp_type": "web",
             "api_token": SERPHOUSE_API_KEY,
         }
         resp = requests.get(url, params=params, timeout=30)
-        resp.raise_for_status()  # Raise an exception for 4xx/5xx errors
         data = resp.json()
-        # For demonstration, let's extract top 3 organic results:
         results = data.get("results", {})
         organic = results.get("results", {}).get("organic", [])
         if not organic:
             return "No web search results found."
         summary_lines = []
-        for item in organic[:3]:
-            rank = item.get("position", "-")
-            title = item.get("title", "No Title")
-            link = item.get("link", "No Link")
-            snippet = item.get("snippet", "(No snippet)")
-            summary_lines.append(f"**Rank {rank}:** [{title}]({link})\n\n> {snippet}")
-        return "\n\n".join(summary_lines) if summary_lines else "No web search results found."
     except Exception as e:
         logger.error(f"Web search failed: {e}")
         return f"Web search failed: {str(e)}"
-MAX_CONTENT_CHARS = 4000  # 너무 큰 파일을 막기 위해 최대 표시 4000자
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 model = Gemma3ForConditionalGeneration.from_pretrained(
     model_id,
@@ -79,23 +92,20 @@ model = Gemma3ForConditionalGeneration.from_pretrained(
     torch_dtype=torch.bfloat16,
     attn_implementation="eager"
 )
 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
-##################################################
 # CSV, TXT, PDF 분석 함수
-##################################################
 def analyze_csv_file(path: str) -> str:
     """
     CSV 파일을 전체 문자열로 변환. 너무 길 경우 일부만 표시.
     """
     try:
         df = pd.read_csv(path)
-        # 데이터 프레임 크기 제한 (행/열 수가 많은 경우)
         if df.shape[0] > 50 or df.shape[1] > 10:
             df = df.iloc[:50, :10]
         df_str = df.to_string()
         if len(df_str) > MAX_CONTENT_CHARS:
             df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
@@ -126,18 +136,15 @@ def pdf_to_markdown(pdf_path: str) -> str:
     try:
         with open(pdf_path, "rb") as f:
             reader = PyPDF2.PdfReader(f)
-            # 최대 5페이지만 처리
             max_pages = min(5, len(reader.pages))
             for page_num in range(max_pages):
                 page = reader.pages[page_num]
                 page_text = page.extract_text() or ""
                 page_text = page_text.strip()
                 if page_text:
-                    # 페이지별 텍스트도 제한
                     if len(page_text) > MAX_CONTENT_CHARS // max_pages:
                         page_text = page_text[:MAX_CONTENT_CHARS // max_pages] + "...(truncated)"
                     text_chunks.append(f"## Page {page_num+1}\n\n{page_text}\n")
             if len(reader.pages) > max_pages:
                 text_chunks.append(f"\n...(Showing {max_pages} of {len(reader.pages)} pages)...")
     except Exception as e:
@@ -150,9 +157,9 @@ def pdf_to_markdown(pdf_path: str) -> str:
     return f"**[PDF File: {os.path.basename(pdf_path)}]**\n\n{full_text}"
-##################################################
 # 이미지/비디오 업로드 제한 검사
-##################################################
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
     video_count = 0
@@ -181,14 +188,6 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]:
 def validate_media_constraints(message: dict, history: list[dict]) -> bool:
-    """
-    - 비디오 1개 초과 불가
-    - 비디오와 이미지 혼합 불가
-    - 이미지 개수 MAX_NUM_IMAGES 초과 불가
-    - <image> 태그가 있으면 태그 수와 실제 이미지 수 일치
-    - CSV, TXT, PDF 등은 여기서 제한하지 않음
-    """
-    # 이미지와 비디오 파일만 필터링
     media_files = []
     for f in message["files"]:
         if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
@@ -213,9 +212,7 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
         gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
         return False
-    # 이미지 태그 검증 (실제 이미지 파일만 계산)
     if "<image>" in message["text"]:
-        # 이미지 파일만 필터링
         image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
         image_tag_count = message["text"].count("<image>")
         if image_tag_count != len(image_files):
@@ -225,16 +222,14 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     return True
-##################################################
 # 비디오 처리
-##################################################
 def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
     total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
-    # 더 적은 프레임을 추출하도록 조정
-    frame_interval = max(int(fps), int(total_frames / 10))  # 초당 1프레임 또는 최대 10프레임
     frames = []
     for i in range(0, total_frames, frame_interval):
@@ -245,8 +240,6 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
             pil_image = Image.fromarray(image)
             timestamp = round(i / fps, 2)
             frames.append((pil_image, timestamp))
-            # 최대 5프레임만 사용
             if len(frames) >= 5:
                 break
@@ -267,15 +260,14 @@ def process_video(video_path: str) -> list[dict]:
     return content
-##################################################
 # interleaved <image> 처리
-##################################################
 def process_interleaved_images(message: dict) -> list[dict]:
     parts = re.split(r"(<image>)", message["text"])
     content = []
     image_index = 0
-    # 이미지 파일만 필터링
     image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
     for part in parts:
@@ -285,98 +277,81 @@ def process_interleaved_images(message: dict) -> list[dict]:
         elif part.strip():
             content.append({"type": "text", "text": part.strip()})
         else:
-            # 공백이거나 \n 같은 경우
             if isinstance(part, str) and part != "<image>":
                 content.append({"type": "text", "text": part})
     return content
-##################################################
 # PDF + CSV + TXT + 이미지/비디오
-##################################################
 def is_image_file(file_path: str) -> bool:
-    """이미지 파일인지 확인"""
     return bool(re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE))
 def is_video_file(file_path: str) -> bool:
-    """비디오 파일인지 확인"""
     return file_path.endswith(".mp4")
 def is_document_file(file_path: str) -> bool:
-    """문서 파일인지 확인 (PDF, CSV, TXT)"""
-    return (file_path.lower().endswith(".pdf") or
-            file_path.lower().endswith(".csv") or
-            file_path.lower().endswith(".txt"))
 def process_new_user_message(message: dict) -> list[dict]:
     if not message["files"]:
         return [{"type": "text", "text": message["text"]}]
-    # 1) 파일 분류
     video_files = [f for f in message["files"] if is_video_file(f)]
     image_files = [f for f in message["files"] if is_image_file(f)]
     csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
     txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
     pdf_files = [f for f in message["files"] if f.lower().endswith(".pdf")]
-    # 2) 사용자 원본 text 추가
     content_list = [{"type": "text", "text": message["text"]}]
-    # 3) CSV
     for csv_path in csv_files:
         csv_analysis = analyze_csv_file(csv_path)
         content_list.append({"type": "text", "text": csv_analysis})
-    # 4) TXT
     for txt_path in txt_files:
         txt_analysis = analyze_txt_file(txt_path)
         content_list.append({"type": "text", "text": txt_analysis})
-    # 5) PDF
     for pdf_path in pdf_files:
         pdf_markdown = pdf_to_markdown(pdf_path)
         content_list.append({"type": "text", "text": pdf_markdown})
-    # 6) 비디오 (한 개만 허용)
     if video_files:
         content_list += process_video(video_files[0])
         return content_list
-    # 7) 이미지 처리
     if "<image>" in message["text"] and image_files:
-        # interleaved
         interleaved_content = process_interleaved_images({"text": message["text"], "files": image_files})
-        # 원본 content_list 앞부분(텍스트)을 제거하고 interleaved로 대체
-        if content_list[0]["type"] == "text":
-            content_list = content_list[1:]  # 원본 텍스트 제거
-        return interleaved_content + content_list  # interleaved + 나머지 문서 분석 내용
     else:
-        # 일반 여러 장
         for img_path in image_files:
             content_list.append({"type": "image", "url": img_path})
     return content_list
-##################################################
 # history -> LLM 메시지 변환
-##################################################
 def process_history(history: list[dict]) -> list[dict]:
     messages = []
     current_user_content: list[dict] = []
     for item in history:
         if item["role"] == "assistant":
-            # user_content가 쌓여있다면 user 메시지로 저장
             if current_user_content:
                 messages.append({"role": "user", "content": current_user_content})
                 current_user_content = []
-            # 그 뒤 item은 assistant
             messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
         else:
-            # user
             content = item["content"]
             if isinstance(content, str):
                 current_user_content.append({"type": "text", "text": content})
@@ -385,19 +360,17 @@ def process_history(history: list[dict]) -> list[dict]:
                 if is_image_file(file_path):
                     current_user_content.append({"type": "image", "url": file_path})
                 else:
-                    # 비이미지 파일은 텍스트로 처리
                     current_user_content.append({"type": "text", "text": f"[File: {os.path.basename(file_path)}]"})
-    # 마지막 사용자 메시지가 처리되지 않은 경우 추가
     if current_user_content:
         messages.append({"role": "user", "content": current_user_content})
     return messages
-##################################################
-# 메인 추론 함수
-##################################################
 @spaces.GPU(duration=120)
 def run(
     message: dict,
@@ -407,60 +380,42 @@ def run(
     use_web_search: bool = False,
     web_search_query: str = "",
 ) -> Iterator[str]:
-    """
-    The main inference function. Now extended with optional web_search arguments:
-    - use_web_search: bool
-    - web_search_query: str
-    If `use_web_search` is True, calls SERPHouse for the given `web_search_query`.
-    """
-    # Validate media constraints first
     if not validate_media_constraints(message, history):
         yield ""
         return
     try:
-        # If user opted for "Web Search", do it here and yield a prefix message
-        if use_web_search and web_search_query.strip():
-            ws_result = do_web_search(web_search_query.strip())
-            yield f"**[Web Search Results for '{web_search_query.strip()}':]**\n\n{ws_result}\n\n---\n"
         messages = []
-        if system_prompt:
-            messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
         messages.extend(process_history(history))
-        # 사용자 메시지 처리
         user_content = process_new_user_message(message)
-        # 토큰 수를 줄이기 위해 너무 긴 텍스트는 잘라내기
         for item in user_content:
             if item["type"] == "text" and len(item["text"]) > MAX_CONTENT_CHARS:
                 item["text"] = item["text"][:MAX_CONTENT_CHARS] + "\n...(truncated)..."
         messages.append({"role": "user", "content": user_content})
-        # 모델 입력 생성 전 최종 확인
-        for msg in messages:
-            if msg["role"] != "user":
-                continue
-            filtered_content = []
-            for item in msg["content"]:
-                if item["type"] == "image":
-                    if is_image_file(item["url"]):
-                        filtered_content.append(item)
-                    else:
-                        # 이미지 파일이 아닌 경우 텍스트로 변환
-                        filtered_content.append({
-                            "type": "text",
-                            "text": f"[Non-image file: {os.path.basename(item['url'])}]"
-                        })
-                else:
-                    filtered_content.append(item)
-            msg["content"] = filtered_content
-        # 모델 입력 생성
         inputs = processor.apply_chat_template(
             messages,
             add_generation_prompt=True,
@@ -469,35 +424,46 @@ def run(
             return_tensors="pt",
         ).to(device=model.device, dtype=torch.bfloat16)
-        # 텍스트 생성 스트리머 설정
         streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
         gen_kwargs = dict(
             inputs,
             streamer=streamer,
             max_new_tokens=max_new_tokens,
         )
-        # 별도 스레드에서 텍스트 생성
-        t = Thread(target=model.generate, kwargs=gen_kwargs)
         t.start()
-        # 결과 스트리밍
         output = ""
         for new_text in streamer:
             output += new_text
             yield output
     except Exception as e:
         logger.error(f"Error in run: {str(e)}")
         yield f"죄송합니다. 오류가 발생했습니다: {str(e)}"
-##################################################
-# 예시들 (한글화 버전)
-##################################################
 examples = [
     [
         {
             "text": "두 PDF 파일 내용을 비교하라.",
@@ -505,7 +471,7 @@ examples = [
             "files": [
                 "assets/additional-examples/before.pdf",
                 "assets/additional-examples/after.pdf",
-            ],
         }
     ],
     [
@@ -513,37 +479,37 @@ examples = [
             "text": "CSV 파일 내용을 요약, 분석하라",
             "files": ["assets/additional-examples/sample-csv.csv"],
         }
-    ],
     [
         {
             "text": "이 영상의 내용을 설명하라",
             "files": ["assets/additional-examples/tmp.mp4"],
         }
-    ],
     [
         {
             "text": "표지 내용을 설명하고 글자를 읽어주세요.",
             "files": ["assets/additional-examples/maz.jpg"],
         }
-    ],
     [
         {
             "text": "이미 이 영양제를 <image> 가지고 있고, 이 제품 <image>을 새로 사려 합니다. 함께 섭취할 때 주의해야 할 점이 있을까요?",
             "files": ["assets/additional-examples/pill1.png", "assets/additional-examples/pill2.png"],
         }
-    ],
     [
         {
             "text": "이 적분을 풀어주세요.",
             "files": ["assets/additional-examples/4.png"],
         }
-    ],
     [
         {
             "text": "이 티켓은 언제 발급된 것이고, 가격은 얼마인가요?",
             "files": ["assets/additional-examples/2.png"],
         }
-    ],
     [
         {
             "text": "이미지들의 순서를 바탕으로 짧은 이야기를 만들어 주세요.",
@@ -567,24 +533,19 @@ examples = [
             "text": "동일한 막대 그래프를 그리는 matplotlib 코드를 작성해주세요.",
             "files": ["assets/additional-examples/barchart.png"],
         }
-    ],
     [
         {
             "text": "이 세계에서 살고 있을 생물들을 상상해서 묘사해주세요.",
             "files": ["assets/sample-images/08.png"],
         }
     ],
     [
         {
             "text": "이미지에 있는 텍스트를 그대로 읽어서 마크다운 형태로 적어주세요.",
             "files": ["assets/additional-examples/3.png"],
         }
     ],
     [
         {
             "text": "이 표지판에는 무슨 문구가 적혀 있나요?",
@@ -597,15 +558,11 @@ examples = [
             "files": ["assets/sample-images/03.png"],
         }
     ],
 ]
 ##############################################################################
-# Custom CSS similar to second example (colorful background, panel, etc.)
 ##############################################################################
 css = """
 body {
@@ -662,18 +619,13 @@ button:hover, .btn:hover {
 """
 title_html = """
-<h1 align="center" style="margin-bottom: 0.2em;"> 🤗 Vidraft-Gemma-3-27B </h1>
 <p align="center" style="font-size:1.1em; color:#555;">
     Multimodal Chat Interface + Optional Web Search
 </p>
 """
-##############################################################################
-# Build a Blocks layout that includes:
-#   - A left sidebar with "Web Search" controls
-#   - The main ChatInterface in the center or right
-##############################################################################
-with gr.Blocks(css=css, title="Vidraft-Gemma-3-27B") as demo:
     gr.Markdown(title_html)
     with gr.Row():
@@ -684,12 +636,12 @@ with gr.Blocks(css=css, title="Vidraft-Gemma-3-27B") as demo:
                 web_search_checkbox = gr.Checkbox(
                     label="Web Search",
                     value=False,
-                    info="Check to enable a SERPHouse web search before the chat reply"
                 )
             web_search_text = gr.Textbox(
                 lines=1,
-                label="Web Search Query",
-                placeholder="Enter search keywords..."
             )
             gr.Markdown("---")
@@ -707,12 +659,12 @@ with gr.Blocks(css=css, title="Vidraft-Gemma-3-27B") as demo:
                 minimum=100,
                 maximum=8000,
                 step=50,
-                value=2000,
             )
-            gr.Markdown("<br><br>")  # spacing
-        # Main ChatInterface to the right
         with gr.Column(scale=7):
             chat = gr.ChatInterface(
                 fn=run,
@@ -734,7 +686,7 @@ with gr.Blocks(css=css, title="Vidraft-Gemma-3-27B") as demo:
                     web_search_text,
                 ],
                 stop_btn=False,
-                title="Vidraft-Gemma-3-27B",
                 examples=examples,
                 run_examples_on_click=False,
                 cache_examples=False,
@@ -745,12 +697,14 @@ with gr.Blocks(css=css, title="Vidraft-Gemma-3-27B") as demo:
     with gr.Row(elem_id="examples_row"):
         with gr.Column(scale=12, elem_id="examples_container"):
             gr.Markdown("### Example Inputs (click to load)")
-            # The fix: pass an empty list to avoid the "None" error, so we keep the code structure.
             gr.Examples(
                 examples=examples,
-                inputs=[],  # Instead of None or chat.
                 cache_examples=False
             )
 if __name__ == "__main__":
     demo.launch()

 import tempfile
 from collections.abc import Iterator
 from threading import Thread
+import json
+import requests
 import cv2
 import gradio as gr
 import spaces
 # CSV/TXT 분석
 import pandas as pd
 # PDF 텍스트 추출
 import PyPDF2
 ##############################################################################
+# SERPHouse API key from environment variable
+##############################################################################
+SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
+##############################################################################
+# 간단한 키워드 추출 함수 (한글 + 알파벳 + 숫자 + 공백 보존)
 ##############################################################################
def extract_keywords(text: str, top_k: int = 5) -> str:
    """
    Build a short search query from free-form text.

    Processing steps:
      1) Delete every character that is not an ASCII letter, an ASCII digit,
         a Hangul syllable (가-힣) or whitespace. Characters are removed, not
         replaced by a space, so "state-of-the-art" becomes "stateoftheart".
      2) Split the remaining text on whitespace.
      3) Keep the first ``top_k`` tokens in their original order.

    Args:
        text: Raw input text.
        top_k: Maximum number of tokens to keep. Zero or a negative value
            yields an empty string.

    Returns:
        The kept tokens joined by single spaces, or "" when nothing is left.
    """
    if top_k <= 0:
        # Without this guard a negative bound would slice from the END of the
        # token list (tokens[:-1] keeps all but the last token).
        return ""
    cleaned = re.sub(r"[^a-zA-Z0-9가-힣\s]", "", text)
    return " ".join(cleaned.split()[:top_k])
 ##############################################################################
+# SERPHouse Live endpoint 호출
+# - 상위 20개 결과 JSON을 LLM에 넘길 때 link, snippet 등 모두 포함
 ##############################################################################
 def do_web_search(query: str) -> str:
     """
+    상위 20개 'organic' 결과 item 전체(제목, link, snippet 등)를
+    JSON 문자열 형태로 반환
     """
     try:
         url = "https://api.serphouse.com/serp/live"
             "lang": "en",
             "device": "desktop",
             "serp_type": "web",
+            "num_result": "20",
             "api_token": SERPHOUSE_API_KEY,
         }
         resp = requests.get(url, params=params, timeout=30)
+        resp.raise_for_status()
         data = resp.json()
         results = data.get("results", {})
         organic = results.get("results", {}).get("organic", [])
         if not organic:
             return "No web search results found."
         summary_lines = []
+        for idx, item in enumerate(organic[:20], start=1):
+            item_json = json.dumps(item, ensure_ascii=False, indent=2)
+            summary_lines.append(f"Result {idx}:\n{item_json}\n")
+        return "\n".join(summary_lines)
     except Exception as e:
         logger.error(f"Web search failed: {e}")
         return f"Web search failed: {str(e)}"
+##############################################################################
+# 모델/프로세서 로딩
+##############################################################################
+MAX_CONTENT_CHARS = 4000
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 model = Gemma3ForConditionalGeneration.from_pretrained(
     model_id,
     torch_dtype=torch.bfloat16,
     attn_implementation="eager"
 )
 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
+##############################################################################
 # CSV, TXT, PDF 분석 함수
+##############################################################################
 def analyze_csv_file(path: str) -> str:
     """
     CSV 파일을 전체 문자열로 변환. 너무 길 경우 일부만 표시.
     """
     try:
         df = pd.read_csv(path)
         if df.shape[0] > 50 or df.shape[1] > 10:
             df = df.iloc[:50, :10]
         df_str = df.to_string()
         if len(df_str) > MAX_CONTENT_CHARS:
             df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
     try:
         with open(pdf_path, "rb") as f:
             reader = PyPDF2.PdfReader(f)
             max_pages = min(5, len(reader.pages))
             for page_num in range(max_pages):
                 page = reader.pages[page_num]
                 page_text = page.extract_text() or ""
                 page_text = page_text.strip()
                 if page_text:
                     if len(page_text) > MAX_CONTENT_CHARS // max_pages:
                         page_text = page_text[:MAX_CONTENT_CHARS // max_pages] + "...(truncated)"
                     text_chunks.append(f"## Page {page_num+1}\n\n{page_text}\n")
             if len(reader.pages) > max_pages:
                 text_chunks.append(f"\n...(Showing {max_pages} of {len(reader.pages)} pages)...")
     except Exception as e:
     return f"**[PDF File: {os.path.basename(pdf_path)}]**\n\n{full_text}"
+##############################################################################
 # 이미지/비디오 업로드 제한 검사
+##############################################################################
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
     video_count = 0
 def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     media_files = []
     for f in message["files"]:
         if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
         gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
         return False
     if "<image>" in message["text"]:
         image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
         image_tag_count = message["text"].count("<image>")
         if image_tag_count != len(image_files):
     return True
+##############################################################################
 # 비디오 처리
+##############################################################################
 def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
     total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
+    frame_interval = max(int(fps), int(total_frames / 10))
     frames = []
     for i in range(0, total_frames, frame_interval):
             pil_image = Image.fromarray(image)
             timestamp = round(i / fps, 2)
             frames.append((pil_image, timestamp))
             if len(frames) >= 5:
                 break
     return content
+##############################################################################
 # interleaved <image> 처리
+##############################################################################
 def process_interleaved_images(message: dict) -> list[dict]:
     parts = re.split(r"(<image>)", message["text"])
     content = []
     image_index = 0
     image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
     for part in parts:
         elif part.strip():
             content.append({"type": "text", "text": part.strip()})
         else:
             if isinstance(part, str) and part != "<image>":
                 content.append({"type": "text", "text": part})
     return content
+##############################################################################
 # PDF + CSV + TXT + 이미지/비디오
+##############################################################################
 def is_image_file(file_path: str) -> bool:
     """Return True when the path ends in .png/.jpg/.jpeg/.gif/.webp (case-insensitive)."""
     return bool(re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE))


 def is_video_file(file_path: str) -> bool:
     """Return True when the path ends in .mp4 (case-insensitive)."""
     # Lower-case first so "CLIP.MP4" is recognised, matching the
     # case-insensitive behaviour of the image and document checks.
     return file_path.lower().endswith(".mp4")


 def is_document_file(file_path: str) -> bool:
     """Return True when the path ends in .pdf, .csv or .txt (case-insensitive)."""
     return file_path.lower().endswith((".pdf", ".csv", ".txt"))
 def process_new_user_message(message: dict) -> list[dict]:
     """
     Turn the newly submitted chat message into a list of content items.

     ``message`` is read through its "text" and "files" keys. With no files,
     the result is a single text item. Otherwise the result starts with the
     user's text, followed by one text item per CSV, then per TXT, then per
     PDF attachment (each produced by the matching analysis helper), and then:

       - if any video is attached, the items returned by ``process_video`` for
         the first video only; images are not added in this case;
       - else, if the text contains "<image>" and images are attached, the
         leading text item is replaced by the interleaved text/image items
         from ``process_interleaved_images``, placed ahead of the document
         items;
       - else, one image item per attached image.
     """
     attachments = message["files"]
     if not attachments:
         return [{"type": "text", "text": message["text"]}]

     prompt = message["text"]
     videos = [path for path in attachments if is_video_file(path)]
     images = [path for path in attachments if is_image_file(path)]

     items = [{"type": "text", "text": prompt}]

     # Documents are appended grouped by type, in this fixed order.
     document_readers = (
         (".csv", analyze_csv_file),
         (".txt", analyze_txt_file),
         (".pdf", pdf_to_markdown),
     )
     for suffix, reader in document_readers:
         for path in attachments:
             if path.lower().endswith(suffix):
                 items.append({"type": "text", "text": reader(path)})

     if videos:
         # Only the first video is processed.
         items += process_video(videos[0])
         return items

     if "<image>" in prompt and images:
         interleaved = process_interleaved_images({"text": prompt, "files": images})
         # items[0] is always the raw user text added above; the interleaved
         # content already carries that text, so it is dropped here.
         return interleaved + items[1:]

     items.extend({"type": "image", "url": path} for path in images)
     return items
+##############################################################################
 # history -> LLM 메시지 변환
+##############################################################################
 def process_history(history: list[dict]) -> list[dict]:
     messages = []
     current_user_content: list[dict] = []
     for item in history:
         if item["role"] == "assistant":
             if current_user_content:
                 messages.append({"role": "user", "content": current_user_content})
                 current_user_content = []
             messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
         else:
             content = item["content"]
             if isinstance(content, str):
                 current_user_content.append({"type": "text", "text": content})
                 if is_image_file(file_path):
                     current_user_content.append({"type": "image", "url": file_path})
                 else:
                     current_user_content.append({"type": "text", "text": f"[File: {os.path.basename(file_path)}]"})
     if current_user_content:
         messages.append({"role": "user", "content": current_user_content})
     return messages
+##############################################################################
+# 메인 추론 함수 (web search 체크 시 자동 키워드추출->검색->결과 system msg)
+##############################################################################
 @spaces.GPU(duration=120)
 def run(
     message: dict,
     use_web_search: bool = False,
     web_search_query: str = "",
 ) -> Iterator[str]:
     # Main inference entry point: assembles the chat messages (optional system
     # text, prior history, the new user turn), starts generation on a worker
     # thread, and yields the accumulated reply text as it streams in.
     #
     # NOTE(review): `history`, `system_prompt` and `max_new_tokens` are read
     # below but do not appear in the signature shown here -- presumably
     # parameters hidden by the diff view; confirm against the full file.
     # `web_search_query` is not read anywhere in the visible body.
     #
     # When validation returns a falsy value, yield an empty string and stop.
     if not validate_media_constraints(message, history):
         yield ""
         return
     try:
         # Build one system message out of the user-supplied system prompt and,
         # when web search is enabled, the search result text.
+        combined_system_msg = ""
+        if system_prompt.strip():
+            combined_system_msg += f"[System Prompt]\n{system_prompt.strip()}\n\n"
+        if use_web_search:
             # The search query is derived from the user's text via
             # extract_keywords (top_k=5), not taken from web_search_query.
+            user_text = message["text"]
+            ws_query = extract_keywords(user_text, top_k=5)
+            if ws_query.strip():
+                logger.info(f"[Auto WebSearch Keyword] {ws_query!r}")
+                ws_result = do_web_search(ws_query)
                 # NOTE(review): the "top-20" wording in the label below is not
                 # verifiable from this block; it depends on what do_web_search
                 # actually returns.
+                combined_system_msg += f"[Search top-20 Full Items Based on user prompt]\n{ws_result}\n\n"
+            else:
+                combined_system_msg += "[No valid keywords found, skipping WebSearch]\n\n"
         messages = []
         # Only emit a system turn when there is something to put in it.
+        if combined_system_msg.strip():
+            messages.append({
+                "role": "system",
+                "content": [{"type": "text", "text": combined_system_msg.strip()}],
+            })
         messages.extend(process_history(history))
         user_content = process_new_user_message(message)
         # Cap each text item of the new user turn at MAX_CONTENT_CHARS and
         # append a visible truncation marker.
         for item in user_content:
             if item["type"] == "text" and len(item["text"]) > MAX_CONTENT_CHARS:
                 item["text"] = item["text"][:MAX_CONTENT_CHARS] + "\n...(truncated)..."
         messages.append({"role": "user", "content": user_content})
         inputs = processor.apply_chat_template(
             messages,
             add_generation_prompt=True,
             return_tensors="pt",
         ).to(device=model.device, dtype=torch.bfloat16)
         # The streamer skips the prompt and special tokens; its queue read
         # uses a 30 second timeout.
         streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
         # Generation kwargs: the processed inputs plus the streamer and the
         # token budget.
         gen_kwargs = dict(
             inputs,
             streamer=streamer,
             max_new_tokens=max_new_tokens,
         )
         # Generation runs on a worker thread so this generator can consume the
         # streamer while tokens are being produced.
         # NOTE(review): an exception raised inside the worker thread is not
         # caught by the except clause below; it presumably surfaces here only
         # as a streamer timeout -- confirm.
+        t = Thread(target=_model_gen_with_oom_catch, kwargs=gen_kwargs)
         t.start()
         # Yield the full text so far on every new chunk (not just the delta).
         output = ""
         for new_text in streamer:
             output += new_text
             yield output
     except Exception as e:
         # Log the failure and report it to the user as the reply text.
         logger.error(f"Error in run: {str(e)}")
         yield f"죄송합니다. 오류가 발생했습니다: {str(e)}"
+##############################################################################
+# [추가] 별도 함수에서 model.generate(...)를 호출, OOM 캐치
+##############################################################################
+def _model_gen_with_oom_catch(**kwargs):
+    """
+    Call model.generate(**kwargs), converting a CUDA out-of-memory error
+    into a RuntimeError.
+
+    Intended to run on a separate thread (it is the Thread target in run()).
+    The RuntimeError message tells the user that GPU memory is insufficient
+    and suggests lowering Max New Tokens or shortening the prompt.
+    """
+    try:
+        model.generate(**kwargs)
+    except torch.cuda.OutOfMemoryError:
+        raise RuntimeError(
+            "[OutOfMemoryError] GPU 메모리가 부족합니다. "
+            "Max New Tokens을 줄이거나, 프롬프트 길이를 줄여주세요."
+        )
+##############################################################################
+# 예시들 (한글화)
+##############################################################################
 # Example inputs for the UI: each entry is a one-element list holding a dict
 # with a "text" prompt and a "files" list of asset paths.
 examples = [
     [
         {
             "text": "두 PDF 파일 내용을 비교하라.",
             "files": [
                 "assets/additional-examples/before.pdf",
                 "assets/additional-examples/after.pdf",
+            ],
         }
     ],
     [
         # NOTE(review): the opening `{` of this entry is not visible here --
         # presumably hidden by the diff view; confirm against the full file.
             "text": "CSV 파일 내용을 요약, 분석하라",
             "files": ["assets/additional-examples/sample-csv.csv"],
         }
+    ],
     [
         {
             "text": "이 영상의 내용을 설명하라",
             "files": ["assets/additional-examples/tmp.mp4"],
         }
+    ],
     [
         {
             "text": "표지 내용을 설명하고 글자를 읽어주세요.",
             "files": ["assets/additional-examples/maz.jpg"],
         }
+    ],
     [
         {
             "text": "이미 이 영양제를 <image> 가지고 있고, 이 제품 <image>을 새로 사려 합니다. 함께 섭취할 때 주의해야 할 점이 있을까요?",
             "files": ["assets/additional-examples/pill1.png", "assets/additional-examples/pill2.png"],
         }
+    ],
     [
         {
             "text": "이 적분을 풀어주세요.",
             "files": ["assets/additional-examples/4.png"],
         }
+    ],
     [
         {
             "text": "이 티켓은 언제 발급된 것이고, 가격은 얼마인가요?",
             "files": ["assets/additional-examples/2.png"],
         }
+    ],
     [
         {
             # NOTE(review): two "text" keys appear back to back in this dict.
             # They presumably belong to two different entries whose
             # separating lines are hidden by the diff view; as literally
             # written, the second value would replace the first.
             "text": "이미지들의 순서를 바탕으로 짧은 이야기를 만들어 주세요.",
             "text": "동일한 막대 그래프를 그리는 matplotlib 코드를 작성해주세요.",
             "files": ["assets/additional-examples/barchart.png"],
         }
+    ],
     [
         {
             "text": "이 세계에서 살고 있을 생물들을 상상해서 묘사해주세요.",
             "files": ["assets/sample-images/08.png"],
         }
     ],
     [
         {
             "text": "이미지에 있는 텍스트를 그대로 읽어서 마크다운 형태로 적어주세요.",
             "files": ["assets/additional-examples/3.png"],
         }
     ],
     [
         {
             "text": "이 표지판에는 무슨 문구가 적혀 있나요?",
             "files": ["assets/sample-images/03.png"],
         }
     ],
 ]
 ##############################################################################
+# Gradio UI (Blocks) 구성
 ##############################################################################
 # Custom stylesheet passed to gr.Blocks(css=...).
 # NOTE(review): only the opening `body {` line is visible here; the rest of
 # the stylesheet is presumably hidden by the diff view.
 css = """
 body {
 """
 # HTML header shown at the top of the page via gr.Markdown(title_html).
 title_html = """
+<h1 align="center" style="margin-bottom: 0.2em;"> 🤗 Vidraft-G3-27B : Multimodal + VLM + Deep Research </h1>
 <p align="center" style="font-size:1.1em; color:#555;">
     Multimodal Chat Interface + Optional Web Search
 </p>
 """
+with gr.Blocks(css=css, title="Vidraft-G3-27B") as demo:
     # Top-level Gradio layout: page header, the option controls and chat
     # interface, then a row of example inputs.
     # NOTE(review): several stretches of this block are missing from this view
     # (indentation jumps, call arguments without their opening call) --
     # presumably unchanged lines hidden by the diff; confirm against the full
     # file before editing.
     gr.Markdown(title_html)
     with gr.Row():
                 # Toggle that enables the automatic keyword-based web search.
                 web_search_checkbox = gr.Checkbox(
                     label="Web Search",
                     value=False,
+                    info="Check to enable a Deep Research(auto keywords) before the chat reply"
                 )
             # Labelled as unused, yet still listed among the values handed to
             # the chat interface further down.
             web_search_text = gr.Textbox(
                 lines=1,
+                label="(Unused) Web Search Query",
+                placeholder="No direct input needed"
             )
             gr.Markdown("---")
             # NOTE(review): the component these arguments belong to is not
             # visible here; presumably the max-new-tokens slider -- confirm.
                 minimum=100,
                 maximum=8000,
                 step=50,
+                value=2000,  # default lowered slightly to save GPU memory
             )
+            gr.Markdown("<br><br>")
+        # Main ChatInterface
         with gr.Column(scale=7):
             chat = gr.ChatInterface(
                 fn=run,
                 # NOTE(review): the opening of the list that this item closes
                 # is not visible; presumably the extra inputs forwarded to
                 # run -- confirm.
                     web_search_text,
                 ],
                 stop_btn=False,
+                title="Vidraft-G3-27B",
                 examples=examples,
                 run_examples_on_click=False,
                 cache_examples=False,
     with gr.Row(elem_id="examples_row"):
         with gr.Column(scale=12, elem_id="examples_container"):
             gr.Markdown("### Example Inputs (click to load)")
             # NOTE(review): inputs is an empty list, so no component is named
             # as the target for a clicked example -- confirm this is intended.
             gr.Examples(
                 examples=examples,
+                inputs=[],
                 cache_examples=False
             )
 if __name__ == "__main__":
+    # share=True triggers a warning on HF Spaces; it only works when run locally
+    # demo.launch(share=True)
     demo.launch()