multimodal-chat-MBTI-ISFP

Sleeping

App Files Files Community

seawolf2357 committed on Mar 16

Commit

1670280

verified ·

1 Parent(s): 75b15f6

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -74

app.py CHANGED Viewed

@@ -14,30 +14,26 @@ from loguru import logger
 from PIL import Image
 from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
-# [CSV/TXT 분석용]
 import pandas as pd
-##################################################
-# 전체 전문을 넘기되, 너무 클 경우 잘라내기 위한 상수
-##################################################
-MAX_CONTENT_CHARS = 8000  # 예: 8000자 초과 시 잘라냄
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 model = Gemma3ForConditionalGeneration.from_pretrained(
-    model_id, device_map="auto", torch_dtype=torch.bfloat16, attn_implementation="eager"
 )
 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
-##################################################
-# CSV/TXT 전문 처리 함수
-##################################################
 def analyze_csv_file(path: str) -> str:
     """
-    CSV 파일 전체를 문자열로 변환하여 리턴.
-    너무 길면 MAX_CONTENT_CHARS까지만 잘라냄.
     """
     try:
         df = pd.read_csv(path)
@@ -45,37 +41,26 @@ def analyze_csv_file(path: str) -> str:
         if len(df_str) > MAX_CONTENT_CHARS:
             df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
-        return (
-            f"**[CSV File: {os.path.basename(path)}]**\n\n"
-            f"{df_str}"
-        )
     except Exception as e:
         return f"Failed to read CSV ({os.path.basename(path)}): {str(e)}"
 def analyze_txt_file(path: str) -> str:
     """
-    TXT 파일 전체 내용을 읽어서 모델에 넘김.
-    너무 길면 MAX_CONTENT_CHARS까지만 잘라냄.
     """
     try:
         with open(path, "r", encoding="utf-8") as f:
             text = f.read()
         if len(text) > MAX_CONTENT_CHARS:
             text = text[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
-        return (
-            f"**[TXT File: {os.path.basename(path)}]**\n\n"
-            f"{text}"
-        )
     except Exception as e:
         return f"Failed to read TXT ({os.path.basename(path)}): {str(e)}"
-##################################################
-# 기존 미디어 파일 검사 로직 (이미지/비디오)
-##################################################
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
     video_count = 0
@@ -105,14 +90,13 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     - 비디오 1개 초과 불가
     - 비디오/이미지 혼합 불가
     - 이미지 개수 MAX_NUM_IMAGES 초과 불가
-    - <image> 태그가 있으면 태그 수와 이미지 수 일치
-    CSV, TXT, PDF 등은 여기서 제한하지 않음.
     """
     media_files = []
     for f in message["files"]:
-        # mp4나 대표 이미지 확장자만 검사
-        # (파일명에 .png / .jpg / .gif / .webp 등 있을 때)
-        if f.endswith(".mp4") or re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE):
             media_files.append(f)
     new_image_count, new_video_count = count_files_in_new_message(media_files)
@@ -140,9 +124,6 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     return True
-##################################################
-# 비디오 처리
-##################################################
 def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
@@ -177,9 +158,6 @@ def process_video(video_path: str) -> list[dict]:
     return content
-##################################################
-# interleaved <image> 태그 처리
-##################################################
 def process_interleaved_images(message: dict) -> list[dict]:
     logger.debug(f"{message['files']=}")
     parts = re.split(r"(<image>)", message["text"])
@@ -188,7 +166,6 @@ def process_interleaved_images(message: dict) -> list[dict]:
     content = []
     image_index = 0
     for part in parts:
-        logger.debug(f"{part=}")
         if part == "<image>":
             content.append({"type": "image", "url": message["files"][image_index]})
             logger.debug(f"file: {message['files'][image_index]}")
@@ -201,16 +178,7 @@ def process_interleaved_images(message: dict) -> list[dict]:
     return content
-##################################################
-# CSV, TXT 파일도 전문을 LLM에 넘기도록
-##################################################
 def process_new_user_message(message: dict) -> list[dict]:
-    """
-    - mp4 -> 비디오 처리
-    - 이미지 -> interleaved or multiple
-    - CSV -> 전체 df.to_string() (너무 길면 잘라냄)
-    - TXT -> 전체 text (너무 길면 잘라냄)
-    """
     if not message["files"]:
         return [{"type": "text", "text": message["text"]}]
@@ -220,7 +188,7 @@ def process_new_user_message(message: dict) -> list[dict]:
     csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
     txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
-    # 사용자 텍스트
     content_list = [{"type": "text", "text": message["text"]}]
     # CSV 전문
@@ -233,7 +201,7 @@ def process_new_user_message(message: dict) -> list[dict]:
         txt_analysis = analyze_txt_file(txt_path)
         content_list.append({"type": "text", "text": txt_analysis})
-    # 비디오
     if video_files:
         content_list += process_video(video_files[0])
         return content_list
@@ -242,7 +210,7 @@ def process_new_user_message(message: dict) -> list[dict]:
     if "<image>" in message["text"]:
         return process_interleaved_images(message)
-    # 일반 이미지(여러 장)
     if image_files:
         for img_path in image_files:
             content_list.append({"type": "image", "url": img_path})
@@ -250,9 +218,6 @@ def process_new_user_message(message: dict) -> list[dict]:
     return content_list
-##################################################
-# history -> LLM 메시지 변환
-##################################################
 def process_history(history: list[dict]) -> list[dict]:
     messages = []
     current_user_content: list[dict] = []
@@ -271,9 +236,6 @@ def process_history(history: list[dict]) -> list[dict]:
     return messages
-##################################################
-# 메인 추론 함수
-##################################################
 @spaces.GPU(duration=120)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message, history):
@@ -309,9 +271,6 @@ def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tok
         yield output
-##################################################
-# 예시 목록 (기존)
-##################################################
 examples = [
     [
         {
@@ -435,16 +394,16 @@ examples = [
 ]
-##################################################
-# Gradio ChatInterface
-##################################################
 demo = gr.ChatInterface(
     fn=run,
     type="messages",
     chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
-    # 여기서 WEBP를 포함한 모든 이미지, mp4, csv, txt, pdf 허용
     textbox=gr.MultimodalTextbox(
-        file_types=["image/*", ".mp4", ".csv", ".txt", ".pdf"],
         file_count="multiple",
         autofocus=True
     ),
@@ -452,18 +411,9 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Textbox(
             label="System Prompt",
-            value=(
-                "You are a deeply thoughtful AI. Consider problems thoroughly and derive "
-                "correct solutions through systematic reasoning. Please answer in korean."
-            )
-        ),
-        gr.Slider(
-            label="Max New Tokens",
-            minimum=100,
-            maximum=8000,
-            step=50,
-            value=2000
         ),
     ],
     stop_btn=False,
     title="Gemma 3 27B IT",

 from PIL import Image
 from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
+# CSV/TXT 분석
 import pandas as pd
+MAX_CONTENT_CHARS = 8000  # 파일에서 읽은 내용이 너무 길 경우 이 정도에서 잘라냄
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 model = Gemma3ForConditionalGeneration.from_pretrained(
+    model_id,
+    device_map="auto",
+    torch_dtype=torch.bfloat16,
+    attn_implementation="eager"
 )
 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
 def analyze_csv_file(path: str) -> str:
     """
+    CSV 파일을 읽어 문자열화. 너무 크면 일부만 잘라냄.
     """
     try:
         df = pd.read_csv(path)
         if len(df_str) > MAX_CONTENT_CHARS:
             df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
+        return f"**[CSV File: {os.path.basename(path)}]**\n\n{df_str}"
     except Exception as e:
         return f"Failed to read CSV ({os.path.basename(path)}): {str(e)}"
 def analyze_txt_file(path: str) -> str:
     """
+    TXT 파일 전문 읽어들이되, 너무 길면 잘라냄.
     """
     try:
         with open(path, "r", encoding="utf-8") as f:
             text = f.read()
         if len(text) > MAX_CONTENT_CHARS:
             text = text[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
+        return f"**[TXT File: {os.path.basename(path)}]**\n\n{text}"
     except Exception as e:
         return f"Failed to read TXT ({os.path.basename(path)}): {str(e)}"
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
     video_count = 0
     - 비디오 1개 초과 불가
     - 비디오/이미지 혼합 불가
     - 이미지 개수 MAX_NUM_IMAGES 초과 불가
+    - <image> 태그가 있으면 태그 수와 실제 이미지 개수 일치
+    - CSV, TXT, PDF 등은 여기서 제한하지 않음.
     """
     media_files = []
     for f in message["files"]:
+        # 이미지(여러 확장자)나 mp4만 체크
+        if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
             media_files.append(f)
     new_image_count, new_video_count = count_files_in_new_message(media_files)
     return True
 def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
     return content
 def process_interleaved_images(message: dict) -> list[dict]:
     logger.debug(f"{message['files']=}")
     parts = re.split(r"(<image>)", message["text"])
     content = []
     image_index = 0
     for part in parts:
         if part == "<image>":
             content.append({"type": "image", "url": message["files"][image_index]})
             logger.debug(f"file: {message['files'][image_index]}")
     return content
 def process_new_user_message(message: dict) -> list[dict]:
     if not message["files"]:
         return [{"type": "text", "text": message["text"]}]
     csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
     txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
+    # 사용자 입력 텍스트를 먼저
     content_list = [{"type": "text", "text": message["text"]}]
     # CSV 전문
         txt_analysis = analyze_txt_file(txt_path)
         content_list.append({"type": "text", "text": txt_analysis})
+    # 동영상 처리
     if video_files:
         content_list += process_video(video_files[0])
         return content_list
     if "<image>" in message["text"]:
         return process_interleaved_images(message)
+    # 일반 이미지들
     if image_files:
         for img_path in image_files:
             content_list.append({"type": "image", "url": img_path})
     return content_list
 def process_history(history: list[dict]) -> list[dict]:
     messages = []
     current_user_content: list[dict] = []
     return messages
 @spaces.GPU(duration=120)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message, history):
         yield output
 examples = [
     [
         {
 ]
 demo = gr.ChatInterface(
     fn=run,
     type="messages",
     chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
+    # .webp, .png, .jpg, .jpeg, .gif, .mp4, .csv, .txt, .pdf 모두 허용
     textbox=gr.MultimodalTextbox(
+        file_types=[
+            ".webp", ".png", ".jpg", ".jpeg", ".gif",
+            ".mp4", ".csv", ".txt", ".pdf"
+        ],
         file_count="multiple",
         autofocus=True
     ),
     additional_inputs=[
         gr.Textbox(
             label="System Prompt",
+            value="You are a deeply thoughtful AI. Consider problems thoroughly and derive correct solutions through systematic reasoning. Please answer in korean."
         ),
+        gr.Slider(label="Max New Tokens", minimum=100, maximum=8000, step=50, value=2000),
     ],
     stop_btn=False,
     title="Gemma 3 27B IT",