multimodal-chat-MBTI-ESTP

Sleeping

App Files Files Community

seawolf2357 committed on Mar 16

Commit

1c72d37

verified ·

1 Parent(s): a9e7179

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -37

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ import PyPDF2
 ##################################################
 # 상수 및 모델 로딩
 ##################################################
-MAX_CONTENT_CHARS = 8000  # 너무 큰 파일 내용은 이 정도까지만 표시
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
@@ -40,7 +40,7 @@ MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
 # 1) CSV, TXT, PDF 분석 함수
 ##################################################
 def analyze_csv_file(path: str) -> str:
-    """CSV 파일을 읽어 문자열화. 너무 길면 일부만 출력."""
     try:
         df = pd.read_csv(path)
         df_str = df.to_string()
@@ -52,7 +52,7 @@ def analyze_csv_file(path: str) -> str:
 def analyze_txt_file(path: str) -> str:
-    """TXT 파일 전체를 읽어 문자열 반환. 너무 길면 잘라냄."""
     try:
         with open(path, "r", encoding="utf-8") as f:
             text = f.read()
@@ -64,9 +64,9 @@ def analyze_txt_file(path: str) -> str:
 def pdf_to_markdown(pdf_path: str) -> str:
-    """PDF -> 텍스트 추출 -> Markdown 형식으로 변환. 너무 길면 자름."""
-    text_chunks = []
     try:
         with open(pdf_path, "rb") as f:
             reader = PyPDF2.PdfReader(f)
             for page_num, page in enumerate(reader.pages, start=1):
@@ -85,7 +85,7 @@ def pdf_to_markdown(pdf_path: str) -> str:
 ##################################################
-# 2) 이미지/비디오 개수 제한 검사
 ##################################################
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
@@ -102,11 +102,11 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]:
     image_count = 0
     video_count = 0
     for item in history:
-        # assistant 메시지이거나 content가 str이면 제외
         if item["role"] != "user" or isinstance(item["content"], str):
             continue
-        # 이미지/비디오 경로로만 카운트
-        if item["content"][0].endswith(".mp4"):
             video_count += 1
         else:
             image_count += 1
@@ -115,13 +115,11 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]:
 def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     """
-    - 이미지/비디오만 대상으로 개수·혼합 제한
-    - CSV, PDF, TXT 등은 대상 제외
-    - <image> 태그와 실제 이미지 수가 일치하는지 등
     """
     media_files = []
     for f in message["files"]:
-        # 이미지 확장자 또는 .mp4
         if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
             media_files.append(f)
@@ -146,7 +144,7 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     if video_count == 0 and image_count > MAX_NUM_IMAGES:
         gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
         return False
-    # <image> 태그와 실제 이미지 수가 일치?
     if "<image>" in message["text"] and message["text"].count("<image>") != new_image_count:
         gr.Warning("The number of <image> tags in the text does not match the number of images.")
         return False
@@ -158,7 +156,6 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
 # 3) 비디오 처리
 ##################################################
 def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
-    """영상에서 일정 간격으로 프레임을 추출, PIL 이미지와 timestamp 반환."""
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
     total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
@@ -203,14 +200,13 @@ def process_interleaved_images(message: dict) -> list[dict]:
         elif part.strip():
             content.append({"type": "text", "text": part.strip()})
         else:
-            # 공백만 있는 경우
             if isinstance(part, str) and part != "<image>":
                 content.append({"type": "text", "text": part})
     return content
 ##################################################
-# 5) CSV/PDF/TXT는 텍스트로만, 이미지/비디오는 경로로
 ##################################################
 def process_new_user_message(message: dict) -> list[dict]:
     if not message["files"]:
@@ -223,13 +219,12 @@ def process_new_user_message(message: dict) -> list[dict]:
     txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
     pdf_files = [f for f in message["files"] if f.lower().endswith(".pdf")]
-    # user 텍스트 먼저 추가
     content_list = [{"type": "text", "text": message["text"]}]
     # CSV
     for csv_path in csv_files:
         csv_analysis = analyze_csv_file(csv_path)
-        # 분석 내용만 넣음 (파일 경로를 히스토리에 추가하지 않음)
         content_list.append({"type": "text", "text": csv_analysis})
     # TXT
@@ -249,10 +244,8 @@ def process_new_user_message(message: dict) -> list[dict]:
     # 이미지
     if "<image>" in message["text"]:
-        # interleaved
         return process_interleaved_images(message)
     else:
-        # 여러 장 이미지
         for img_path in image_files:
             content_list.append({"type": "image", "url": img_path})
@@ -260,45 +253,58 @@ def process_new_user_message(message: dict) -> list[dict]:
 ##################################################
-# 6) history -> LLM 메시지 변환
 ##################################################
 def process_history(history: list[dict]) -> list[dict]:
     messages = []
-    current_user_content: list[dict] = []
     for item in history:
         if item["role"] == "assistant":
             if current_user_content:
                 messages.append({"role": "user", "content": current_user_content})
                 current_user_content = []
             messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
         else:
             content = item["content"]
             if isinstance(content, str):
                 current_user_content.append({"type": "text", "text": content})
             else:
-                # 이미지 or 기타 파일 url
-                current_user_content.append({"type": "image", "url": content[0]})
     return messages
 ##################################################
-# 7) 메인 추론 함수
 ##################################################
 @spaces.GPU(duration=120)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
-    # a) 이미지/비디오 제한 검사
     if not validate_media_constraints(message, history):
         yield ""
         return
-    # b) 대화 기록 + 이번 메시지
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
     messages.extend(process_history(history))
     messages.append({"role": "user", "content": process_new_user_message(message)})
-    # c) 모델 추론
     inputs = processor.apply_chat_template(
         messages,
         add_generation_prompt=True,
@@ -308,11 +314,11 @@ def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tok
     ).to(device=model.device, dtype=torch.bfloat16)
     streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
-    gen_kwargs = dict(
-        inputs,
-        streamer=streamer,
-        max_new_tokens=max_new_tokens,
-    )
     t = Thread(target=model.generate, kwargs=gen_kwargs)
     t.start()
@@ -322,6 +328,8 @@ def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tok
         yield output
 ##################################################
 # 예시들 (한글화 버전)
 ##################################################
@@ -457,6 +465,7 @@ examples = [
 ##################################################
 # 9) Gradio ChatInterface
 ##################################################
@@ -464,7 +473,7 @@ demo = gr.ChatInterface(
     fn=run,
     type="messages",
     chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
-    # 이미지/동영상 + CSV/TXT/PDF 허용 (이미지: webp 포함)
     textbox=gr.MultimodalTextbox(
         file_types=[
             ".png", ".jpg", ".jpeg", ".gif", ".webp",
@@ -496,8 +505,7 @@ demo = gr.ChatInterface(
     delete_cache=(1800, 1800),
 )
 if __name__ == "__main__":
     demo.launch()

 ##################################################
 # 상수 및 모델 로딩
 ##################################################
+MAX_CONTENT_CHARS = 8000  # 텍스트로 전달 시 최대 8000자까지만
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 # 1) CSV, TXT, PDF 분석 함수
 ##################################################
 def analyze_csv_file(path: str) -> str:
+    """CSV 파일 -> 문자열. 길면 잘라냄."""
     try:
         df = pd.read_csv(path)
         df_str = df.to_string()
 def analyze_txt_file(path: str) -> str:
+    """TXT 파일 -> 전체 문자열. 길면 잘라냄."""
     try:
         with open(path, "r", encoding="utf-8") as f:
             text = f.read()
 def pdf_to_markdown(pdf_path: str) -> str:
+    """PDF -> 텍스트 추출 -> Markdown. 길면 잘라냄."""
     try:
+        text_chunks = []
         with open(pdf_path, "rb") as f:
             reader = PyPDF2.PdfReader(f)
             for page_num, page in enumerate(reader.pages, start=1):
 ##################################################
+# 2) 이미지/비디오 제한 검사 (CSV, PDF, TXT 제외)
 ##################################################
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
     image_count = 0
     video_count = 0
     for item in history:
         if item["role"] != "user" or isinstance(item["content"], str):
             continue
+        # item["content"]가 ["경로"] 형태일 때, 확장자를 확인
+        file_path = item["content"][0]
+        if file_path.endswith(".mp4"):
             video_count += 1
         else:
             image_count += 1
 def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     """
+    이미지 & 비디오 제한
     """
     media_files = []
     for f in message["files"]:
+        # 이미지/비디오만
         if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
             media_files.append(f)
     if video_count == 0 and image_count > MAX_NUM_IMAGES:
         gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
         return False
+    # <image> 태그와 실제 이미지 수 일치?
     if "<image>" in message["text"] and message["text"].count("<image>") != new_image_count:
         gr.Warning("The number of <image> tags in the text does not match the number of images.")
         return False
 # 3) 비디오 처리
 ##################################################
 def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
     total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
         elif part.strip():
             content.append({"type": "text", "text": part.strip()})
         else:
             if isinstance(part, str) and part != "<image>":
                 content.append({"type": "text", "text": part})
     return content
 ##################################################
+# 5) CSV/PDF/TXT는 텍스트 변환만, 이미지/비디오는 경로로
 ##################################################
 def process_new_user_message(message: dict) -> list[dict]:
     if not message["files"]:
     txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
     pdf_files = [f for f in message["files"] if f.lower().endswith(".pdf")]
+    # user 텍스트 추가
     content_list = [{"type": "text", "text": message["text"]}]
     # CSV
     for csv_path in csv_files:
         csv_analysis = analyze_csv_file(csv_path)
         content_list.append({"type": "text", "text": csv_analysis})
     # TXT
     # 이미지
     if "<image>" in message["text"]:
         return process_interleaved_images(message)
     else:
         for img_path in image_files:
             content_list.append({"type": "image", "url": img_path})
 ##################################################
+# 6) 히스토리 -> LLM 메시지 변환
 ##################################################
 def process_history(history: list[dict]) -> list[dict]:
+    """
+    여기서, 이미지/비디오 외의 파일(.csv, .pdf, .txt) 경로는
+    모델로 전달되지 않도록 제거 (or 무시)
+    """
     messages = []
+    current_user_content = []
     for item in history:
         if item["role"] == "assistant":
             if current_user_content:
                 messages.append({"role": "user", "content": current_user_content})
                 current_user_content = []
+            # assistant -> 그냥 텍스트로
             messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
         else:
+            # user
             content = item["content"]
             if isinstance(content, str):
+                # 단순 텍스트
                 current_user_content.append({"type": "text", "text": content})
             else:
+                # 보통 [파일경로] 형태
+                file_path = content[0]
+                # 만약 이미지나 mp4가 아니라면 -> 무시
+                if re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE) or file_path.endswith(".mp4"):
+                    current_user_content.append({"type": "image", "url": file_path})
+                else:
+                    # csv, pdf, txt 등은 제거
+                    pass
     return messages
 ##################################################
+# 7) 메인 추론
 ##################################################
 @spaces.GPU(duration=120)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
+    # a) 미디어 제한 검사
     if not validate_media_constraints(message, history):
         yield ""
         return
+    # b) 기존 히스토리 -> LLM 메시지
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
     messages.extend(process_history(history))
     messages.append({"role": "user", "content": process_new_user_message(message)})
+    # c) 모델 호출
     inputs = processor.apply_chat_template(
         messages,
         add_generation_prompt=True,
     ).to(device=model.device, dtype=torch.bfloat16)
     streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
+    gen_kwargs = {
+        "inputs": inputs,
+        "streamer": streamer,
+        "max_new_tokens": max_new_tokens,
+    }
     t = Thread(target=model.generate, kwargs=gen_kwargs)
     t.start()
         yield output
 ##################################################
 # 예시들 (한글화 버전)
 ##################################################
 ##################################################
 # 9) Gradio ChatInterface
 ##################################################
     fn=run,
     type="messages",
     chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
+    # 이미지(여러 확장자), mp4, csv, txt, pdf 허용
     textbox=gr.MultimodalTextbox(
         file_types=[
             ".png", ".jpg", ".jpeg", ".gif", ".webp",
     delete_cache=(1800, 1800),
 )
 if __name__ == "__main__":
     demo.launch()