seawolf2357 committed on
Commit
3634066
·
verified ·
1 Parent(s): 05dc4f5

Update app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +399 -142
app-backup.py CHANGED
@@ -6,6 +6,7 @@ import tempfile
6
  from collections.abc import Iterator
7
  from threading import Thread
8
 
 
9
  import cv2
10
  import gradio as gr
11
  import spaces
@@ -20,7 +21,55 @@ import pandas as pd
20
  # PDF ν…μŠ€νŠΈ μΆ”μΆœ
21
  import PyPDF2
22
 
23
- MAX_CONTENT_CHARS = 8000 # λ„ˆλ¬΄ 큰 νŒŒμΌμ„ 막기 μœ„ν•΄ μ΅œλŒ€ ν‘œμ‹œ 8000자
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
26
  processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
@@ -43,6 +92,10 @@ def analyze_csv_file(path: str) -> str:
43
  """
44
  try:
45
  df = pd.read_csv(path)
 
 
 
 
46
  df_str = df.to_string()
47
  if len(df_str) > MAX_CONTENT_CHARS:
48
  df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
@@ -73,11 +126,20 @@ def pdf_to_markdown(pdf_path: str) -> str:
73
  try:
74
  with open(pdf_path, "rb") as f:
75
  reader = PyPDF2.PdfReader(f)
76
- for page_num, page in enumerate(reader.pages, start=1):
 
 
 
77
  page_text = page.extract_text() or ""
78
  page_text = page_text.strip()
79
  if page_text:
80
- text_chunks.append(f"## Page {page_num}\n\n{page_text}\n")
 
 
 
 
 
 
81
  except Exception as e:
82
  return f"Failed to read PDF ({os.path.basename(pdf_path)}): {str(e)}"
83
 
@@ -97,7 +159,7 @@ def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
97
  for path in paths:
98
  if path.endswith(".mp4"):
99
  video_count += 1
100
- else:
101
  image_count += 1
102
  return image_count, video_count
103
 
@@ -108,10 +170,13 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]:
108
  for item in history:
109
  if item["role"] != "user" or isinstance(item["content"], str):
110
  continue
111
- if item["content"][0].endswith(".mp4"):
112
- video_count += 1
113
- else:
114
- image_count += 1
 
 
 
115
  return image_count, video_count
116
 
117
 
@@ -123,11 +188,9 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
123
  - <image> νƒœκ·Έκ°€ 있으면 νƒœκ·Έ μˆ˜μ™€ μ‹€μ œ 이미지 수 일치
124
  - CSV, TXT, PDF 등은 μ—¬κΈ°μ„œ μ œν•œν•˜μ§€ μ•ŠμŒ
125
  """
 
126
  media_files = []
127
  for f in message["files"]:
128
- # 이미지: png/jpg/jpeg/gif/webp
129
- # λΉ„λ””μ˜€: mp4
130
- # cf) PDF, CSV, TXT 등은 μ œμ™Έ
131
  if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
132
  media_files.append(f)
133
 
@@ -149,9 +212,15 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
149
  if video_count == 0 and image_count > MAX_NUM_IMAGES:
150
  gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
151
  return False
152
- if "<image>" in message["text"] and message["text"].count("<image>") != new_image_count:
153
- gr.Warning("The number of <image> tags in the text does not match the number of images.")
154
- return False
 
 
 
 
 
 
155
 
156
  return True
157
 
@@ -164,7 +233,8 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
164
  fps = vidcap.get(cv2.CAP_PROP_FPS)
165
  total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
166
 
167
- frame_interval = int(fps / 3)
 
168
  frames = []
169
 
170
  for i in range(0, total_frames, frame_interval):
@@ -175,6 +245,10 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
175
  pil_image = Image.fromarray(image)
176
  timestamp = round(i / fps, 2)
177
  frames.append((pil_image, timestamp))
 
 
 
 
178
 
179
  vidcap.release()
180
  return frames
@@ -200,9 +274,13 @@ def process_interleaved_images(message: dict) -> list[dict]:
200
  parts = re.split(r"(<image>)", message["text"])
201
  content = []
202
  image_index = 0
 
 
 
 
203
  for part in parts:
204
- if part == "<image>":
205
- content.append({"type": "image", "url": message["files"][image_index]})
206
  image_index += 1
207
  elif part.strip():
208
  content.append({"type": "text", "text": part.strip()})
@@ -216,13 +294,30 @@ def process_interleaved_images(message: dict) -> list[dict]:
216
  ##################################################
217
  # PDF + CSV + TXT + 이미지/λΉ„λ””μ˜€
218
  ##################################################
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  def process_new_user_message(message: dict) -> list[dict]:
220
  if not message["files"]:
221
  return [{"type": "text", "text": message["text"]}]
222
 
223
  # 1) 파일 λΆ„λ₯˜
224
- video_files = [f for f in message["files"] if f.endswith(".mp4")]
225
- image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
226
  csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
227
  txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
228
  pdf_files = [f for f in message["files"] if f.lower().endswith(".pdf")]
@@ -251,9 +346,13 @@ def process_new_user_message(message: dict) -> list[dict]:
251
  return content_list
252
 
253
  # 7) 이미지 처리
254
- if "<image>" in message["text"]:
255
  # interleaved
256
- return process_interleaved_images(message)
 
 
 
 
257
  else:
258
  # 일반 μ—¬λŸ¬ μž₯
259
  for img_path in image_files:
@@ -281,9 +380,18 @@ def process_history(history: list[dict]) -> list[dict]:
281
  content = item["content"]
282
  if isinstance(content, str):
283
  current_user_content.append({"type": "text", "text": content})
284
- else:
285
- # μ΄λ―Έμ§€λ‚˜ 기타
286
- current_user_content.append({"type": "image", "url": content[0]})
 
 
 
 
 
 
 
 
 
287
  return messages
288
 
289
 
@@ -291,43 +399,100 @@ def process_history(history: list[dict]) -> list[dict]:
291
  # 메인 μΆ”λ‘  ν•¨μˆ˜
292
  ##################################################
293
  @spaces.GPU(duration=120)
294
- def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  if not validate_media_constraints(message, history):
296
  yield ""
297
  return
298
 
299
- messages = []
300
- if system_prompt:
301
- messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
302
- messages.extend(process_history(history))
303
- messages.append({"role": "user", "content": process_new_user_message(message)})
304
-
305
- inputs = processor.apply_chat_template(
306
- messages,
307
- add_generation_prompt=True,
308
- tokenize=True,
309
- return_dict=True,
310
- return_tensors="pt",
311
- ).to(device=model.device, dtype=torch.bfloat16)
312
-
313
- streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
314
- gen_kwargs = dict(
315
- inputs,
316
- streamer=streamer,
317
- max_new_tokens=max_new_tokens,
318
- )
319
- t = Thread(target=model.generate, kwargs=gen_kwargs)
320
- t.start()
321
-
322
- output = ""
323
- for new_text in streamer:
324
- output += new_text
325
- yield output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
 
327
 
328
- ##################################################
329
- # μ˜ˆμ‹œλ“€ (κΈ°μ‘΄)
330
- ##################################################
331
  ##################################################
332
  # μ˜ˆμ‹œλ“€ (ν•œκΈ€ν™” 버전)
333
  ##################################################
@@ -335,8 +500,12 @@ examples = [
335
 
336
  [
337
  {
338
- "text": "PDF 파일 λ‚΄μš©μ„ μš”μ•½, λΆ„μ„ν•˜λΌ.",
339
  "files": ["assets/additional-examples/pdf.pdf"],
 
 
 
 
340
  }
341
  ],
342
  [
@@ -347,45 +516,34 @@ examples = [
347
  ],
348
  [
349
  {
350
- "text": "λ™μΌν•œ λ§‰λŒ€ κ·Έλž˜ν”„λ₯Ό κ·Έλ¦¬λŠ” matplotlib μ½”λ“œλ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”.",
351
- "files": ["assets/additional-examples/barchart.png"],
352
  }
353
- ],
354
  [
355
  {
356
- "text": "이 μ˜μƒμ—μ„œ μ΄μƒν•œ 점이 λ¬΄μ—‡μΈκ°€μš”?",
357
- "files": ["assets/additional-examples/tmp.mp4"],
358
  }
359
- ],
360
  [
361
  {
362
  "text": "이미 이 μ˜μ–‘μ œλ₯Ό <image> κ°€μ§€κ³  있고, 이 μ œν’ˆ <image>을 μƒˆλ‘œ 사렀 ν•©λ‹ˆλ‹€. ν•¨κ»˜ μ„­μ·¨ν•  λ•Œ μ£Όμ˜ν•΄μ•Ό ν•  점이 μžˆμ„κΉŒμš”?",
363
  "files": ["assets/additional-examples/pill1.png", "assets/additional-examples/pill2.png"],
364
  }
365
- ],
366
- [
367
- {
368
- "text": "μ΄λ―Έμ§€μ˜ μ‹œκ°μ  μš”μ†Œμ—μ„œ μ˜κ°μ„ λ°›μ•„ μ‹œλ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”.",
369
- "files": ["assets/sample-images/06-1.png", "assets/sample-images/06-2.png"],
370
- }
371
- ],
372
  [
373
  {
374
- "text": "μ΄λ―Έμ§€μ˜ μ‹œκ°μ  μš”μ†Œλ₯Ό ν† λŒ€λ‘œ 짧은 악곑을 μž‘κ³‘ν•΄μ£Όμ„Έμš”.",
375
- "files": [
376
- "assets/sample-images/07-1.png",
377
- "assets/sample-images/07-2.png",
378
- "assets/sample-images/07-3.png",
379
- "assets/sample-images/07-4.png",
380
- ],
381
  }
382
- ],
383
  [
384
  {
385
- "text": "이 μ§‘μ—μ„œ 무슨 일이 μžˆμ—ˆμ„μ§€ 짧은 이야기λ₯Ό μ§€μ–΄λ³΄μ„Έμš”.",
386
- "files": ["assets/sample-images/08.png"],
387
  }
388
- ],
389
  [
390
  {
391
  "text": "μ΄λ―Έμ§€λ“€μ˜ μˆœμ„œλ₯Ό λ°”νƒ•μœΌλ‘œ 짧은 이야기λ₯Ό λ§Œλ“€μ–΄ μ£Όμ„Έμš”.",
@@ -400,40 +558,33 @@ examples = [
400
  ],
401
  [
402
  {
403
- "text": "이 μ„Έκ³„μ—μ„œ μ‚΄κ³  μžˆμ„ 생물듀을 μƒμƒν•΄μ„œ λ¬˜μ‚¬ν•΄μ£Όμ„Έμš”.",
404
- "files": ["assets/sample-images/10.png"],
405
  }
406
  ],
407
  [
408
  {
409
- "text": "이미지에 적힌 ν…μŠ€νŠΈλ₯Ό μ½μ–΄μ£Όμ„Έμš”.",
410
- "files": ["assets/additional-examples/1.png"],
411
  }
412
- ],
 
413
  [
414
  {
415
- "text": "이 티켓은 μ–Έμ œ λ°œκΈ‰λœ 것이고, 가격은 μ–Όλ§ˆμΈκ°€μš”?",
416
- "files": ["assets/additional-examples/2.png"],
417
  }
418
  ],
 
 
419
  [
420
  {
421
  "text": "이미지에 μžˆλŠ” ν…μŠ€νŠΈλ₯Ό κ·ΈλŒ€λ‘œ μ½μ–΄μ„œ λ§ˆν¬λ‹€μš΄ ν˜•νƒœλ‘œ μ μ–΄μ£Όμ„Έμš”.",
422
  "files": ["assets/additional-examples/3.png"],
423
  }
424
  ],
425
- [
426
- {
427
- "text": "이 적뢄을 ν’€μ–΄μ£Όμ„Έμš”.",
428
- "files": ["assets/additional-examples/4.png"],
429
- }
430
- ],
431
- [
432
- {
433
- "text": "이 이미지λ₯Ό κ°„λ‹¨νžˆ μΊ‘μ…˜μœΌλ‘œ μ„€λͺ…ν•΄μ£Όμ„Έμš”.",
434
- "files": ["assets/sample-images/01.png"],
435
- }
436
- ],
437
  [
438
  {
439
  "text": "이 ν‘œμ§€νŒμ—λŠ” 무슨 문ꡬ가 μ ν˜€ μžˆλ‚˜μš”?",
@@ -446,54 +597,160 @@ examples = [
446
  "files": ["assets/sample-images/03.png"],
447
  }
448
  ],
449
- [
450
- {
451
- "text": "이미지에 λ³΄μ΄λŠ” λͺ¨λ“  사물과 κ·Έ 색상을 λ‚˜μ—΄ν•΄μ£Όμ„Έμš”.",
452
- "files": ["assets/sample-images/04.png"],
453
- }
454
- ],
455
- [
456
- {
457
- "text": "μž₯면의 λΆ„μœ„κΈ°λ₯Ό λ¬˜μ‚¬ν•΄μ£Όμ„Έμš”.",
458
- "files": ["assets/sample-images/05.png"],
459
- }
460
- ],
461
  ]
462
 
463
 
464
 
465
- demo = gr.ChatInterface(
466
- fn=run,
467
- type="messages",
468
- chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
469
- # .webp, .png, .jpg, .jpeg, .gif, .mp4, .csv, .txt, .pdf λͺ¨λ‘ ν—ˆμš©
470
- textbox=gr.MultimodalTextbox(
471
- file_types=[
472
- ".webp", ".png", ".jpg", ".jpeg", ".gif",
473
- ".mp4", ".csv", ".txt", ".pdf"
474
- ],
475
- file_count="multiple",
476
- autofocus=True
477
- ),
478
- multimodal=True,
479
- additional_inputs=[
480
- gr.Textbox(
481
- label="System Prompt",
482
- value=(
483
- "You are a deeply thoughtful AI. Consider problems thoroughly and derive "
484
- "correct solutions through systematic reasoning. Please answer in korean."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
485
  )
486
- ),
487
- gr.Slider(label="Max New Tokens", minimum=100, maximum=8000, step=50, value=2000),
488
- ],
489
- stop_btn=False,
490
- title="Vidraft-Gemma-3-27B",
491
- examples=examples,
492
- run_examples_on_click=False,
493
- cache_examples=False,
494
- css_paths="style.css",
495
- delete_cache=(1800, 1800),
496
- )
497
 
498
  if __name__ == "__main__":
499
  demo.launch()
 
6
  from collections.abc import Iterator
7
  from threading import Thread
8
 
9
+ import requests # <-- For SERPHouse web search
10
  import cv2
11
  import gradio as gr
12
  import spaces
 
21
  # PDF ν…μŠ€νŠΈ μΆ”μΆœ
22
  import PyPDF2
23
 
24
##############################################################################
# SERPHouse API key for web search
##############################################################################
# SECURITY: this token is committed in plain text and must be treated as
# leaked. Set the SERPHOUSE_API_KEY environment variable to supply a private
# key; the literal fallback below only preserves the previous behaviour.
# TODO(review): rotate the key and delete the fallback.
SERPHOUSE_API_KEY = os.getenv(
    "SERPHOUSE_API_KEY",
    "V38CNn4HXpLtynJQyOeoUensTEYoFy8PBUxKpDqAW1pawT1vfJ2BWtPQ98h6",
)

_NO_WEB_RESULTS = "No web search results found."


def _format_organic_results(data: object, limit: int = 3) -> str:
    """Render the first ``limit`` organic hits of a SERPHouse payload as Markdown."""
    # The hits are read from data["results"]["results"]["organic"]. Any level
    # may be missing or null, so each one is checked instead of chaining .get().
    node = data
    for key in ("results", "results", "organic"):
        node = node.get(key) if isinstance(node, dict) else None
    if not isinstance(node, list) or not node:
        return _NO_WEB_RESULTS

    summary_lines = []
    for item in node[:limit]:
        if not isinstance(item, dict):
            continue  # ignore malformed entries rather than failing the search
        rank = item.get("position", "-")
        title = item.get("title", "No Title")
        link = item.get("link", "No Link")
        snippet = item.get("snippet", "(No snippet)")
        summary_lines.append(f"**Rank {rank}:** [{title}]({link})\n\n> {snippet}")

    return "\n\n".join(summary_lines) if summary_lines else _NO_WEB_RESULTS


##############################################################################
# Simple function to call the SERPHouse Live endpoint
# https://api.serphouse.com/serp/live
##############################################################################
def do_web_search(query: str) -> str:
    """Query the SERPHouse live endpoint and summarise the top organic results.

    Args:
        query: Search keywords, sent as the ``q`` parameter.

    Returns:
        A Markdown string with up to three ranked results (title, link and
        snippet), ``"No web search results found."`` when the response holds
        no organic results, or ``"Web search failed: ..."`` when the request
        or the JSON decoding fails.
    """
    url = "https://api.serphouse.com/serp/live"
    params = {
        "q": query,
        "domain": "google.com",
        "lang": "en",
        "device": "desktop",
        "serp_type": "web",
        "api_token": SERPHOUSE_API_KEY,
    }
    try:
        resp = requests.get(url, params=params, timeout=30)
        resp.raise_for_status()  # turn 4xx/5xx responses into exceptions
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # Network errors, HTTP error statuses and undecodable bodies are
        # reported to the caller as text; nothing is raised for them.
        logger.error(f"Web search failed: {e}")
        return f"Web search failed: {str(e)}"

    return _format_organic_results(data)
70
+
71
+
72
+ MAX_CONTENT_CHARS = 4000 # λ„ˆλ¬΄ 큰 νŒŒμΌμ„ 막기 μœ„ν•΄ μ΅œλŒ€ ν‘œμ‹œ 4000자
73
 
74
  model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
75
  processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 
92
  """
93
  try:
94
  df = pd.read_csv(path)
95
+ # 데이터 ν”„λ ˆμž„ 크기 μ œν•œ (ν–‰/μ—΄ μˆ˜κ°€ λ§Žμ€ 경우)
96
+ if df.shape[0] > 50 or df.shape[1] > 10:
97
+ df = df.iloc[:50, :10]
98
+
99
  df_str = df.to_string()
100
  if len(df_str) > MAX_CONTENT_CHARS:
101
  df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
 
126
  try:
127
  with open(pdf_path, "rb") as f:
128
  reader = PyPDF2.PdfReader(f)
129
+ # μ΅œλŒ€ 5νŽ˜μ΄μ§€λ§Œ 처리
130
+ max_pages = min(5, len(reader.pages))
131
+ for page_num in range(max_pages):
132
+ page = reader.pages[page_num]
133
  page_text = page.extract_text() or ""
134
  page_text = page_text.strip()
135
  if page_text:
136
+ # νŽ˜μ΄μ§€λ³„ ν…μŠ€νŠΈλ„ μ œν•œ
137
+ if len(page_text) > MAX_CONTENT_CHARS // max_pages:
138
+ page_text = page_text[:MAX_CONTENT_CHARS // max_pages] + "...(truncated)"
139
+ text_chunks.append(f"## Page {page_num+1}\n\n{page_text}\n")
140
+
141
+ if len(reader.pages) > max_pages:
142
+ text_chunks.append(f"\n...(Showing {max_pages} of {len(reader.pages)} pages)...")
143
  except Exception as e:
144
  return f"Failed to read PDF ({os.path.basename(pdf_path)}): {str(e)}"
145
 
 
159
  for path in paths:
160
  if path.endswith(".mp4"):
161
  video_count += 1
162
+ elif re.search(r"\.(png|jpg|jpeg|gif|webp)$", path, re.IGNORECASE):
163
  image_count += 1
164
  return image_count, video_count
165
 
 
170
  for item in history:
171
  if item["role"] != "user" or isinstance(item["content"], str):
172
  continue
173
+ if isinstance(item["content"], list) and len(item["content"]) > 0:
174
+ file_path = item["content"][0]
175
+ if isinstance(file_path, str):
176
+ if file_path.endswith(".mp4"):
177
+ video_count += 1
178
+ elif re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE):
179
+ image_count += 1
180
  return image_count, video_count
181
 
182
 
 
188
  - <image> νƒœκ·Έκ°€ 있으면 νƒœκ·Έ μˆ˜μ™€ μ‹€μ œ 이미지 수 일치
189
  - CSV, TXT, PDF 등은 μ—¬κΈ°μ„œ μ œν•œν•˜μ§€ μ•ŠμŒ
190
  """
191
+ # 이미지와 λΉ„λ””μ˜€ 파일만 필터링
192
  media_files = []
193
  for f in message["files"]:
 
 
 
194
  if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
195
  media_files.append(f)
196
 
 
212
  if video_count == 0 and image_count > MAX_NUM_IMAGES:
213
  gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
214
  return False
215
+
216
+ # 이미지 νƒœκ·Έ 검증 (μ‹€μ œ 이미지 파일만 계산)
217
+ if "<image>" in message["text"]:
218
+ # 이미지 파일만 필터링
219
+ image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
220
+ image_tag_count = message["text"].count("<image>")
221
+ if image_tag_count != len(image_files):
222
+ gr.Warning("The number of <image> tags in the text does not match the number of image files.")
223
+ return False
224
 
225
  return True
226
 
 
233
  fps = vidcap.get(cv2.CAP_PROP_FPS)
234
  total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
235
 
236
+ # 더 적은 ν”„λ ˆμž„μ„ μΆ”μΆœν•˜λ„λ‘ μ‘°μ •
237
+ frame_interval = max(int(fps), int(total_frames / 10)) # μ΄ˆλ‹Ή 1ν”„λ ˆμž„ λ˜λŠ” μ΅œλŒ€ 10ν”„λ ˆμž„
238
  frames = []
239
 
240
  for i in range(0, total_frames, frame_interval):
 
245
  pil_image = Image.fromarray(image)
246
  timestamp = round(i / fps, 2)
247
  frames.append((pil_image, timestamp))
248
+
249
+ # μ΅œλŒ€ 5ν”„λ ˆμž„λ§Œ μ‚¬μš©
250
+ if len(frames) >= 5:
251
+ break
252
 
253
  vidcap.release()
254
  return frames
 
274
  parts = re.split(r"(<image>)", message["text"])
275
  content = []
276
  image_index = 0
277
+
278
+ # 이미지 파일만 필터링
279
+ image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
280
+
281
  for part in parts:
282
+ if part == "<image>" and image_index < len(image_files):
283
+ content.append({"type": "image", "url": image_files[image_index]})
284
  image_index += 1
285
  elif part.strip():
286
  content.append({"type": "text", "text": part.strip()})
 
294
  ##################################################
295
  # PDF + CSV + TXT + 이미지/λΉ„λ””μ˜€
296
  ##################################################
297
def is_image_file(file_path: str) -> bool:
    """Return True when the path ends in a supported image extension.

    The extensions png, jpg, jpeg, gif and webp are matched case-insensitively.
    """
    extension_match = re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE)
    return extension_match is not None
300
+
301
+
302
def is_video_file(file_path: str) -> bool:
    """Return True when the path ends in ``.mp4`` (the match is case-sensitive)."""
    video_suffixes = (".mp4",)
    return file_path.endswith(video_suffixes)
305
+
306
+
307
def is_document_file(file_path: str) -> bool:
    """Return True when the path names a PDF, CSV or TXT file (case-insensitive)."""
    lowered = file_path.lower()
    return lowered.endswith((".pdf", ".csv", ".txt"))
312
+
313
+
314
  def process_new_user_message(message: dict) -> list[dict]:
315
  if not message["files"]:
316
  return [{"type": "text", "text": message["text"]}]
317
 
318
  # 1) 파일 λΆ„λ₯˜
319
+ video_files = [f for f in message["files"] if is_video_file(f)]
320
+ image_files = [f for f in message["files"] if is_image_file(f)]
321
  csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
322
  txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
323
  pdf_files = [f for f in message["files"] if f.lower().endswith(".pdf")]
 
346
  return content_list
347
 
348
  # 7) 이미지 처리
349
+ if "<image>" in message["text"] and image_files:
350
  # interleaved
351
+ interleaved_content = process_interleaved_images({"text": message["text"], "files": image_files})
352
+ # 원본 content_list μ•žλΆ€λΆ„(ν…μŠ€νŠΈ)을 μ œκ±°ν•˜κ³  interleaved둜 λŒ€μ²΄
353
+ if content_list[0]["type"] == "text":
354
+ content_list = content_list[1:] # 원본 ν…μŠ€νŠΈ 제거
355
+ return interleaved_content + content_list # interleaved + λ‚˜λ¨Έμ§€ λ¬Έμ„œ 뢄석 λ‚΄μš©
356
  else:
357
  # 일반 μ—¬λŸ¬ μž₯
358
  for img_path in image_files:
 
380
  content = item["content"]
381
  if isinstance(content, str):
382
  current_user_content.append({"type": "text", "text": content})
383
+ elif isinstance(content, list) and len(content) > 0:
384
+ file_path = content[0]
385
+ if is_image_file(file_path):
386
+ current_user_content.append({"type": "image", "url": file_path})
387
+ else:
388
+ # 비이미지 νŒŒμΌμ€ ν…μŠ€νŠΈλ‘œ 처리
389
+ current_user_content.append({"type": "text", "text": f"[File: {os.path.basename(file_path)}]"})
390
+
391
+ # λ§ˆμ§€λ§‰ μ‚¬μš©μž λ©”μ‹œμ§€κ°€ μ²˜λ¦¬λ˜μ§€ μ•Šμ€ 경우 μΆ”κ°€
392
+ if current_user_content:
393
+ messages.append({"role": "user", "content": current_user_content})
394
+
395
  return messages
396
 
397
 
 
399
  # 메인 μΆ”λ‘  ν•¨μˆ˜
400
  ##################################################
401
@spaces.GPU(duration=120)
def run(
    message: dict,
    history: list[dict],
    system_prompt: str = "",
    max_new_tokens: int = 512,
    use_web_search: bool = False,
    web_search_query: str = "",
) -> Iterator[str]:
    """Stream the model's reply to a multimodal chat message.

    Args:
        message: New user message with a ``"text"`` string and a ``"files"``
            list of paths.
        history: Previous chat messages, converted by ``process_history``.
        system_prompt: Optional system instruction placed before the history.
        max_new_tokens: Generation budget passed to ``model.generate``.
        use_web_search: When True and ``web_search_query`` is not blank, a
            SERPHouse search is run first and its summary is shown above
            the reply.
        web_search_query: Keywords for the optional web search.

    Yields:
        The reply accumulated so far, always preceded by the web-search
        block when one was produced. An empty string is yielded when the
        media constraints are violated, and an error message when anything
        in the pipeline raises.
    """
    # Validate media constraints first
    if not validate_media_constraints(message, history):
        yield ""
        return

    # Each yielded value replaces the reply shown so far, so the search block
    # is kept in `prefix` and re-sent with every chunk. Otherwise it vanishes
    # as soon as the model starts streaming.
    prefix = ""
    try:
        query = (web_search_query or "").strip()
        if use_web_search and query:
            ws_result = do_web_search(query)
            prefix = f"**[Web Search Results for '{query}':]**\n\n{ws_result}\n\n---\n"
            yield prefix

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
        messages.extend(process_history(history))

        # Convert the new user message into model content items.
        user_content = process_new_user_message(message)

        # Trim overly long text items to keep the token count down.
        for item in user_content:
            if item["type"] == "text" and len(item["text"]) > MAX_CONTENT_CHARS:
                item["text"] = item["text"][:MAX_CONTENT_CHARS] + "\n...(truncated)..."

        messages.append({"role": "user", "content": user_content})

        # Final check before building the model input: an "image" item whose
        # URL is not an image file is replaced by a text placeholder.
        for msg in messages:
            if msg["role"] != "user":
                continue

            filtered_content = []
            for item in msg["content"]:
                if item["type"] == "image" and not is_image_file(item["url"]):
                    filtered_content.append({
                        "type": "text",
                        "text": f"[Non-image file: {os.path.basename(item['url'])}]"
                    })
                else:
                    filtered_content.append(item)

            msg["content"] = filtered_content

        # Build the model input.
        inputs = processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(device=model.device, dtype=torch.bfloat16)

        # Set up the streamer that receives generated text.
        streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
        gen_kwargs = dict(
            inputs,
            streamer=streamer,
            max_new_tokens=max_new_tokens,
        )

        # Generate on a separate thread so the streamer can be consumed here.
        t = Thread(target=model.generate, kwargs=gen_kwargs)
        t.start()

        # Stream the growing reply.
        output = ""
        for new_text in streamer:
            output += new_text
            yield prefix + output

    except Exception as e:
        logger.error(f"Error in run: {str(e)}")
        # User-facing message: "Sorry, an error occurred: ..."
        yield f"{prefix}죄송합니다. 오류가 발생했습니다: {str(e)}"
493
+
494
 
495
 
 
 
 
496
  ##################################################
497
  # μ˜ˆμ‹œλ“€ (ν•œκΈ€ν™” 버전)
498
  ##################################################
 
500
 
501
  [
502
  {
503
+ "text": "두 PDF 파일 λ‚΄μš©μ„ λΉ„κ΅ν•˜λΌ.",
504
  "files": ["assets/additional-examples/pdf.pdf"],
505
+ "files": [
506
+ "assets/additional-examples/before.pdf",
507
+ "assets/additional-examples/after.pdf",
508
+ ],
509
  }
510
  ],
511
  [
 
516
  ],
517
  [
518
  {
519
+ "text": "이 μ˜μƒμ˜ λ‚΄μš©μ„ μ„€λͺ…ν•˜λΌ",
520
+ "files": ["assets/additional-examples/tmp.mp4"],
521
  }
522
+ ],
523
  [
524
  {
525
+ "text": "ν‘œμ§€ λ‚΄μš©μ„ μ„€λͺ…ν•˜κ³  κΈ€μžλ₯Ό μ½μ–΄μ£Όμ„Έμš”.",
526
+ "files": ["assets/additional-examples/maz.jpg"],
527
  }
528
+ ],
529
  [
530
  {
531
  "text": "이미 이 μ˜μ–‘μ œλ₯Ό <image> κ°€μ§€κ³  있고, 이 μ œν’ˆ <image>을 μƒˆλ‘œ 사렀 ν•©λ‹ˆλ‹€. ν•¨κ»˜ μ„­μ·¨ν•  λ•Œ μ£Όμ˜ν•΄μ•Ό ν•  점이 μžˆμ„κΉŒμš”?",
532
  "files": ["assets/additional-examples/pill1.png", "assets/additional-examples/pill2.png"],
533
  }
534
+ ],
 
 
 
 
 
 
535
  [
536
  {
537
+ "text": "이 적뢄을 ν’€μ–΄μ£Όμ„Έμš”.",
538
+ "files": ["assets/additional-examples/4.png"],
 
 
 
 
 
539
  }
540
+ ],
541
  [
542
  {
543
+ "text": "이 티켓은 μ–Έμ œ λ°œκΈ‰λœ 것이고, 가격은 μ–Όλ§ˆμΈκ°€μš”?",
544
+ "files": ["assets/additional-examples/2.png"],
545
  }
546
+ ],
547
  [
548
  {
549
  "text": "μ΄λ―Έμ§€λ“€μ˜ μˆœμ„œλ₯Ό λ°”νƒ•μœΌλ‘œ 짧은 이야기λ₯Ό λ§Œλ“€μ–΄ μ£Όμ„Έμš”.",
 
558
  ],
559
  [
560
  {
561
+ "text": "μ΄λ―Έμ§€μ˜ μ‹œκ°μ  μš”μ†Œμ—μ„œ μ˜κ°μ„ λ°›μ•„ μ‹œλ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”.",
562
+ "files": ["assets/sample-images/06-1.png", "assets/sample-images/06-2.png"],
563
  }
564
  ],
565
  [
566
  {
567
+ "text": "λ™μΌν•œ λ§‰λŒ€ κ·Έλž˜ν”„λ₯Ό κ·Έλ¦¬λŠ” matplotlib μ½”λ“œλ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš”.",
568
+ "files": ["assets/additional-examples/barchart.png"],
569
  }
570
+ ],
571
+
572
  [
573
  {
574
+ "text": "이 μ„Έκ³„μ—μ„œ μ‚΄κ³  μžˆμ„ 생물듀을 μƒμƒν•΄μ„œ λ¬˜μ‚¬ν•΄μ£Όμ„Έμš”.",
575
+ "files": ["assets/sample-images/08.png"],
576
  }
577
  ],
578
+
579
+
580
  [
581
  {
582
  "text": "이미지에 μžˆλŠ” ν…μŠ€νŠΈλ₯Ό κ·ΈλŒ€λ‘œ μ½μ–΄μ„œ λ§ˆν¬λ‹€μš΄ ν˜•νƒœλ‘œ μ μ–΄μ£Όμ„Έμš”.",
583
  "files": ["assets/additional-examples/3.png"],
584
  }
585
  ],
586
+
587
+
 
 
 
 
 
 
 
 
 
 
588
  [
589
  {
590
  "text": "이 ν‘œμ§€νŒμ—λŠ” 무슨 문ꡬ가 μ ν˜€ μžˆλ‚˜μš”?",
 
597
  "files": ["assets/sample-images/03.png"],
598
  }
599
  ],
600
+
 
 
 
 
 
 
 
 
 
 
 
601
  ]
602
 
603
 
604
 
605
+
606
+
607
##############################################################################
# Custom CSS similar to second example (colorful background, panel, etc.)
##############################################################################
# Style sheet handed to gr.Blocks(css=...): gradient page background, a white
# rounded container, centred examples area and gradient buttons.
css = """
body {
    background: linear-gradient(135deg, #667eea, #764ba2);
    font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
    color: #333;
    margin: 0;
    padding: 0;
}
.gradio-container {
    background: rgba(255, 255, 255, 0.95);
    border-radius: 15px;
    padding: 30px 40px;
    box-shadow: 0 8px 30px rgba(0, 0, 0, 0.3);
    margin: 40px auto;
    max-width: 1200px;
}
.gradio-container h1 {
    color: #333;
    text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.2);
}
.fillable {
    width: 95% !important;
    max-width: unset !important;
}
#examples_container {
    margin: auto;
    width: 90%;
}
#examples_row {
    justify-content: center;
}
.sidebar {
    background: rgba(255, 255, 255, 0.98);
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
}
button, .btn {
    background: linear-gradient(90deg, #ff8a00, #e52e71);
    border: none;
    color: #fff;
    padding: 12px 24px;
    text-transform: uppercase;
    font-weight: bold;
    letter-spacing: 1px;
    border-radius: 5px;
    cursor: pointer;
    transition: transform 0.2s ease-in-out;
}
button:hover, .btn:hover {
    transform: scale(1.05);
}
"""

# Heading markup rendered at the top of the page. The emoji had been stored as
# mis-decoded bytes and is restored here.
title_html = """
<h1 align="center" style="margin-bottom: 0.2em;"> 🤗 Vidraft-Gemma-3-27B </h1>
<p align="center" style="font-size:1.1em; color:#555;">
    Multimodal Chat Interface + Optional Web Search
</p>
"""
670
+
671
##############################################################################
# Page layout:
#   - left panel: web-search toggle and query, system prompt, token budget
#   - right panel: the multimodal ChatInterface driven by run()
#   - bottom row: example gallery
# NOTE(review): the nesting below was reconstructed from flattened text;
# confirm that the query textbox belongs in the same Row as the checkbox.
##############################################################################
with gr.Blocks(css=css, title="Vidraft-Gemma-3-27B") as demo:
    gr.Markdown(title_html)

    with gr.Row():
        # Options panel
        with gr.Column(scale=3, variant="panel"):
            gr.Markdown("### Menu / Options")
            with gr.Row():
                web_search_checkbox = gr.Checkbox(
                    label="Web Search",
                    value=False,
                    info="Check to enable a SERPHouse web search before the chat reply",
                )
                web_search_text = gr.Textbox(
                    lines=1,
                    label="Web Search Query",
                    placeholder="Enter search keywords...",
                )

            gr.Markdown("---")
            gr.Markdown("#### System Prompt")
            default_system_prompt = (
                "You are a deeply thoughtful AI. Consider problems thoroughly and derive "
                "correct solutions through systematic reasoning. Please answer in korean."
            )
            system_prompt_box = gr.Textbox(lines=3, value=default_system_prompt)

            max_tokens_slider = gr.Slider(
                label="Max New Tokens",
                minimum=100,
                maximum=8000,
                step=50,
                value=2000,
            )

            gr.Markdown("<br><br>")  # vertical spacing

        # Chat panel
        with gr.Column(scale=7):
            accepted_file_types = [
                ".webp", ".png", ".jpg", ".jpeg", ".gif",
                ".mp4", ".csv", ".txt", ".pdf",
            ]
            # The order of these controls must match run()'s extra
            # parameters: system_prompt, max_new_tokens, use_web_search,
            # web_search_query.
            extra_controls = [
                system_prompt_box,
                max_tokens_slider,
                web_search_checkbox,
                web_search_text,
            ]
            chat = gr.ChatInterface(
                fn=run,
                type="messages",
                chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
                textbox=gr.MultimodalTextbox(
                    file_types=accepted_file_types,
                    file_count="multiple",
                    autofocus=True,
                ),
                multimodal=True,
                additional_inputs=extra_controls,
                stop_btn=False,
                title="Vidraft-Gemma-3-27B",
                examples=examples,
                run_examples_on_click=False,
                cache_examples=False,
                css_paths=None,
                delete_cache=(1800, 1800),
            )

    with gr.Row(elem_id="examples_row"):
        with gr.Column(scale=12, elem_id="examples_container"):
            gr.Markdown("### Example Inputs (click to load)")
            # An empty input list is passed on purpose; the original author
            # notes that it avoids a "None" error.
            gr.Examples(
                examples=examples,
                inputs=[],
                cache_examples=False,
            )
 
 
 
 
 
 
 
 
 
 
 
754
 
755
  if __name__ == "__main__":
756
  demo.launch()