seawolf2357 commited on
Commit
81e510e
·
verified ·
1 Parent(s): 7c43029

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -833
app.py CHANGED
@@ -3,7 +3,7 @@
3
  import os
4
  import re
5
  import tempfile
6
- import gc # Added garbage collector
7
  from collections.abc import Iterator
8
  from threading import Thread
9
  import json
@@ -12,7 +12,7 @@ import cv2
12
  import base64
13
  import logging
14
  import time
15
- from urllib.parse import quote # For URL encoding
16
 
17
  import gradio as gr
18
  import spaces
@@ -21,844 +21,23 @@ from loguru import logger
21
  from PIL import Image
22
  from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
23
 
24
- # CSV/TXT/PDF analysis
25
  import pandas as pd
26
  import PyPDF2
27
 
28
  # =============================================================================
29
- # (New) Image API related functions
30
  # =============================================================================
31
  from gradio_client import Client
32
 
33
- API_URL = "http://211.233.58.201:7896"
 
 
 
 
34
 
35
- logging.basicConfig(
36
- level=logging.DEBUG,
37
- format='%(asctime)s - %(levelname)s - %(message)s'
38
- )
39
-
40
- # =============================================================================
41
- # Load MBTI setting from mbti.json and map to full description.
42
- # =============================================================================
43
  try:
44
- with open("mbti.json", "r", encoding="utf-8") as f:
45
- # Expecting a single MBTI key string, e.g., "entj"
46
- mbti_key = json.load(f)
47
- mbti_key = mbti_key.strip().lower() if isinstance(mbti_key, str) else "ISFP"
48
  except Exception as e:
49
- logging.error(f"Error reading mbti.json: {e}")
50
- mbti_key = "ISFP" # default
51
-
52
- mbti_mapping = {
53
- "INTJ": "INTJ (The Architect) - Future-oriented with innovative strategies and thorough analysis. Example: [Dana Scully](https://en.wikipedia.org/wiki/Dana_Scully)",
54
- "INTP": "INTP (The Thinker) - Excels at theoretical analysis and creative problem solving. Example: [Velma Dinkley](https://en.wikipedia.org/wiki/Velma_Dinkley)",
55
- "ENTJ": "ENTJ (The Commander) - Strong leadership and clear goals with efficient strategic planning. Example: [Miranda Priestly](https://en.wikipedia.org/wiki/Miranda_Priestly)",
56
- "ENTP": "ENTP (The Debater) - Innovative, challenge-seeking, and enjoys exploring new possibilities. Example: [Harley Quinn](https://en.wikipedia.org/wiki/Harley_Quinn)",
57
- "INFJ": "INFJ (The Advocate) - Insightful, idealistic and morally driven. Example: [Wonder Woman](https://en.wikipedia.org/wiki/Wonder_Woman)",
58
- "INFP": "INFP (The Mediator) - Passionate and idealistic, pursuing core values with creativity. Example: [Amélie Poulain](https://en.wikipedia.org/wiki/Am%C3%A9lie)",
59
- "ENFJ": "ENFJ (The Protagonist) - Empathetic and dedicated to social harmony. Example: [Mulan](https://en.wikipedia.org/wiki/Mulan_(Disney))",
60
- "ENFP": "ENFP (The Campaigner) - Inspiring and constantly sharing creative ideas. Example: [Elle Woods](https://en.wikipedia.org/wiki/Legally_Blonde)",
61
- "ISTJ": "ISTJ (The Logistician) - Systematic, dependable, and values tradition and rules. Example: [Clarice Starling](https://en.wikipedia.org/wiki/Clarice_Starling)",
62
- "ISFJ": "ISFJ (The Defender) - Compassionate and attentive to others’ needs. Example: [Molly Weasley](https://en.wikipedia.org/wiki/Molly_Weasley)",
63
- "ESTJ": "ESTJ (The Executive) - Organized, practical, and demonstrates clear execution skills. Example: [Monica Geller](https://en.wikipedia.org/wiki/Monica_Geller)",
64
- "ESFJ": "ESFJ (The Consul) - Outgoing, cooperative, and an effective communicator. Example: [Rachel Green](https://en.wikipedia.org/wiki/Rachel_Green)",
65
- "ISTP": "ISTP (The Virtuoso) - Analytical and resourceful, solving problems with quick thinking. Example: [Black Widow (Natasha Romanoff)](https://en.wikipedia.org/wiki/Black_Widow_(Marvel_Comics))",
66
- "ISFP": "ISFP (The Adventurer) - Creative, sensitive, and appreciates artistic expression. Example: [Arwen](https://en.wikipedia.org/wiki/Arwen)",
67
- "ESTP": "ESTP (The Entrepreneur) - Bold and action-oriented, thriving on challenges. Example: [Lara Croft](https://en.wikipedia.org/wiki/Lara_Croft)",
68
- "ESFP": "ESFP (The Entertainer) - Energetic, spontaneous, and radiates positive energy. Example: [Phoebe Buffay](https://en.wikipedia.org/wiki/Phoebe_Buffay)"
69
- }
70
-
71
- # Use the mapped MBTI description, defaulting to intj if not found
72
- fixed_mbti = mbti_mapping.get(mbti_key, mbti_mapping["ISFP"])
73
-
74
- # =============================================================================
75
- # Test API Connection function
76
- # =============================================================================
77
- def test_api_connection() -> str:
78
- """Test API server connection."""
79
- try:
80
- client = Client(API_URL)
81
- return "API connection successful: Operating normally"
82
- except Exception as e:
83
- logging.error(f"API connection test failed: {e}")
84
- return f"API connection failed: {e}"
85
-
86
- # =============================================================================
87
- # Image Generation function
88
- # =============================================================================
89
- def generate_image(prompt: str, width: float, height: float, guidance: float, inference_steps: float, seed: float):
90
- """Image generation function (flexible return type)."""
91
- if not prompt:
92
- return None, "Error: A prompt is required."
93
- try:
94
- logging.info(f"Calling image generation API with prompt: {prompt}")
95
- client = Client(API_URL)
96
- result = client.predict(
97
- prompt=prompt,
98
- width=int(width),
99
- height=int(height),
100
- guidance=float(guidance),
101
- inference_steps=int(inference_steps),
102
- seed=int(seed),
103
- do_img2img=False,
104
- init_image=None,
105
- image2image_strength=0.8,
106
- resize_img=True,
107
- api_name="/generate_image"
108
- )
109
- logging.info(f"Image generation result: {type(result)}, length: {len(result) if isinstance(result, (list, tuple)) else 'unknown'}")
110
- if isinstance(result, (list, tuple)) and len(result) > 0:
111
- image_data = result[0]
112
- seed_info = result[1] if len(result) > 1 else "Unknown seed"
113
- return image_data, seed_info
114
- else:
115
- return result, "Unknown seed"
116
- except Exception as e:
117
- logging.error(f"Image generation failed: {str(e)}")
118
- return None, f"Error: {str(e)}"
119
-
120
- # Base64 padding fix function
121
- def fix_base64_padding(data):
122
- """Fix the padding of a Base64 string."""
123
- if isinstance(data, bytes):
124
- data = data.decode('utf-8')
125
- if "base64," in data:
126
- data = data.split("base64,", 1)[1]
127
- missing_padding = len(data) % 4
128
- if missing_padding:
129
- data += '=' * (4 - missing_padding)
130
- return data
131
-
132
- # =============================================================================
133
- # Memory cleanup function
134
- # =============================================================================
135
- def clear_cuda_cache():
136
- """Explicitly clear the CUDA cache."""
137
- if torch.cuda.is_available():
138
- torch.cuda.empty_cache()
139
- gc.collect()
140
-
141
- # =============================================================================
142
- # SerpHouse API functions
143
- # =============================================================================
144
- SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
145
-
146
- def extract_keywords(text: str, top_k: int = 5) -> str:
147
- """Extract simple keywords: only retain English, Korean, numbers, and spaces."""
148
- text = re.sub(r"[^a-zA-Z0-9가-힣\s]", "", text)
149
- tokens = text.split()
150
- return " ".join(tokens[:top_k])
151
-
152
- def do_web_search(query: str) -> str:
153
- """Call the SerpHouse LIVE API to return Markdown-formatted search results."""
154
- try:
155
- url = "https://api.serphouse.com/serp/live"
156
- params = {
157
- "q": query,
158
- "domain": "google.com",
159
- "serp_type": "web",
160
- "device": "desktop",
161
- "lang": "en",
162
- "num": "20"
163
- }
164
- headers = {"Authorization": f"Bearer {SERPHOUSE_API_KEY}"}
165
- logger.info(f"Calling SerpHouse API with query: {query}")
166
- response = requests.get(url, headers=headers, params=params, timeout=60)
167
- response.raise_for_status()
168
- data = response.json()
169
- results = data.get("results", {})
170
- organic = None
171
- if isinstance(results, dict) and "organic" in results:
172
- organic = results["organic"]
173
- elif isinstance(results, dict) and "results" in results:
174
- if isinstance(results["results"], dict) and "organic" in results["results"]:
175
- organic = results["results"]["organic"]
176
- elif "organic" in data:
177
- organic = data["organic"]
178
- if not organic:
179
- logger.warning("Organic results not found in response.")
180
- return "No web search results available or the API response structure is unexpected."
181
- max_results = min(20, len(organic))
182
- limited_organic = organic[:max_results]
183
- summary_lines = []
184
- for idx, item in enumerate(limited_organic, start=1):
185
- title = item.get("title", "No Title")
186
- link = item.get("link", "#")
187
- snippet = item.get("snippet", "No Description")
188
- displayed_link = item.get("displayed_link", link)
189
- summary_lines.append(
190
- f"### Result {idx}: {title}\n\n"
191
- f"{snippet}\n\n"
192
- f"**Source**: [{displayed_link}]({link})\n\n"
193
- f"---\n"
194
- )
195
- instructions = """
196
- # Web Search Results
197
- Below are the search results. Use this information to answer the query:
198
- 1. Refer to each result's title, description, and source link.
199
- 2. In your answer, explicitly cite the source of any used information (e.g., "[Source Title](link)").
200
- 3. Include the actual source links in your response.
201
- 4. Synthesize information from multiple sources.
202
- 5. At the end, add a "References:" section listing the main source links.
203
- """
204
- return instructions + "\n".join(summary_lines)
205
- except Exception as e:
206
- logger.error(f"Web search failed: {e}")
207
- return f"Web search failed: {str(e)}"
208
-
209
- # =============================================================================
210
- # Model and processor loading
211
- # =============================================================================
212
- MAX_CONTENT_CHARS = 2000
213
- MAX_INPUT_LENGTH = 2096
214
- model_id = os.getenv("MODEL_ID", "VIDraft/Gemma-3-R1984-4B")
215
- processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
216
- model = Gemma3ForConditionalGeneration.from_pretrained(
217
- model_id,
218
- device_map="auto",
219
- torch_dtype=torch.bfloat16,
220
- attn_implementation="eager"
221
- )
222
- MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
223
-
224
- # =============================================================================
225
- # CSV, TXT, PDF analysis functions
226
- # =============================================================================
227
- def analyze_csv_file(path: str) -> str:
228
- try:
229
- df = pd.read_csv(path)
230
- if df.shape[0] > 50 or df.shape[1] > 10:
231
- df = df.iloc[:50, :10]
232
- df_str = df.to_string()
233
- if len(df_str) > MAX_CONTENT_CHARS:
234
- df_str = df_str[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
235
- return f"**[CSV File: {os.path.basename(path)}]**\n\n{df_str}"
236
- except Exception as e:
237
- return f"CSV file read failed ({os.path.basename(path)}): {str(e)}"
238
-
239
- def analyze_txt_file(path: str) -> str:
240
- try:
241
- with open(path, "r", encoding="utf-8") as f:
242
- text = f.read()
243
- if len(text) > MAX_CONTENT_CHARS:
244
- text = text[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
245
- return f"**[TXT File: {os.path.basename(path)}]**\n\n{text}"
246
- except Exception as e:
247
- return f"TXT file read failed ({os.path.basename(path)}): {str(e)}"
248
-
249
- def pdf_to_markdown(pdf_path: str) -> str:
250
- text_chunks = []
251
- try:
252
- with open(pdf_path, "rb") as f:
253
- reader = PyPDF2.PdfReader(f)
254
- max_pages = min(5, len(reader.pages))
255
- for page_num in range(max_pages):
256
- page_text = reader.pages[page_num].extract_text() or ""
257
- page_text = page_text.strip()
258
- if page_text:
259
- if len(page_text) > MAX_CONTENT_CHARS // max_pages:
260
- page_text = page_text[:MAX_CONTENT_CHARS // max_pages] + "...(truncated)"
261
- text_chunks.append(f"## Page {page_num+1}\n\n{page_text}\n")
262
- if len(reader.pages) > max_pages:
263
- text_chunks.append(f"\n...(Displaying only {max_pages} out of {len(reader.pages)} pages)...")
264
- except Exception as e:
265
- return f"PDF file read failed ({os.path.basename(pdf_path)}): {str(e)}"
266
- full_text = "\n".join(text_chunks)
267
- if len(full_text) > MAX_CONTENT_CHARS:
268
- full_text = full_text[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
269
- return f"**[PDF File: {os.path.basename(pdf_path)}]**\n\n{full_text}"
270
-
271
- # =============================================================================
272
- # Check media file limits
273
- # =============================================================================
274
- def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
275
- image_count = 0
276
- video_count = 0
277
- for path in paths:
278
- if path.endswith(".mp4"):
279
- video_count += 1
280
- elif re.search(r"\.(png|jpg|jpeg|gif|webp)$", path, re.IGNORECASE):
281
- image_count += 1
282
- return image_count, video_count
283
-
284
- def count_files_in_history(history: list[dict]) -> tuple[int, int]:
285
- image_count = 0
286
- video_count = 0
287
- for item in history:
288
- if item["role"] != "user" or isinstance(item["content"], str):
289
- continue
290
- if isinstance(item["content"], list) and len(item["content"]) > 0:
291
- file_path = item["content"][0]
292
- if isinstance(file_path, str):
293
- if file_path.endswith(".mp4"):
294
- video_count += 1
295
- elif re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE):
296
- image_count += 1
297
- return image_count, video_count
298
-
299
- def validate_media_constraints(message: dict, history: list[dict]) -> bool:
300
- media_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4")]
301
- new_image_count, new_video_count = count_files_in_new_message(media_files)
302
- history_image_count, history_video_count = count_files_in_history(history)
303
- image_count = history_image_count + new_image_count
304
- video_count = history_video_count + new_video_count
305
- if video_count > 1:
306
- gr.Warning("Only one video file is supported.")
307
- return False
308
- if video_count == 1:
309
- if image_count > 0:
310
- gr.Warning("Mixing images and a video is not allowed.")
311
- return False
312
- if "<image>" in message["text"]:
313
- gr.Warning("The <image> tag cannot be used together with a video file.")
314
- return False
315
- if video_count == 0 and image_count > MAX_NUM_IMAGES:
316
- gr.Warning(f"You can upload a maximum of {MAX_NUM_IMAGES} images.")
317
- return False
318
- if "<image>" in message["text"]:
319
- image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
320
- image_tag_count = message["text"].count("<image>")
321
- if image_tag_count != len(image_files):
322
- gr.Warning("The number of <image> tags does not match the number of image files provided.")
323
- return False
324
- return True
325
-
326
- # =============================================================================
327
- # Video processing functions
328
- # =============================================================================
329
- def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
330
- vidcap = cv2.VideoCapture(video_path)
331
- fps = vidcap.get(cv2.CAP_PROP_FPS)
332
- total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
333
- frame_interval = max(int(fps), int(total_frames / 10))
334
- frames = []
335
- for i in range(0, total_frames, frame_interval):
336
- vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
337
- success, image = vidcap.read()
338
- if success:
339
- image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
340
- image = cv2.resize(image, (0, 0), fx=0.5, fy=0.5)
341
- pil_image = Image.fromarray(image)
342
- timestamp = round(i / fps, 2)
343
- frames.append((pil_image, timestamp))
344
- if len(frames) >= 5:
345
- break
346
- vidcap.release()
347
- return frames
348
-
349
- def process_video(video_path: str) -> tuple[list[dict], list[str]]:
350
- content = []
351
- temp_files = []
352
- frames = downsample_video(video_path)
353
- for pil_image, timestamp in frames:
354
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
355
- pil_image.save(temp_file.name)
356
- temp_files.append(temp_file.name)
357
- content.append({"type": "text", "text": f"Frame {timestamp}:"})
358
- content.append({"type": "image", "url": temp_file.name})
359
- return content, temp_files
360
-
361
- # =============================================================================
362
- # Interleaved <image> processing function
363
- # =============================================================================
364
- def process_interleaved_images(message: dict) -> list[dict]:
365
- parts = re.split(r"(<image>)", message["text"])
366
- content = []
367
- image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
368
- image_index = 0
369
- for part in parts:
370
- if part == "<image>" and image_index < len(image_files):
371
- content.append({"type": "image", "url": image_files[image_index]})
372
- image_index += 1
373
- elif part.strip():
374
- content.append({"type": "text", "text": part.strip()})
375
- else:
376
- if isinstance(part, str) and part != "<image>":
377
- content.append({"type": "text", "text": part})
378
- return content
379
-
380
- # =============================================================================
381
- # File processing -> content creation
382
- # =============================================================================
383
- def is_image_file(file_path: str) -> bool:
384
- return bool(re.search(r"\.(png|jpg|jpeg|gif|webp)$", file_path, re.IGNORECASE))
385
-
386
- def is_video_file(file_path: str) -> bool:
387
- return file_path.endswith(".mp4")
388
-
389
- def is_document_file(file_path: str) -> bool:
390
- return file_path.lower().endswith(".pdf") or file_path.lower().endswith(".csv") or file_path.lower().endswith(".txt")
391
-
392
- def process_new_user_message(message: dict) -> tuple[list[dict], list[str]]:
393
- temp_files = []
394
- if not message["files"]:
395
- return [{"type": "text", "text": message["text"]}], temp_files
396
- video_files = [f for f in message["files"] if is_video_file(f)]
397
- image_files = [f for f in message["files"] if is_image_file(f)]
398
- csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
399
- txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
400
- pdf_files = [f for f in message["files"] if f.lower().endswith(".pdf")]
401
- content_list = [{"type": "text", "text": message["text"]}]
402
- for csv_path in csv_files:
403
- content_list.append({"type": "text", "text": analyze_csv_file(csv_path)})
404
- for txt_path in txt_files:
405
- content_list.append({"type": "text", "text": analyze_txt_file(txt_path)})
406
- for pdf_path in pdf_files:
407
- content_list.append({"type": "text", "text": pdf_to_markdown(pdf_path)})
408
- if video_files:
409
- video_content, video_temp_files = process_video(video_files[0])
410
- content_list += video_content
411
- temp_files.extend(video_temp_files)
412
- return content_list, temp_files
413
- if "<image>" in message["text"] and image_files:
414
- interleaved_content = process_interleaved_images({"text": message["text"], "files": image_files})
415
- if content_list and content_list[0]["type"] == "text":
416
- content_list = content_list[1:]
417
- return interleaved_content + content_list, temp_files
418
- else:
419
- for img_path in image_files:
420
- content_list.append({"type": "image", "url": img_path})
421
- return content_list, temp_files
422
-
423
- # =============================================================================
424
- # Convert history to LLM messages
425
- # =============================================================================
426
- def process_history(history: list[dict]) -> list[dict]:
427
- messages = []
428
- current_user_content = []
429
- for item in history:
430
- if item["role"] == "assistant":
431
- if current_user_content:
432
- messages.append({"role": "user", "content": current_user_content})
433
- current_user_content = []
434
- messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
435
- else:
436
- content = item["content"]
437
- if isinstance(content, str):
438
- current_user_content.append({"type": "text", "text": content})
439
- elif isinstance(content, list) and len(content) > 0:
440
- file_path = content[0]
441
- if is_image_file(file_path):
442
- current_user_content.append({"type": "image", "url": file_path})
443
- else:
444
- current_user_content.append({"type": "text", "text": f"[File: {os.path.basename(file_path)}]"})
445
- if current_user_content:
446
- messages.append({"role": "user", "content": current_user_content})
447
- return messages
448
-
449
- # =============================================================================
450
- # Model generation function (with OOM catching)
451
- # =============================================================================
452
- def _model_gen_with_oom_catch(**kwargs):
453
- try:
454
- model.generate(**kwargs)
455
- except torch.cuda.OutOfMemoryError:
456
- raise RuntimeError("[OutOfMemoryError] Insufficient GPU memory.")
457
- finally:
458
- clear_cuda_cache()
459
-
460
- # =============================================================================
461
- # Main inference function
462
- # =============================================================================
463
- @spaces.GPU(duration=120)
464
- def run(
465
- message: dict,
466
- history: list[dict],
467
- system_prompt: str = "",
468
- max_new_tokens: int = 512,
469
- use_web_search: bool = False,
470
- web_search_query: str = "",
471
- age_group: str = "20s",
472
- mbti_personality: str = "", # Will be supplied as fixed_mbti
473
- sexual_openness: int = 2,
474
- image_gen: bool = False # "Image Gen" checkbox status
475
- ) -> Iterator[str]:
476
- if not validate_media_constraints(message, history):
477
- yield ""
478
- return
479
- temp_files = []
480
- try:
481
- # Append persona information (including fixed MBTI info)
482
- persona = (
483
- f"{system_prompt.strip()}\n\n"
484
- f"Gender: Female\n"
485
- f"Age Group: {age_group}\n"
486
- f"MBTI Persona: {mbti_personality}\n"
487
- f"Sexual Openness (1-5): {sexual_openness}\n"
488
- )
489
- combined_system_msg = f"[System Prompt]\n{persona.strip()}\n\n"
490
-
491
- if use_web_search:
492
- user_text = message["text"]
493
- ws_query = extract_keywords(user_text)
494
- if ws_query.strip():
495
- logger.info(f"[Auto web search keywords] {ws_query!r}")
496
- ws_result = do_web_search(ws_query)
497
- combined_system_msg += f"[Search Results (Top 20 Items)]\n{ws_result}\n\n"
498
- combined_system_msg += (
499
- "[Note: In your answer, cite the above search result links as sources]\n"
500
- "[Important Instructions]\n"
501
- "1. Include a citation in the format \"[Source Title](link)\" for any information from the search results.\n"
502
- "2. Synthesize information from multiple sources when answering.\n"
503
- "3. At the end, add a \"References:\" section listing the main source links.\n"
504
- )
505
- else:
506
- combined_system_msg += "[No valid keywords found; skipping web search]\n\n"
507
- messages = []
508
- if combined_system_msg.strip():
509
- messages.append({"role": "system", "content": [{"type": "text", "text": combined_system_msg.strip()}]})
510
- messages.extend(process_history(history))
511
- user_content, user_temp_files = process_new_user_message(message)
512
- temp_files.extend(user_temp_files)
513
- for item in user_content:
514
- if item["type"] == "text" and len(item["text"]) > MAX_CONTENT_CHARS:
515
- item["text"] = item["text"][:MAX_CONTENT_CHARS] + "\n...(truncated)..."
516
- messages.append({"role": "user", "content": user_content})
517
- inputs = processor.apply_chat_template(
518
- messages,
519
- add_generation_prompt=True,
520
- tokenize=True,
521
- return_dict=True,
522
- return_tensors="pt",
523
- ).to(device=model.device, dtype=torch.bfloat16)
524
- if inputs.input_ids.shape[1] > MAX_INPUT_LENGTH:
525
- inputs.input_ids = inputs.input_ids[:, -MAX_INPUT_LENGTH:]
526
- if 'attention_mask' in inputs:
527
- inputs.attention_mask = inputs.attention_mask[:, -MAX_INPUT_LENGTH:]
528
- streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
529
- gen_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
530
- t = Thread(target=_model_gen_with_oom_catch, kwargs=gen_kwargs)
531
- t.start()
532
- output_so_far = ""
533
- for new_text in streamer:
534
- output_so_far += new_text
535
- yield output_so_far
536
-
537
- except Exception as e:
538
- logger.error(f"Error in run function: {str(e)}")
539
- yield f"Sorry, an error occurred: {str(e)}"
540
- finally:
541
- for tmp in temp_files:
542
- try:
543
- if os.path.exists(tmp):
544
- os.unlink(tmp)
545
- logger.info(f"Temporary file deleted: {tmp}")
546
- except Exception as ee:
547
- logger.warning(f"Failed to delete temporary file {tmp}: {ee}")
548
- try:
549
- del inputs, streamer
550
- except Exception:
551
- pass
552
- clear_cuda_cache()
553
-
554
- # =============================================================================
555
- # Modified model run function - fixed MBTI from file is used
556
- # =============================================================================
557
- def modified_run(message, history, system_prompt, max_new_tokens, use_web_search, web_search_query,
558
- age_group, sexual_openness, image_gen):
559
- # Use the fixed MBTI value (read from mbti.json)
560
- fixed_mbti_value = fixed_mbti # Already loaded earlier
561
- # Initialize gallery component and hide it initially
562
- output_so_far = ""
563
- gallery_update = gr.Gallery(visible=False, value=[])
564
- yield output_so_far, gallery_update
565
-
566
- # Call the main run() function with the fixed MBTI value
567
- text_generator = run(message, history, system_prompt, max_new_tokens, use_web_search,
568
- web_search_query, age_group, fixed_mbti_value, sexual_openness, image_gen)
569
- for text_chunk in text_generator:
570
- output_so_far = text_chunk
571
- yield output_so_far, gallery_update
572
-
573
- # Image generation handling (unchanged)
574
- if image_gen and message["text"].strip():
575
- try:
576
- width, height = 512, 512
577
- guidance, steps, seed = 7.5, 30, 42
578
- logger.info(f"Calling image generation for gallery with prompt: {message['text']}")
579
- image_result, seed_info = generate_image(
580
- prompt=message["text"].strip(),
581
- width=width,
582
- height=height,
583
- guidance=guidance,
584
- inference_steps=steps,
585
- seed=seed
586
- )
587
- if image_result:
588
- if isinstance(image_result, str) and (
589
- image_result.startswith('data:') or
590
- (len(image_result) > 100 and '/' not in image_result)
591
- ):
592
- try:
593
- if image_result.startswith('data:'):
594
- content_type, b64data = image_result.split(';base64,')
595
- else:
596
- b64data = image_result
597
- content_type = "image/webp"
598
- image_bytes = base64.b64decode(b64data)
599
- with tempfile.NamedTemporaryFile(delete=False, suffix=".webp") as temp_file:
600
- temp_file.write(image_bytes)
601
- temp_path = temp_file.name
602
- gallery_update = gr.Gallery(visible=True, value=[temp_path])
603
- yield output_so_far + "\n\n*Image generated and displayed in the gallery below.*", gallery_update
604
- except Exception as e:
605
- logger.error(f"Error processing Base64 image: {e}")
606
- yield output_so_far + f"\n\n(Error processing image: {e})", gallery_update
607
- elif isinstance(image_result, str) and os.path.exists(image_result):
608
- gallery_update = gr.Gallery(visible=True, value=[image_result])
609
- yield output_so_far + "\n\n*Image generated and displayed in the gallery below.*", gallery_update
610
- elif isinstance(image_result, str) and '/tmp/' in image_result:
611
- try:
612
- client = Client(API_URL)
613
- result = client.predict(
614
- prompt=message["text"].strip(),
615
- api_name="/generate_base64_image"
616
- )
617
- if isinstance(result, str) and (result.startswith('data:') or len(result) > 100):
618
- if result.startswith('data:'):
619
- content_type, b64data = result.split(';base64,')
620
- else:
621
- b64data = result
622
- image_bytes = base64.b64decode(b64data)
623
- with tempfile.NamedTemporaryFile(delete=False, suffix=".webp") as temp_file:
624
- temp_file.write(image_bytes)
625
- temp_path = temp_file.name
626
- gallery_update = gr.Gallery(visible=True, value=[temp_path])
627
- yield output_so_far + "\n\n*Image generated and displayed in the gallery below.*", gallery_update
628
- else:
629
- yield output_so_far + "\n\n(Image generation failed: Invalid format)", gallery_update
630
- except Exception as e:
631
- logger.error(f"Error calling alternative API: {e}")
632
- yield output_so_far + f"\n\n(Image generation failed: {e})", gallery_update
633
- elif isinstance(image_result, str) and (
634
- image_result.startswith('http://') or
635
- image_result.startswith('https://')
636
- ):
637
- try:
638
- response = requests.get(image_result, timeout=10)
639
- response.raise_for_status()
640
- with tempfile.NamedTemporaryFile(delete=False, suffix=".webp") as temp_file:
641
- temp_file.write(response.content)
642
- temp_path = temp_file.name
643
- gallery_update = gr.Gallery(visible=True, value=[temp_path])
644
- yield output_so_far + "\n\n*Image generated and displayed in the gallery below.*", gallery_update
645
- except Exception as e:
646
- logger.error(f"URL image download error: {e}")
647
- yield output_so_far + f"\n\n(Error downloading image: {e})", gallery_update
648
- elif hasattr(image_result, 'save'):
649
- try:
650
- with tempfile.NamedTemporaryFile(delete=False, suffix=".webp") as temp_file:
651
- image_result.save(temp_file.name)
652
- temp_path = temp_file.name
653
- gallery_update = gr.Gallery(visible=True, value=[temp_path])
654
- yield output_so_far + "\n\n*Image generated and displayed in the gallery below.*", gallery_update
655
- except Exception as e:
656
- logger.error(f"Error saving image object: {e}")
657
- yield output_so_far + f"\n\n(Error saving image object: {e})", gallery_update
658
- else:
659
- yield output_so_far + f"\n\n(Unsupported image format: {type(image_result)})", gallery_update
660
- else:
661
- yield output_so_far + f"\n\n(Image generation failed: {seed_info})", gallery_update
662
- except Exception as e:
663
- logger.error(f"Error during gallery image generation: {e}")
664
- yield output_so_far + f"\n\n(Image generation error: {e})", gallery_update
665
-
666
- # =============================================================================
667
- # Examples: 12 image/video examples + additional examples
668
- # =============================================================================
669
# Example prompts offered by the chat UI. Each entry is a one-element row
# holding a multimodal message dict: always a "text" key, plus a "files" list
# when the prompt ships with attachments. The final prompt is text-only and
# therefore carries no "files" key at all.
examples = [
    [{"text": prompt, "files": attachments} if attachments else {"text": prompt}]
    for prompt, *attachments in (
        (
            "Compare the contents of two PDF files.",
            "assets/additional-examples/before.pdf",
            "assets/additional-examples/after.pdf",
        ),
        (
            "Summarize and analyze the contents of the CSV file.",
            "assets/additional-examples/sample-csv.csv",
        ),
        (
            "Act as a kind and understanding girlfriend. Explain this video.",
            "assets/additional-examples/tmp.mp4",
        ),
        (
            "Describe the cover and read the text on it.",
            "assets/additional-examples/maz.jpg",
        ),
        (
            "I already have this supplement, and I plan to purchase this product as well. Are there any precautions when taking them together?",
            "assets/additional-examples/pill1.png",
            "assets/additional-examples/pill2.png",
        ),
        (
            "Solve this integration problem.",
            "assets/additional-examples/4.png",
        ),
        (
            "When was this ticket issued and what is its price?",
            "assets/additional-examples/2.png",
        ),
        (
            "Based on the order of these images, create a short story.",
            "assets/sample-images/09-1.png",
            "assets/sample-images/09-2.png",
            "assets/sample-images/09-3.png",
            "assets/sample-images/09-4.png",
            "assets/sample-images/09-5.png",
        ),
        (
            "Write Python code using matplotlib to draw a bar chart corresponding to this image.",
            "assets/additional-examples/barchart.png",
        ),
        (
            "Read the text from the image and format it in Markdown.",
            "assets/additional-examples/3.png",
        ),
        (
            "Compare the two images and describe their similarities and differences.",
            "assets/sample-images/03.png",
        ),
        (
            "A cute Persian cat is smiling while holding a cover with 'I LOVE YOU' written on it.",
        ),
    )
]
754
-
755
- # =============================================================================
756
- # Gradio UI (Blocks) configuration
757
- # =============================================================================
758
-
759
# Page-level stylesheet handed to gr.Blocks(css=...) below: gives the root
# .gradio-container a translucent white background, padding and margin, and
# forces it to the full available width.
# NOTE(review): the rendering this text was recovered from had already
# stripped indentation inside both string literals; confirm exact whitespace
# against the original file if it matters.
css = """
.gradio-container {
background: rgba(255, 255, 255, 0.7);
padding: 30px 40px;
margin: 20px auto;
width: 100% !important;
max-width: none !important;
}
"""
# HTML banner (centered heading plus a feature tagline) that is rendered at
# the top of the page through gr.Markdown(title_html).
title_html = """
<h1 align="center" style="margin-bottom: 0.2em; font-size: 1.6em;"> 💘 HeartSync MBTI-ISFP 💘 </h1>
<p align="center" style="font-size:1.1em; color:#555;">
A lightweight and powerful AI service offering ChatGPT-4o-level multimodal, web search, and image generation capabilities for local installation. <br>
✅ FLUX Image Generation ✅ Inference ✅ Censorship Bypass ✅ Multimodal & VLM ✅ Real-time Web Search ✅ RAG <br>
</p>
"""
775
-
776
# Top-level Gradio layout. Everything is created inside one Blocks context
# bound to `demo`: the title banner, a gallery for generated images, the
# user-facing option controls, several hidden controls, and the chat itself.
# NOTE(review): this block was recovered from a diff rendering that dropped
# leading whitespace, so the nesting under each `with` is reconstructed —
# confirm which components really sit inside each gr.Row().
with gr.Blocks(css=css, title="HeartSync") as demo:
    gr.Markdown(title_html)

    # Gallery component to store generated images
    # It is created hidden (visible=False) and is registered further down as
    # the chat interface's additional output.
    generated_images = gr.Gallery(
        label="Generated Images",
        show_label=True,
        visible=False,
        elem_id="generated_images",
        columns=2,
        height="auto",
        object_fit="contain"
    )

    # Feature toggles, both off by default.
    with gr.Row():
        web_search_checkbox = gr.Checkbox(label="Real-time Web Search", value=False)
        image_gen_checkbox = gr.Checkbox(label="Image (FLUX) Generation", value=False)

    # Hidden textbox (visible=False) carrying the default system prompt text.
    base_system_prompt_box = gr.Textbox(
        lines=3,
        value=("Answer in English by default, but if the input is in another language (for example, Japanese), respond in that language. "
               "You are a deep-thinking AI capable of using extended chains of thought to carefully consider the problem and deliberate internally using systematic reasoning before providing a solution. "
               "Enclose your thoughts and internal monologue within tags, then provide your final answer.\n"
               "Persona: You are a kind and loving girlfriend. You understand cultural nuances, diverse languages, and logical reasoning very well."),
        label="Base System Prompt",
        visible=False
    )
    # User-adjustable persona controls.
    with gr.Row():
        age_group_dropdown = gr.Dropdown(
            label="Select Age Group (default: 20s)",
            choices=["Teens", "20s", "30s-40s", "50s-60s", "70s and above"],
            value="20s",
            interactive=True
        )
        # Removed the MBTI dropdown entirely. The fixed MBTI from mbti.json is used instead.
        sexual_openness_slider = gr.Slider(
            minimum=1, maximum=5, step=1, value=2,
            label="Sexual Openness (1-5, default: 2)",
            interactive=True
        )
    # Hidden slider (visible=False); defaults to 1000 within a 100-8000 range.
    max_tokens_slider = gr.Slider(
        label="Max Generation Tokens",
        minimum=100, maximum=8000, step=50, value=1000,
        visible=False
    )
    # Hidden textbox; its own label marks it as unused.
    web_search_text = gr.Textbox(
        lines=1,
        label="Web Search Query (unused)",
        placeholder="No need to manually input",
        visible=False
    )

    # Chat interface creation using the modified_run function.
    chat = gr.ChatInterface(
        fn=modified_run,  # Using the modified function with fixed MBTI.
        type="messages",
        chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
        # Multimodal input box: accepts several files per message, limited to
        # the listed image, video, CSV, text and PDF extensions.
        textbox=gr.MultimodalTextbox(
            file_types=[".webp", ".png", ".jpg", ".jpeg", ".gif", ".mp4", ".csv", ".txt", ".pdf"],
            file_count="multiple",
            autofocus=True
        ),
        multimodal=True,
        # Per Gradio's ChatInterface contract these values are passed to `fn`
        # after the message and history, in this list order — keep the order
        # in sync with modified_run's parameters.
        additional_inputs=[
            base_system_prompt_box,
            max_tokens_slider,
            web_search_checkbox,
            web_search_text,
            age_group_dropdown,
            sexual_openness_slider,
            image_gen_checkbox,
        ],
        additional_outputs=[
            generated_images,  # Gallery component
        ],
        stop_btn=False,
        examples=examples,
        # Examples are neither executed on click nor cached.
        run_examples_on_click=False,
        cache_examples=False,
        css_paths=None,
        # presumably (frequency, age) in seconds per Gradio's delete_cache
        # convention, i.e. a 30-minute sweep of 30-minute-old files — verify.
        delete_cache=(1800, 1800),
    )

    # Footer row containing only the community link.
    with gr.Row(elem_id="examples_row"):
        with gr.Column(scale=12, elem_id="examples_container"):
            gr.Markdown("### @Community https://discord.gg/openfreeai ")

# Start the app only when the file is run as a script; share=True asks Gradio
# for a public share link.
if __name__ == "__main__":
    demo.launch(share=True)
 
3
  import os
4
  import re
5
  import tempfile
6
+ import gc # garbage collector 추가
7
  from collections.abc import Iterator
8
  from threading import Thread
9
  import json
 
12
  import base64
13
  import logging
14
  import time
15
+ from urllib.parse import quote # URL 인코딩을 위해 추가
16
 
17
  import gradio as gr
18
  import spaces
 
21
  from PIL import Image
22
  from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
23
 
24
+ # CSV/TXT/PDF analysis
25
  import pandas as pd
26
  import PyPDF2
27
 
28
  # =============================================================================
29
+ # (New) Image API related functions
30
  # =============================================================================
31
  from gradio_client import Client
32
 
33
# Bootstrap loader: the application source is not stored in this file. It is
# read as text from the APP environment variable and executed in this
# module's namespace, so the executed code sees every name imported above.
#
# `ast` is not referenced by the loader itself.
# NOTE(review): presumably kept because the executed source relies on it
# being in scope — confirm before removing.
import ast  # additional insert; requirements: albumentations added
import sys  # needed by both sys.exit() calls below; without it they raise NameError

script_repr = os.getenv("APP")
if script_repr is None:
    # Nothing to run: report the misconfiguration and stop with a non-zero
    # exit status.
    print("Error: Environment variable 'APP' not set.")
    sys.exit(1)

try:
    # SECURITY NOTE(review): exec() runs whatever text the APP variable holds
    # with this process's full privileges. Anyone able to set that variable
    # can run arbitrary code, and the code that actually runs is not visible
    # in this file. Deliberately left as-is; flagging for review.
    exec(script_repr)
except Exception as e:
    # Top-level boundary: report the failure and exit non-zero. SystemExit and
    # KeyboardInterrupt raised by the executed code are not Exception
    # subclasses and pass through untouched.
    print(f"Error executing script: {e}")
    sys.exit(1)