ginipick committed on
Commit
e175e31
·
verified ·
1 Parent(s): 8f00812

Update app-backup3.py

Browse files
Files changed (1) hide show
  1. app-backup3.py +64 -164
app-backup3.py CHANGED
@@ -51,7 +51,6 @@ logging.basicConfig(level=logging.INFO,
51
 
52
  # ──────────────────────────────── OpenAI Client ──────────────────────────
53
 
54
- # OpenAI 클라이언트에 타임아웃과 재시도 로직 추가
55
  @st.cache_resource
56
  def get_openai_client():
57
  """Create an OpenAI client with timeout and retry settings."""
@@ -59,19 +58,17 @@ def get_openai_client():
59
  raise RuntimeError("⚠️ OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")
60
  return OpenAI(
61
  api_key=OPENAI_API_KEY,
62
- timeout=60.0, # 타임아웃 60초로 설정
63
- max_retries=3 # 재시도 횟수 3회로 설정
64
  )
65
 
66
  # ──────────────────────────────── System Prompt ─────────────────────────
67
  def get_system_prompt(mode="comprehensive", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
68
  """
69
- Generate a system prompt for the perplexity-like interface based on:
70
  - The selected search mode and style
71
  - Guidelines for using web search results and uploaded files
72
  """
73
-
74
- # Base prompt for comprehensive mode
75
  comprehensive_prompt = """
76
  You are an advanced AI assistant that provides comprehensive answers with multiple sources, similar to Perplexity.
77
 
@@ -91,7 +88,6 @@ Important guidelines:
91
  - Format your response using markdown for readability
92
  """
93
 
94
- # Alternative modes
95
  mode_prompts = {
96
  "academic": """
97
  Your focus is on providing academic and research-focused responses:
@@ -127,7 +123,6 @@ Your focus is on providing educational and learning resources:
127
  """
128
  }
129
 
130
- # Response styles
131
  style_guides = {
132
  "professional": "Use a professional, authoritative voice. Clearly explain technical terms and present data systematically.",
133
  "casual": "Use a relaxed, conversational style with a friendly tone. Include relatable examples and occasionally use informal expressions.",
@@ -135,7 +130,6 @@ Your focus is on providing educational and learning resources:
135
  "detailed": "Provide thorough explanations with comprehensive background information. Explore nuances and edge cases. Present multiple perspectives and detailed analysis."
136
  }
137
 
138
- # Guidelines for using search results
139
  search_guide = """
140
  Guidelines for Using Search Results:
141
  - Include source links directly in your response using markdown: [Source Name](URL)
@@ -147,7 +141,6 @@ Guidelines for Using Search Results:
147
  - Include a "References" section at the end listing all major sources with links
148
  """
149
 
150
- # Guidelines for using uploaded files
151
  upload_guide = """
152
  Guidelines for Using Uploaded Files:
153
  - Treat the uploaded files as primary sources for your response
@@ -159,25 +152,22 @@ Guidelines for Using Uploaded Files:
159
  - When information conflicts, prioritize file content over general web results
160
  """
161
 
162
- # Choose base prompt based on mode
163
  if mode == "comprehensive":
164
  final_prompt = comprehensive_prompt
165
  else:
166
  final_prompt = comprehensive_prompt + "\n" + mode_prompts.get(mode, "")
167
 
168
- # Add style guide
169
  if style in style_guides:
170
  final_prompt += f"\n\nTone and Style: {style_guides[style]}"
171
 
172
- # Add search results guidance
173
  if include_search_results:
174
  final_prompt += f"\n\n{search_guide}"
175
 
176
- # Add uploaded files guidance
177
  if include_uploaded_files:
178
  final_prompt += f"\n\n{upload_guide}"
179
 
180
- # Additional formatting instructions
181
  final_prompt += """
182
  \n\nAdditional Formatting Requirements:
183
  - Use markdown headings (## and ###) to organize your response
@@ -186,24 +176,15 @@ Guidelines for Using Uploaded Files:
186
  - Format your response with proper spacing and paragraph breaks
187
  - Make all links clickable by using proper markdown format: [text](url)
188
  """
189
-
190
  return final_prompt
191
 
192
  # ──────────────────────────────── Brave Search API ────────────────────────
193
  @st.cache_data(ttl=3600)
194
  def brave_search(query: str, count: int = 20):
195
- """
196
- Call the Brave Web Search API → list[dict]
197
- Returns fields: index, title, link, snippet, displayed_link
198
- """
199
  if not BRAVE_KEY:
200
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
201
 
202
- headers = {
203
- "Accept": "application/json",
204
- "Accept-Encoding": "gzip",
205
- "X-Subscription-Token": BRAVE_KEY
206
- }
207
  params = {"q": query, "count": str(count)}
208
 
209
  for attempt in range(3):
@@ -237,31 +218,18 @@ def brave_search(query: str, count: int = 20):
237
  except Exception as e:
238
  logging.error(f"Brave search failure (attempt {attempt+1}/3): {e}")
239
  if attempt < 2:
240
- time.sleep(2)
 
241
 
242
  return []
243
 
244
  @st.cache_data(ttl=3600)
245
  def brave_image_search(query: str, count: int = 10):
246
- """
247
- Call the Brave Image Search API → list[dict]
248
- Returns fields: index, title, image_url, source_url
249
- """
250
  if not BRAVE_KEY:
251
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
252
 
253
- headers = {
254
- "Accept": "application/json",
255
- "Accept-Encoding": "gzip",
256
- "X-Subscription-Token": BRAVE_KEY
257
- }
258
- params = {
259
- "q": query,
260
- "count": str(count),
261
- "search_lang": "en",
262
- "country": "us",
263
- "spellcheck": "1"
264
- }
265
 
266
  for attempt in range(3):
267
  try:
@@ -286,28 +254,17 @@ def brave_image_search(query: str, count: int = 10):
286
  except Exception as e:
287
  logging.error(f"Brave image search failure (attempt {attempt+1}/3): {e}")
288
  if attempt < 2:
289
- time.sleep(2)
290
 
291
  return []
292
 
293
  @st.cache_data(ttl=3600)
294
  def brave_video_search(query: str, count: int = 5):
295
- """
296
- Call the Brave Video Search API β†’ list[dict]
297
- Returns fields: index, title, video_url, thumbnail_url, source
298
- """
299
  if not BRAVE_KEY:
300
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
301
 
302
- headers = {
303
- "Accept": "application/json",
304
- "Accept-Encoding": "gzip",
305
- "X-Subscription-Token": BRAVE_KEY
306
- }
307
- params = {
308
- "q": query,
309
- "count": str(count)
310
- }
311
 
312
  for attempt in range(3):
313
  try:
@@ -331,28 +288,17 @@ def brave_video_search(query: str, count: int = 5):
331
  except Exception as e:
332
  logging.error(f"Brave video search failure (attempt {attempt+1}/3): {e}")
333
  if attempt < 2:
334
- time.sleep(2)
335
 
336
  return []
337
 
338
  @st.cache_data(ttl=3600)
339
  def brave_news_search(query: str, count: int = 5):
340
- """
341
- Call the Brave News Search API → list[dict]
342
- Returns fields: index, title, url, description, source, date
343
- """
344
  if not BRAVE_KEY:
345
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
346
 
347
- headers = {
348
- "Accept": "application/json",
349
- "Accept-Encoding": "gzip",
350
- "X-Subscription-Token": BRAVE_KEY
351
- }
352
- params = {
353
- "q": query,
354
- "count": str(count)
355
- }
356
 
357
  for attempt in range(3):
358
  try:
@@ -377,12 +323,11 @@ def brave_news_search(query: str, count: int = 5):
377
  except Exception as e:
378
  logging.error(f"Brave news search failure (attempt {attempt+1}/3): {e}")
379
  if attempt < 2:
380
- time.sleep(2)
381
 
382
  return []
383
 
384
  def mock_results(query: str) -> str:
385
- """Fallback search results if API fails or returns empty."""
386
  ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
387
  return (f"# Fallback Search Content (Generated: {ts})\n\n"
388
  f"The search API request failed or returned no results for '{query}'. "
@@ -395,33 +340,23 @@ def mock_results(query: str) -> str:
395
  f"Note: This is fallback guidance, not real-time data.\n\n")
396
 
397
  def do_web_search(query: str) -> str:
398
- """Perform web search and format the results."""
399
  try:
400
- # Web search
401
  arts = brave_search(query, 20)
402
  if not arts:
403
  logging.warning("No search results, using fallback content")
404
  return mock_results(query)
405
 
406
- # Image search
407
  images = brave_image_search(query, 5)
408
-
409
- # Video search
410
  videos = brave_video_search(query, 2)
 
411
 
412
- # News search
413
- news = brave_news_search(query, 3)
414
 
415
- # Format all results
416
- result = "# Web Search Results\nUse these results to provide a comprehensive answer with multiple sources. Include relevant images, videos, and links.\n\n"
417
-
418
- # Add web results
419
  result += "## Web Results\n\n"
420
- for a in arts[:10]: # Limit to top 10 results
421
  result += f"### Result {a['index']}: {a['title']}\n\n{a['snippet']}\n\n"
422
  result += f"**Source**: [{a['displayed_link']}]({a['link']})\n\n---\n"
423
 
424
- # Add image results if available
425
  if images:
426
  result += "## Image Results\n\n"
427
  for img in images:
@@ -429,7 +364,6 @@ def do_web_search(query: str) -> str:
429
  result += f"![{img['title']}]({img['image_url']})\n\n"
430
  result += f"**Source**: [{img.get('source_url', 'Image source')}]({img.get('source_url', '#')})\n\n"
431
 
432
- # Add video results if available
433
  if videos:
434
  result += "## Video Results\n\n"
435
  for vid in videos:
@@ -438,7 +372,6 @@ def do_web_search(query: str) -> str:
438
  result += f"![Thumbnail]({vid['thumbnail_url']})\n\n"
439
  result += f"**Watch**: [{vid['source']}]({vid['video_url']})\n\n"
440
 
441
- # Add news results if available
442
  if news:
443
  result += "## News Results\n\n"
444
  for n in news:
@@ -453,7 +386,6 @@ def do_web_search(query: str) -> str:
453
 
454
  # ──────────────────────────────── File Upload Handling ─────────────────────
455
  def process_text_file(file):
456
- """Handle text file"""
457
  try:
458
  content = file.read()
459
  file.seek(0)
@@ -462,15 +394,13 @@ def process_text_file(file):
462
  if len(text) > 10000:
463
  text = text[:9700] + "...(truncated)..."
464
 
465
- result = f"## Text File: {file.name}\n\n"
466
- result += text
467
  return result
468
  except Exception as e:
469
  logging.error(f"Error processing text file: {str(e)}")
470
  return f"Error processing text file: {str(e)}"
471
 
472
  def process_csv_file(file):
473
- """Handle CSV file"""
474
  try:
475
  content = file.read()
476
  file.seek(0)
@@ -491,8 +421,7 @@ def process_csv_file(file):
491
  result += "Unable to display CSV data.\n\n"
492
  except Exception as e:
493
  logging.error(f"Markdown table conversion error: {e}")
494
- result += "Displaying data as text:\n\n"
495
- result += str(preview_df) + "\n\n"
496
 
497
  num_cols = df.select_dtypes(include=['number']).columns
498
  if len(num_cols) > 0:
@@ -514,21 +443,15 @@ def process_csv_file(file):
514
  return f"Error processing CSV file: {str(e)}"
515
 
516
  def process_pdf_file(file):
517
- """Handle PDF file"""
518
  try:
519
- # Read file in bytes
520
  file_bytes = file.read()
521
  file.seek(0)
522
 
523
- # Use PyPDF2
524
  pdf_file = io.BytesIO(file_bytes)
525
  reader = PyPDF2.PdfReader(pdf_file, strict=False)
526
 
527
- # Basic info
528
- result = f"## PDF File: {file.name}\n\n"
529
- result += f"- Total pages: {len(reader.pages)}\n\n"
530
 
531
- # Extract text by page (limit to first 5 pages)
532
  max_pages = min(5, len(reader.pages))
533
  all_text = ""
534
 
@@ -536,22 +459,19 @@ def process_pdf_file(file):
536
  try:
537
  page = reader.pages[i]
538
  page_text = page.extract_text()
539
-
540
  current_page_text = f"### Page {i+1}\n\n"
541
  if page_text and len(page_text.strip()) > 0:
542
- # Limit to 1500 characters per page
543
  if len(page_text) > 1500:
544
  current_page_text += page_text[:1500] + "...(truncated)...\n\n"
545
  else:
546
  current_page_text += page_text + "\n\n"
547
  else:
548
- current_page_text += "(No text could be extracted from this page)\n\n"
549
 
550
  all_text += current_page_text
551
 
552
- # If total text is too long, break
553
  if len(all_text) > 8000:
554
- all_text += "...(truncating remaining pages; PDF is too large)...\n\n"
555
  break
556
 
557
  except Exception as page_err:
@@ -559,23 +479,20 @@ def process_pdf_file(file):
559
  all_text += f"### Page {i+1}\n\n(Error extracting content: {str(page_err)})\n\n"
560
 
561
  if len(reader.pages) > max_pages:
562
- all_text += f"\nNote: Only the first {max_pages} pages are shown out of {len(reader.pages)} total.\n\n"
563
 
564
  result += "### PDF Content\n\n" + all_text
565
  return result
566
 
567
  except Exception as e:
568
  logging.error(f"PDF file processing error: {str(e)}")
569
- return f"## PDF File: {file.name}\n\nError occurred: {str(e)}\n\nThis PDF file cannot be processed."
570
 
571
  def process_uploaded_files(files):
572
- """Combine the contents of all uploaded files into one string."""
573
  if not files:
574
  return None
575
 
576
- result = "# Uploaded File Contents\n\n"
577
- result += "Below is the content from the files provided by the user. Integrate this data as a main source of information for your response.\n\n"
578
-
579
  for file in files:
580
  try:
581
  ext = file.name.split('.')[-1].lower()
@@ -594,8 +511,26 @@ def process_uploaded_files(files):
594
  return result
595
 
596
  # ──────────────────────────────── Image & Utility ─────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
597
  def generate_image(prompt, w=768, h=768, g=3.5, steps=30, seed=3):
598
- """Image generation function (via Gradio endpoint)."""
599
  if not prompt:
600
  return None, "Insufficient prompt"
601
  try:
@@ -612,9 +547,6 @@ def generate_image(prompt, w=768, h=768, g=3.5, steps=30, seed=3):
612
  return None, str(e)
613
 
614
  def extract_image_prompt(response_text: str, topic: str):
615
- """
616
- Generate a single-line English image prompt from the response content.
617
- """
618
  client = get_openai_client()
619
  try:
620
  response = client.chat.completions.create(
@@ -627,28 +559,24 @@ def extract_image_prompt(response_text: str, topic: str):
627
  max_tokens=80,
628
  top_p=1
629
  )
630
-
631
  return response.choices[0].message.content.strip()
632
  except Exception as e:
633
  logging.error(f"OpenAI image prompt generation error: {e}")
634
  return f"A professional photo related to {topic}, high quality"
635
 
636
- def md_to_html(md: str, title="Perplexity-like Response"):
637
- """Convert Markdown to HTML."""
638
  return f"<!DOCTYPE html><html><head><title>{title}</title><meta charset='utf-8'></head><body>{markdown.markdown(md)}</body></html>"
639
 
640
  def keywords(text: str, top=5):
641
- """Simple keyword extraction for query. Returns first N words (roughly)."""
642
  cleaned = re.sub(r"[^가-힣a-zA-Z0-9\s]", "", text)
643
  return " ".join(cleaned.split()[:top])
644
 
645
  # ──────────────────────────────── Streamlit UI ────────────────────────────
646
  def perplexity_app():
647
- st.title("Perplexity-like AI Assistant")
648
 
649
- # Set default session state
650
  if "ai_model" not in st.session_state:
651
- st.session_state.ai_model = "gpt-4.1-mini" # 고정 모델 설정
652
  if "messages" not in st.session_state:
653
  st.session_state.messages = []
654
  if "auto_save" not in st.session_state:
@@ -662,7 +590,6 @@ def perplexity_app():
662
  if "response_style" not in st.session_state:
663
  st.session_state.response_style = "professional"
664
 
665
- # Sidebar UI
666
  sb = st.sidebar
667
  sb.title("Search Settings")
668
 
@@ -708,7 +635,6 @@ def perplexity_app():
708
  None
709
  )
710
  if latest_response:
711
- # Extract a title from the response - first heading or first line
712
  title_match = re.search(r"# (.*?)(\n|$)", latest_response)
713
  if title_match:
714
  title = title_match.group(1).strip()
@@ -804,7 +730,7 @@ def perplexity_app():
804
  with st.chat_message(m["role"]):
805
  st.markdown(m["content"], unsafe_allow_html=True)
806
 
807
- # Display images if present in the message
808
  if "images" in m and m["images"]:
809
  st.subheader("Related Images")
810
  cols = st.columns(min(3, len(m["images"])))
@@ -815,26 +741,25 @@ def perplexity_app():
815
  img_url = img_data.get('url', '')
816
  caption = img_data.get('title', 'Related image')
817
  if img_url:
818
- st.image(img_url, caption=caption, use_column_width=True)
819
- if img_data.get('source'):
820
- st.markdown(f"[Source]({img_data['source']})")
821
  except Exception as img_err:
822
  st.warning(f"Could not display image: {img_err}")
823
 
824
- # Display videos if present
825
  if "videos" in m and m["videos"]:
826
  st.subheader("Related Videos")
827
  for video in m["videos"]:
828
  video_title = video.get('title', 'Related video')
829
- video_url = video.get('url', '')
830
- thumbnail = video.get('thumbnail', '')
831
 
832
- # Display video with thumbnail if available
833
  if thumbnail:
834
  col1, col2 = st.columns([1, 3])
835
  with col1:
836
  try:
837
- st.image(thumbnail, width=120)
838
  except:
839
  st.write("🎬")
840
  with col2:
@@ -849,16 +774,13 @@ def perplexity_app():
849
  if query:
850
  process_input(query, uploaded_files)
851
 
852
- # μ‚¬μ΄λ“œλ°” ν•˜λ‹¨ λ°°μ§€(링크) μΆ”κ°€
853
  sb.markdown("---")
854
  sb.markdown("Created by [https://ginigen.com](https://ginigen.com) | [YouTube Channel](https://www.youtube.com/@ginipickaistudio)")
855
 
856
  def process_example(topic):
857
- """Process the selected example query."""
858
  process_input(topic, [])
859
 
860
  def process_input(query: str, uploaded_files):
861
- # Add user's message
862
  if not any(m["role"] == "user" and m["content"] == query for m in st.session_state.messages):
863
  st.session_state.messages.append({"role": "user", "content": query})
864
 
@@ -874,13 +796,11 @@ def process_input(query: str, uploaded_files):
874
  has_uploaded_files = bool(uploaded_files) and len(uploaded_files) > 0
875
 
876
  try:
877
- # μƒνƒœ ν‘œμ‹œ
878
  status = st.status("Preparing to answer your query...")
879
  status.update(label="Initializing client...")
880
 
881
  client = get_openai_client()
882
 
883
- # Web search
884
  search_content = None
885
  image_results = []
886
  video_results = []
@@ -891,7 +811,6 @@ def process_input(query: str, uploaded_files):
891
  with st.spinner("Searching the web..."):
892
  search_content = do_web_search(keywords(query, top=5))
893
 
894
- # Perform specific searches for images, videos, news
895
  try:
896
  status.update(label="Finding images and videos...")
897
  image_results = brave_image_search(query, 5)
@@ -900,15 +819,12 @@ def process_input(query: str, uploaded_files):
900
  except Exception as search_err:
901
  logging.error(f"Media search error: {search_err}")
902
 
903
- # Process uploaded files → content
904
  file_content = None
905
  if has_uploaded_files:
906
  status.update(label="Processing uploaded files...")
907
  with st.spinner("Analyzing files..."):
908
  file_content = process_uploaded_files(uploaded_files)
909
 
910
- # 최종적으로 사용할 이미지/비디오 목록 구성
911
- # (이번에는 "fallback" 없이 Brave 검색 결과만 사용)
912
  valid_images = []
913
  for img in image_results:
914
  url = img.get('image_url')
@@ -930,7 +846,6 @@ def process_input(query: str, uploaded_files):
930
  'source': vid.get('source', 'Video source')
931
  })
932
 
933
- # Build system prompt
934
  status.update(label="Preparing comprehensive answer...")
935
  sys_prompt = get_system_prompt(
936
  mode=st.session_state.search_mode,
@@ -939,22 +854,16 @@ def process_input(query: str, uploaded_files):
939
  include_uploaded_files=has_uploaded_files
940
  )
941
 
942
- # 메시지 구성
943
  api_messages = [
944
  {"role": "system", "content": sys_prompt}
945
  ]
946
 
947
  user_content = query
948
-
949
- # 검색 결과 추가
950
  if search_content:
951
  user_content += "\n\n" + search_content
952
-
953
- # 파일 내용 추가
954
  if file_content:
955
  user_content += "\n\n" + file_content
956
 
957
- # 이미지/비디오 메타정보를 user_content에 추가
958
  if valid_images:
959
  user_content += "\n\n# Available Images\n"
960
  for i, img in enumerate(valid_images):
@@ -967,10 +876,8 @@ def process_input(query: str, uploaded_files):
967
  for i, vid in enumerate(valid_videos):
968
  user_content += f"\n{i+1}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n"
969
 
970
- # OpenAI API에 전달할 최종 메시지
971
  api_messages.append({"role": "user", "content": user_content})
972
 
973
- # OpenAI API 스트리밍 호출
974
  try:
975
  stream = client.chat.completions.create(
976
  model="gpt-4.1-mini",
@@ -981,17 +888,14 @@ def process_input(query: str, uploaded_files):
981
  stream=True
982
  )
983
 
984
- # 스트리밍으로 partial content 수신
985
  for chunk in stream:
986
  if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None:
987
  content_delta = chunk.choices[0].delta.content
988
  full_response += content_delta
989
  message_placeholder.markdown(full_response + "▌", unsafe_allow_html=True)
990
 
991
- # 최종 응답 표시
992
  message_placeholder.markdown(full_response, unsafe_allow_html=True)
993
 
994
- # 실제 검색된 이미지를 UI에 표시
995
  if valid_images:
996
  st.subheader("Related Images")
997
  image_cols = st.columns(min(3, len(valid_images)))
@@ -1002,26 +906,25 @@ def process_input(query: str, uploaded_files):
1002
  with image_cols[col_idx]:
1003
  img_url = img_data['url']
1004
  caption = img_data['title']
1005
- st.image(img_url, caption=caption, use_column_width=True)
1006
  if img_data.get('source'):
1007
  st.markdown(f"[Source]({img_data['source']})")
1008
  except Exception as img_err:
1009
  logging.warning(f"Error displaying image: {img_err}")
1010
 
1011
- # 실제 검색된 비디오를 UI에 표시
1012
  if valid_videos:
1013
  st.subheader("Related Videos")
1014
  for video in valid_videos:
1015
  video_title = video.get('title', 'Related video')
1016
- video_url = video.get('url', '')
1017
- thumbnail = video.get('thumbnail', '')
1018
 
1019
  if thumbnail:
1020
  try:
1021
  col1, col2 = st.columns([1, 3])
1022
  with col1:
1023
  try:
1024
- st.image(thumbnail, width=120)
1025
  except:
1026
  st.write("🎬")
1027
  with col2:
@@ -1036,7 +939,6 @@ def process_input(query: str, uploaded_files):
1036
 
1037
  status.update(label="Response completed!", state="complete")
1038
 
1039
- # 세션 저장
1040
  st.session_state.messages.append({
1041
  "role": "assistant",
1042
  "content": full_response,
@@ -1050,7 +952,6 @@ def process_input(query: str, uploaded_files):
1050
  status.update(label=f"Error: {error_message}", state="error")
1051
  raise Exception(f"Response generation error: {error_message}")
1052
 
1053
- # 추가 이미지 생성(옵션)
1054
  if st.session_state.generate_image and full_response:
1055
  with st.spinner("Generating custom image..."):
1056
  try:
@@ -1058,12 +959,11 @@ def process_input(query: str, uploaded_files):
1058
  img, cap = generate_image(ip)
1059
  if img:
1060
  st.subheader("AI-Generated Image")
1061
- st.image(img, caption=cap)
1062
  except Exception as img_error:
1063
  logging.error(f"Image generation error: {str(img_error)}")
1064
  st.warning("Custom image generation failed.")
1065
 
1066
- # 다운로드 버튼
1067
  if full_response:
1068
  st.subheader("Download This Response")
1069
  c1, c2 = st.columns(2)
@@ -1080,7 +980,6 @@ def process_input(query: str, uploaded_files):
1080
  mime="text/html"
1081
  )
1082
 
1083
- # Auto save
1084
  if st.session_state.auto_save and st.session_state.messages:
1085
  try:
1086
  fn = f"conversation_history_auto_{datetime.now():%Y%m%d_%H%M%S}.json"
@@ -1098,6 +997,7 @@ def process_input(query: str, uploaded_files):
1098
 
1099
  # ──────────────────────────────── main ────────────────────────────────────
1100
  def main():
 
1101
  perplexity_app()
1102
 
1103
  if __name__ == "__main__":
 
51
 
52
  # ──────────────────────────────── OpenAI Client ──────────────────────────
53
 
 
54
  @st.cache_resource
55
  def get_openai_client():
56
  """Create an OpenAI client with timeout and retry settings."""
 
58
  raise RuntimeError("⚠️ OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")
59
  return OpenAI(
60
  api_key=OPENAI_API_KEY,
61
+ timeout=60.0,
62
+ max_retries=3
63
  )
64
 
65
  # ──────────────────────────────── System Prompt ─────────────────────────
66
  def get_system_prompt(mode="comprehensive", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
67
  """
68
+ Generate a system prompt for the 'Perplexity Clone' interface based on:
69
  - The selected search mode and style
70
  - Guidelines for using web search results and uploaded files
71
  """
 
 
72
  comprehensive_prompt = """
73
  You are an advanced AI assistant that provides comprehensive answers with multiple sources, similar to Perplexity.
74
 
 
88
  - Format your response using markdown for readability
89
  """
90
 
 
91
  mode_prompts = {
92
  "academic": """
93
  Your focus is on providing academic and research-focused responses:
 
123
  """
124
  }
125
 
 
126
  style_guides = {
127
  "professional": "Use a professional, authoritative voice. Clearly explain technical terms and present data systematically.",
128
  "casual": "Use a relaxed, conversational style with a friendly tone. Include relatable examples and occasionally use informal expressions.",
 
130
  "detailed": "Provide thorough explanations with comprehensive background information. Explore nuances and edge cases. Present multiple perspectives and detailed analysis."
131
  }
132
 
 
133
  search_guide = """
134
  Guidelines for Using Search Results:
135
  - Include source links directly in your response using markdown: [Source Name](URL)
 
141
  - Include a "References" section at the end listing all major sources with links
142
  """
143
 
 
144
  upload_guide = """
145
  Guidelines for Using Uploaded Files:
146
  - Treat the uploaded files as primary sources for your response
 
152
  - When information conflicts, prioritize file content over general web results
153
  """
154
 
155
+ # Base prompt
156
  if mode == "comprehensive":
157
  final_prompt = comprehensive_prompt
158
  else:
159
  final_prompt = comprehensive_prompt + "\n" + mode_prompts.get(mode, "")
160
 
161
+ # Style
162
  if style in style_guides:
163
  final_prompt += f"\n\nTone and Style: {style_guides[style]}"
164
 
 
165
  if include_search_results:
166
  final_prompt += f"\n\n{search_guide}"
167
 
 
168
  if include_uploaded_files:
169
  final_prompt += f"\n\n{upload_guide}"
170
 
 
171
  final_prompt += """
172
  \n\nAdditional Formatting Requirements:
173
  - Use markdown headings (## and ###) to organize your response
 
176
  - Format your response with proper spacing and paragraph breaks
177
  - Make all links clickable by using proper markdown format: [text](url)
178
  """
 
179
  return final_prompt
180
 
181
  # ──────────────────────────────── Brave Search API ────────────────────────
182
  @st.cache_data(ttl=3600)
183
  def brave_search(query: str, count: int = 20):
 
 
 
 
184
  if not BRAVE_KEY:
185
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
186
 
187
+ headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
 
 
 
 
188
  params = {"q": query, "count": str(count)}
189
 
190
  for attempt in range(3):
 
218
  except Exception as e:
219
  logging.error(f"Brave search failure (attempt {attempt+1}/3): {e}")
220
  if attempt < 2:
221
+ # 여기서 대기 시간 늘림 (2초 → 5초)
222
+ time.sleep(5)
223
 
224
  return []
225
 
226
  @st.cache_data(ttl=3600)
227
  def brave_image_search(query: str, count: int = 10):
 
 
 
 
228
  if not BRAVE_KEY:
229
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
230
 
231
+ headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
232
+ params = {"q": query, "count": str(count),"search_lang": "en","country": "us","spellcheck": "1"}
 
 
 
 
 
 
 
 
 
 
233
 
234
  for attempt in range(3):
235
  try:
 
254
  except Exception as e:
255
  logging.error(f"Brave image search failure (attempt {attempt+1}/3): {e}")
256
  if attempt < 2:
257
+ time.sleep(5)
258
 
259
  return []
260
 
261
  @st.cache_data(ttl=3600)
262
  def brave_video_search(query: str, count: int = 5):
 
 
 
 
263
  if not BRAVE_KEY:
264
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
265
 
266
+ headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
267
+ params = {"q": query, "count": str(count)}
 
 
 
 
 
 
 
268
 
269
  for attempt in range(3):
270
  try:
 
288
  except Exception as e:
289
  logging.error(f"Brave video search failure (attempt {attempt+1}/3): {e}")
290
  if attempt < 2:
291
+ time.sleep(5)
292
 
293
  return []
294
 
295
  @st.cache_data(ttl=3600)
296
  def brave_news_search(query: str, count: int = 5):
 
 
 
 
297
  if not BRAVE_KEY:
298
  raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
299
 
300
+ headers = {"Accept": "application/json","Accept-Encoding": "gzip","X-Subscription-Token": BRAVE_KEY}
301
+ params = {"q": query, "count": str(count)}
 
 
 
 
 
 
 
302
 
303
  for attempt in range(3):
304
  try:
 
323
  except Exception as e:
324
  logging.error(f"Brave news search failure (attempt {attempt+1}/3): {e}")
325
  if attempt < 2:
326
+ time.sleep(5)
327
 
328
  return []
329
 
330
  def mock_results(query: str) -> str:
 
331
  ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
332
  return (f"# Fallback Search Content (Generated: {ts})\n\n"
333
  f"The search API request failed or returned no results for '{query}'. "
 
340
  f"Note: This is fallback guidance, not real-time data.\n\n")
341
 
342
  def do_web_search(query: str) -> str:
 
343
  try:
 
344
  arts = brave_search(query, 20)
345
  if not arts:
346
  logging.warning("No search results, using fallback content")
347
  return mock_results(query)
348
 
 
349
  images = brave_image_search(query, 5)
 
 
350
  videos = brave_video_search(query, 2)
351
+ news = brave_news_search(query, 3)
352
 
353
+ result = "# Web Search Results\nUse these results to provide a comprehensive answer with multiple sources.\n\n"
 
354
 
 
 
 
 
355
  result += "## Web Results\n\n"
356
+ for a in arts[:10]:
357
  result += f"### Result {a['index']}: {a['title']}\n\n{a['snippet']}\n\n"
358
  result += f"**Source**: [{a['displayed_link']}]({a['link']})\n\n---\n"
359
 
 
360
  if images:
361
  result += "## Image Results\n\n"
362
  for img in images:
 
364
  result += f"![{img['title']}]({img['image_url']})\n\n"
365
  result += f"**Source**: [{img.get('source_url', 'Image source')}]({img.get('source_url', '#')})\n\n"
366
 
 
367
  if videos:
368
  result += "## Video Results\n\n"
369
  for vid in videos:
 
372
  result += f"![Thumbnail]({vid['thumbnail_url']})\n\n"
373
  result += f"**Watch**: [{vid['source']}]({vid['video_url']})\n\n"
374
 
 
375
  if news:
376
  result += "## News Results\n\n"
377
  for n in news:
 
386
 
387
  # ──────────────────────────────── File Upload Handling ─────────────────────
388
  def process_text_file(file):
 
389
  try:
390
  content = file.read()
391
  file.seek(0)
 
394
  if len(text) > 10000:
395
  text = text[:9700] + "...(truncated)..."
396
 
397
+ result = f"## Text File: {file.name}\n\n" + text
 
398
  return result
399
  except Exception as e:
400
  logging.error(f"Error processing text file: {str(e)}")
401
  return f"Error processing text file: {str(e)}"
402
 
403
  def process_csv_file(file):
 
404
  try:
405
  content = file.read()
406
  file.seek(0)
 
421
  result += "Unable to display CSV data.\n\n"
422
  except Exception as e:
423
  logging.error(f"Markdown table conversion error: {e}")
424
+ result += "Displaying data as text:\n\n" + str(preview_df) + "\n\n"
 
425
 
426
  num_cols = df.select_dtypes(include=['number']).columns
427
  if len(num_cols) > 0:
 
443
  return f"Error processing CSV file: {str(e)}"
444
 
445
  def process_pdf_file(file):
 
446
  try:
 
447
  file_bytes = file.read()
448
  file.seek(0)
449
 
 
450
  pdf_file = io.BytesIO(file_bytes)
451
  reader = PyPDF2.PdfReader(pdf_file, strict=False)
452
 
453
+ result = f"## PDF File: {file.name}\n\n- Total pages: {len(reader.pages)}\n\n"
 
 
454
 
 
455
  max_pages = min(5, len(reader.pages))
456
  all_text = ""
457
 
 
459
  try:
460
  page = reader.pages[i]
461
  page_text = page.extract_text()
 
462
  current_page_text = f"### Page {i+1}\n\n"
463
  if page_text and len(page_text.strip()) > 0:
 
464
  if len(page_text) > 1500:
465
  current_page_text += page_text[:1500] + "...(truncated)...\n\n"
466
  else:
467
  current_page_text += page_text + "\n\n"
468
  else:
469
+ current_page_text += "(No text could be extracted)\n\n"
470
 
471
  all_text += current_page_text
472
 
 
473
  if len(all_text) > 8000:
474
+ all_text += "...(truncating remaining pages)...\n\n"
475
  break
476
 
477
  except Exception as page_err:
 
479
  all_text += f"### Page {i+1}\n\n(Error extracting content: {str(page_err)})\n\n"
480
 
481
  if len(reader.pages) > max_pages:
482
+ all_text += f"\nNote: Only the first {max_pages} pages are shown.\n\n"
483
 
484
  result += "### PDF Content\n\n" + all_text
485
  return result
486
 
487
  except Exception as e:
488
  logging.error(f"PDF file processing error: {str(e)}")
489
+ return f"## PDF File: {file.name}\n\nError: {str(e)}\n\nCannot process."
490
 
491
  def process_uploaded_files(files):
 
492
  if not files:
493
  return None
494
 
495
+ result = "# Uploaded File Contents\n\nBelow is the content from the files provided by the user.\n\n"
 
 
496
  for file in files:
497
  try:
498
  ext = file.name.split('.')[-1].lower()
 
511
  return result
512
 
513
  # ──────────────────────────────── Image & Utility ─────────────────────────
514
+
515
def load_and_show_image(img_url: str, caption: str = "Image"):
    """Download an image over HTTP and render it in the Streamlit page.

    The request is sent with a desktop-browser ``User-Agent`` header so that
    hosts applying hotlink protection are less likely to reject it.  The
    image bytes are downloaded first and only then passed to ``st.image``.

    Args:
        img_url: URL of the image to download.
        caption: Caption displayed with the image.

    Returns:
        None.  Any failure (request error, non-2xx status, undecodable image
        data, rendering error) is reported through ``st.warning`` rather than
        raised.
    """
    headers = {
        "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
                       " AppleWebKit/537.36 (KHTML, like Gecko)"
                       " Chrome/98.0.4758.102 Safari/537.36")
    }
    try:
        response = requests.get(img_url, headers=headers, timeout=10)
        # Turn 4xx/5xx answers into exceptions so an error page is never
        # handed to the image decoder.
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))
        st.image(image, caption=caption, use_container_width=True)
    except Exception as e:
        # Deliberately broad: a single broken image must not abort rendering
        # of the rest of the page.  The message is Korean for
        # "Image loading failed" (restored from mis-encoded text).
        st.warning(f"이미지 로딩 실패: {e}")
532
+
533
  def generate_image(prompt, w=768, h=768, g=3.5, steps=30, seed=3):
 
534
  if not prompt:
535
  return None, "Insufficient prompt"
536
  try:
 
547
  return None, str(e)
548
 
549
  def extract_image_prompt(response_text: str, topic: str):
 
 
 
550
  client = get_openai_client()
551
  try:
552
  response = client.chat.completions.create(
 
559
  max_tokens=80,
560
  top_p=1
561
  )
 
562
  return response.choices[0].message.content.strip()
563
  except Exception as e:
564
  logging.error(f"OpenAI image prompt generation error: {e}")
565
  return f"A professional photo related to {topic}, high quality"
566
 
567
def md_to_html(md: str, title="Perplexity Clone Response"):
    """Wrap rendered Markdown in a minimal standalone HTML document.

    Args:
        md: Markdown source text; converted with ``markdown.markdown``.
        title: Plain-text document title.  It is HTML-escaped before being
            placed inside ``<title>`` so that characters such as ``<``, ``>``
            and ``&`` cannot break the document or inject markup.

    Returns:
        The complete HTML document as a single string.
    """
    # Local import keeps this block self-contained.
    from html import escape

    safe_title = escape(str(title))
    body_html = markdown.markdown(md)
    return (
        f"<!DOCTYPE html><html><head><title>{safe_title}</title>"
        f"<meta charset='utf-8'></head><body>{body_html}</body></html>"
    )
569
 
570
def keywords(text: str, top=5):
    """Reduce free text to a short, space-separated keyword string.

    Every character that is not a Hangul syllable, an ASCII letter, an ASCII
    digit or whitespace is removed.  The remaining text is split on
    whitespace and the first ``top`` tokens are joined with single spaces.

    Args:
        text: Input text.
        top: Maximum number of tokens to keep.

    Returns:
        The first ``top`` cleaned tokens joined by spaces; an empty string
        when nothing survives the cleaning.
    """
    # The Hangul syllable block is written with explicit escapes
    # (U+AC00..U+D7A3) so the range cannot be corrupted by a file-encoding
    # mishap; a mis-decoded range makes ``re`` raise "bad character range".
    cleaned = re.sub(r"[^\uAC00-\uD7A3a-zA-Z0-9\s]", "", text)
    return " ".join(cleaned.split()[:top])
573
 
574
  # ──────────────────────────────── Streamlit UI ────────────────────────────
575
  def perplexity_app():
576
+ st.title("Perplexity Clone AI Assistant")
577
 
 
578
  if "ai_model" not in st.session_state:
579
+ st.session_state.ai_model = "gpt-4.1-mini"
580
  if "messages" not in st.session_state:
581
  st.session_state.messages = []
582
  if "auto_save" not in st.session_state:
 
590
  if "response_style" not in st.session_state:
591
  st.session_state.response_style = "professional"
592
 
 
593
  sb = st.sidebar
594
  sb.title("Search Settings")
595
 
 
635
  None
636
  )
637
  if latest_response:
 
638
  title_match = re.search(r"# (.*?)(\n|$)", latest_response)
639
  if title_match:
640
  title = title_match.group(1).strip()
 
730
  with st.chat_message(m["role"]):
731
  st.markdown(m["content"], unsafe_allow_html=True)
732
 
733
+ # Images
734
  if "images" in m and m["images"]:
735
  st.subheader("Related Images")
736
  cols = st.columns(min(3, len(m["images"])))
 
741
  img_url = img_data.get('url', '')
742
  caption = img_data.get('title', 'Related image')
743
  if img_url:
744
+ load_and_show_image(img_url, caption=caption)
745
+ if img_data.get('source'):
746
+ st.markdown(f"[Source]({img_data['source']})")
747
  except Exception as img_err:
748
  st.warning(f"Could not display image: {img_err}")
749
 
750
+ # Videos
751
  if "videos" in m and m["videos"]:
752
  st.subheader("Related Videos")
753
  for video in m["videos"]:
754
  video_title = video.get('title', 'Related video')
755
+ video_url = video.get('url', '')
756
+ thumbnail = video.get('thumbnail', '')
757
 
 
758
  if thumbnail:
759
  col1, col2 = st.columns([1, 3])
760
  with col1:
761
  try:
762
+ load_and_show_image(thumbnail, caption="Video Thumbnail")
763
  except:
764
  st.write("🎬")
765
  with col2:
 
774
  if query:
775
  process_input(query, uploaded_files)
776
 
 
777
  sb.markdown("---")
778
  sb.markdown("Created by [https://ginigen.com](https://ginigen.com) | [YouTube Channel](https://www.youtube.com/@ginipickaistudio)")
779
 
780
def process_example(topic):
    """Submit ``topic`` to ``process_input`` as a query with no uploaded files."""
    process_input(query=topic, uploaded_files=[])
782
 
783
  def process_input(query: str, uploaded_files):
 
784
  if not any(m["role"] == "user" and m["content"] == query for m in st.session_state.messages):
785
  st.session_state.messages.append({"role": "user", "content": query})
786
 
 
796
  has_uploaded_files = bool(uploaded_files) and len(uploaded_files) > 0
797
 
798
  try:
 
799
  status = st.status("Preparing to answer your query...")
800
  status.update(label="Initializing client...")
801
 
802
  client = get_openai_client()
803
 
 
804
  search_content = None
805
  image_results = []
806
  video_results = []
 
811
  with st.spinner("Searching the web..."):
812
  search_content = do_web_search(keywords(query, top=5))
813
 
 
814
  try:
815
  status.update(label="Finding images and videos...")
816
  image_results = brave_image_search(query, 5)
 
819
  except Exception as search_err:
820
  logging.error(f"Media search error: {search_err}")
821
 
 
822
  file_content = None
823
  if has_uploaded_files:
824
  status.update(label="Processing uploaded files...")
825
  with st.spinner("Analyzing files..."):
826
  file_content = process_uploaded_files(uploaded_files)
827
 
 
 
828
  valid_images = []
829
  for img in image_results:
830
  url = img.get('image_url')
 
846
  'source': vid.get('source', 'Video source')
847
  })
848
 
 
849
  status.update(label="Preparing comprehensive answer...")
850
  sys_prompt = get_system_prompt(
851
  mode=st.session_state.search_mode,
 
854
  include_uploaded_files=has_uploaded_files
855
  )
856
 
 
857
  api_messages = [
858
  {"role": "system", "content": sys_prompt}
859
  ]
860
 
861
  user_content = query
 
 
862
  if search_content:
863
  user_content += "\n\n" + search_content
 
 
864
  if file_content:
865
  user_content += "\n\n" + file_content
866
 
 
867
  if valid_images:
868
  user_content += "\n\n# Available Images\n"
869
  for i, img in enumerate(valid_images):
 
876
  for i, vid in enumerate(valid_videos):
877
  user_content += f"\n{i+1}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n"
878
 
 
879
  api_messages.append({"role": "user", "content": user_content})
880
 
 
881
  try:
882
  stream = client.chat.completions.create(
883
  model="gpt-4.1-mini",
 
888
  stream=True
889
  )
890
 
 
891
  for chunk in stream:
892
  if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None:
893
  content_delta = chunk.choices[0].delta.content
894
  full_response += content_delta
895
  message_placeholder.markdown(full_response + "β–Œ", unsafe_allow_html=True)
896
 
 
897
  message_placeholder.markdown(full_response, unsafe_allow_html=True)
898
 
 
899
  if valid_images:
900
  st.subheader("Related Images")
901
  image_cols = st.columns(min(3, len(valid_images)))
 
906
  with image_cols[col_idx]:
907
  img_url = img_data['url']
908
  caption = img_data['title']
909
+ load_and_show_image(img_url, caption=caption)
910
  if img_data.get('source'):
911
  st.markdown(f"[Source]({img_data['source']})")
912
  except Exception as img_err:
913
  logging.warning(f"Error displaying image: {img_err}")
914
 
 
915
  if valid_videos:
916
  st.subheader("Related Videos")
917
  for video in valid_videos:
918
  video_title = video.get('title', 'Related video')
919
+ video_url = video.get('url', '')
920
+ thumbnail = video.get('thumbnail', '')
921
 
922
  if thumbnail:
923
  try:
924
  col1, col2 = st.columns([1, 3])
925
  with col1:
926
  try:
927
+ load_and_show_image(thumbnail, caption="Video Thumbnail")
928
  except:
929
  st.write("🎬")
930
  with col2:
 
939
 
940
  status.update(label="Response completed!", state="complete")
941
 
 
942
  st.session_state.messages.append({
943
  "role": "assistant",
944
  "content": full_response,
 
952
  status.update(label=f"Error: {error_message}", state="error")
953
  raise Exception(f"Response generation error: {error_message}")
954
 
 
955
  if st.session_state.generate_image and full_response:
956
  with st.spinner("Generating custom image..."):
957
  try:
 
959
  img, cap = generate_image(ip)
960
  if img:
961
  st.subheader("AI-Generated Image")
962
+ st.image(img, caption=cap, use_container_width=True)
963
  except Exception as img_error:
964
  logging.error(f"Image generation error: {str(img_error)}")
965
  st.warning("Custom image generation failed.")
966
 
 
967
  if full_response:
968
  st.subheader("Download This Response")
969
  c1, c2 = st.columns(2)
 
980
  mime="text/html"
981
  )
982
 
 
983
  if st.session_state.auto_save and st.session_state.messages:
984
  try:
985
  fn = f"conversation_history_auto_{datetime.now():%Y%m%d_%H%M%S}.json"
 
997
 
998
  # ──────────────────────────────── main ────────────────────────────────────
999
def main():
    """Write a timestamped startup banner to the page, then run the app UI."""
    started_at = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    st.write("==== Application Startup at", started_at, "=====")
    perplexity_app()
1002
 
1003
  if __name__ == "__main__":