ginipick committed on
Commit
c696d7f
·
verified ·
1 Parent(s): 21a85e8

Delete app-backup2.py

Files changed (1)
  1. app-backup2.py +0 -1119
app-backup2.py DELETED
@@ -1,1119 +0,0 @@
1
- # ──────────────────────────────── Imports ────────────────────────────────
2
- import os, json, re, logging, requests, markdown, time, io
3
- from datetime import datetime
4
-
5
- import streamlit as st
6
- from openai import OpenAI # OpenAI library
7
-
8
- from gradio_client import Client
9
- import pandas as pd
10
- import PyPDF2 # For handling PDF files
11
-
12
- # ──────────────────────────────── Environment Variables / Constants ─────────────────────────
13
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
14
- BRAVE_KEY = os.getenv("SERPHOUSE_API_KEY", "") # Keep this name
15
- BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
16
- BRAVE_IMAGE_ENDPOINT = "https://api.search.brave.com/res/v1/images/search"
17
- BRAVE_VIDEO_ENDPOINT = "https://api.search.brave.com/res/v1/videos/search"
18
- BRAVE_NEWS_ENDPOINT = "https://api.search.brave.com/res/v1/news/search"
19
- IMAGE_API_URL = "http://211.233.58.201:7896"
20
- MAX_TOKENS = 7999
21
-
22
- # Search modes and style definitions (in English)
23
- SEARCH_MODES = {
24
- "comprehensive": "Comprehensive answer with multiple sources",
25
- "academic": "Academic and research-focused results",
26
- "news": "Latest news and current events",
27
- "technical": "Technical and specialized information",
28
- "educational": "Educational and learning resources"
29
- }
30
-
31
- RESPONSE_STYLES = {
32
- "professional": "Professional and formal tone",
33
- "casual": "Friendly and conversational tone",
34
- "simple": "Simple and easy to understand",
35
- "detailed": "Detailed and thorough explanations"
36
- }
37
-
38
- # Example search queries
39
- EXAMPLE_QUERIES = {
40
- "example1": "What are the latest developments in quantum computing?",
41
- "example2": "How does climate change affect biodiversity in tropical rainforests?",
42
- "example3": "What are the economic implications of artificial intelligence in the job market?"
43
- }
44
-
45
- # ──────────────────────────────── Logging ────────────────────────────────
46
- logging.basicConfig(level=logging.INFO,
47
- format="%(asctime)s - %(levelname)s - %(message)s")
48
-
49
- # ──────────────────────────────── OpenAI Client ──────────────────────────
50
-
51
- # Add timeout and retry logic to the OpenAI client
52
- @st.cache_resource
53
- def get_openai_client():
54
- """Create an OpenAI client with timeout and retry settings."""
55
- if not OPENAI_API_KEY:
56
- raise RuntimeError("⚠️ OPENAI_API_KEY environment variable is not set.")
57
- return OpenAI(
58
- api_key=OPENAI_API_KEY,
59
- timeout=60.0, # 60-second timeout
60
- max_retries=3 # retry up to 3 times
61
- )
62
-
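# A minimal usage sketch for the helper above, assuming OPENAI_API_KEY is set;
# the model name mirrors the one hard-coded further down in this file.
client = get_openai_client()
resp = client.chat.completions.create(
    model="gpt-4.1-mini",
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=16,
)
print(resp.choices[0].message.content)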
63
- # ──────────────────────────────── System Prompt ─────────────────────────
64
- def get_system_prompt(mode="comprehensive", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
65
- """
66
- Generate a system prompt for the perplexity-like interface based on:
67
- - The selected search mode and style
68
- - Guidelines for using web search results and uploaded files
69
- """
70
-
71
- # Base prompt for comprehensive mode
72
- comprehensive_prompt = """
73
- You are an advanced AI assistant that provides comprehensive answers with multiple sources, similar to Perplexity.
74
-
75
- Your task is to:
76
- 1. Thoroughly analyze the user's query
77
- 2. Provide a clear, well-structured answer integrating information from multiple sources
78
- 3. Include relevant images, videos, and links in your response
79
- 4. Format your answer with proper headings, bullet points, and sections
80
- 5. Cite sources inline and provide a references section at the end
81
-
82
- Important guidelines:
83
- - Organize information logically with clear section headings
84
- - Use bullet points and numbered lists for clarity
85
- - Include specific, factual information whenever possible
86
- - Provide balanced perspectives on controversial topics
87
- - Display relevant statistics, data, or quotes when appropriate
88
- - Format your response using markdown for readability
89
- """
90
-
91
- # Alternative modes
92
- mode_prompts = {
93
- "academic": """
94
- Your focus is on providing academic and research-focused responses:
95
- - Prioritize peer-reviewed research and academic sources
96
- - Include citations in a formal academic format
97
- - Discuss methodologies and research limitations where relevant
98
- - Present different scholarly perspectives on the topic
99
- - Use precise, technical language appropriate for an academic audience
100
- """,
101
- "news": """
102
- Your focus is on providing the latest news and current events:
103
- - Prioritize recent news articles and current information
104
- - Include publication dates for all news sources
105
- - Present multiple perspectives from different news outlets
106
- - Distinguish between facts and opinions/editorial content
107
- - Update information with the most recent developments
108
- """,
109
- "technical": """
110
- Your focus is on providing technical and specialized information:
111
- - Use precise technical terminology appropriate to the field
112
- - Include code snippets, formulas, or technical diagrams where relevant
113
- - Break down complex concepts into step-by-step explanations
114
- - Reference technical documentation, standards, and best practices
115
- - Consider different technical approaches or methodologies
116
- """,
117
- "educational": """
118
- Your focus is on providing educational and learning resources:
119
- - Structure information in a learning-friendly progression
120
- - Include examples, analogies, and visual explanations
121
- - Highlight key concepts and definitions
122
- - Suggest further learning resources at different difficulty levels
123
- - Present information that's accessible to learners at various levels
124
- """
125
- }
126
-
127
- # Response styles
128
- style_guides = {
129
- "professional": "Use a professional, authoritative voice. Clearly explain technical terms and present data systematically.",
130
- "casual": "Use a relaxed, conversational style with a friendly tone. Include relatable examples and occasionally use informal expressions.",
131
- "simple": "Use straightforward language and avoid jargon. Keep sentences and paragraphs short. Explain concepts as if to someone with no background in the subject.",
132
- "detailed": "Provide thorough explanations with comprehensive background information. Explore nuances and edge cases. Present multiple perspectives and detailed analysis."
133
- }
134
-
135
- # Guidelines for using search results
136
- search_guide = """
137
- Guidelines for Using Search Results:
138
- - Include source links directly in your response using markdown: [Source Name](URL)
139
- - For each major claim or piece of information, indicate its source
140
- - If sources conflict, explain the different perspectives and their reliability
141
- - Include 3-5 relevant images by writing: ![Image description](image_url)
142
- - Include 1-2 relevant video links when appropriate by writing: [Video: Title](video_url)
143
- - Format search information into a cohesive, well-structured response
144
- - Include a "References" section at the end listing all major sources with links
145
- """
146
-
147
- # Guidelines for using uploaded files
148
- upload_guide = """
149
- Guidelines for Using Uploaded Files:
150
- - Treat the uploaded files as primary sources for your response
151
- - Extract and highlight key information from files that directly addresses the query
152
- - Quote relevant passages and cite the specific file
153
- - For numerical data in CSV files, consider creating summary statements
154
- - For PDF content, reference specific sections or pages
155
- - Integrate file information seamlessly with web search results
156
- - When information conflicts, prioritize file content over general web results
157
- """
158
-
159
- # Choose base prompt based on mode
160
- if mode == "comprehensive":
161
- final_prompt = comprehensive_prompt
162
- else:
163
- final_prompt = comprehensive_prompt + "\n" + mode_prompts.get(mode, "")
164
-
165
- # Add style guide
166
- if style in style_guides:
167
- final_prompt += f"\n\nTone and Style: {style_guides[style]}"
168
-
169
- # Add search results guidance
170
- if include_search_results:
171
- final_prompt += f"\n\n{search_guide}"
172
-
173
- # Add uploaded files guidance
174
- if include_uploaded_files:
175
- final_prompt += f"\n\n{upload_guide}"
176
-
177
- # Additional formatting instructions
178
- final_prompt += """
179
- \n\nAdditional Formatting Requirements:
180
- - Use markdown headings (## and ###) to organize your response
181
- - Use bold text (**text**) for emphasis on important points
182
- - Include a "Related Questions" section at the end with 3-5 follow-up questions
183
- - Format your response with proper spacing and paragraph breaks
184
- - Make all links clickable by using proper markdown format: [text](url)
185
- """
186
-
187
- return final_prompt
188
-
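# A minimal sketch of how the prompt builder above combines with the
# mode/style keys from SEARCH_MODES and RESPONSE_STYLES defined earlier.
sys_prompt = get_system_prompt(
    mode="news",
    style="simple",
    include_search_results=True,
    include_uploaded_files=False,
)
api_messages = [
    {"role": "system", "content": sys_prompt},
    {"role": "user", "content": EXAMPLE_QUERIES["example2"]},
]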
189
- # ──────────────────────────────── Brave Search API ────────────────────────
190
- @st.cache_data(ttl=3600)
191
- def brave_search(query: str, count: int = 20):
192
- """
193
- Call the Brave Web Search API → list[dict]
194
- Returns fields: index, title, link, snippet, displayed_link
195
- """
196
- if not BRAVE_KEY:
197
- raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
198
-
199
- headers = {
200
- "Accept": "application/json",
201
- "Accept-Encoding": "gzip",
202
- "X-Subscription-Token": BRAVE_KEY
203
- }
204
- params = {"q": query, "count": str(count)}
205
-
206
- for attempt in range(3):
207
- try:
208
- r = requests.get(BRAVE_ENDPOINT, headers=headers, params=params, timeout=15)
209
- r.raise_for_status()
210
- data = r.json()
211
-
212
- logging.info(f"Brave search result data structure: {list(data.keys())}")
213
-
214
- raw = data.get("web", {}).get("results") or data.get("results", [])
215
- if not raw:
216
- logging.warning(f"No Brave search results found. Response: {data}")
217
- raise ValueError("No search results found.")
218
-
219
- arts = []
220
- for i, res in enumerate(raw[:count], 1):
221
- url = res.get("url", res.get("link", ""))
222
- host = re.sub(r"https?://(www\.)?", "", url).split("/")[0]
223
- arts.append({
224
- "index": i,
225
- "title": res.get("title", "No title"),
226
- "link": url,
227
- "snippet": res.get("description", res.get("text", "No snippet")),
228
- "displayed_link": host
229
- })
230
-
231
- logging.info(f"Brave search success: {len(arts)} results")
232
- return arts
233
-
234
- except Exception as e:
235
- logging.error(f"Brave search failure (attempt {attempt+1}/3): {e}")
236
- if attempt < 2:
237
- time.sleep(2)
238
-
239
- return []
240
-
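# Sketch of the list[dict] shape returned by brave_search(); it assumes the
# SERPHOUSE_API_KEY environment variable holds a valid Brave API key.
for art in brave_search("quantum computing", count=5):
    print(f"[{art['index']}] {art['title']} ({art['displayed_link']})")
    print(f"    {art['link']}")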
241
- @st.cache_data(ttl=3600)
242
- def brave_image_search(query: str, count: int = 10):
243
- """
244
- Call the Brave Image Search API → list[dict]
245
- Returns fields: index, title, image_url, source_url
246
- """
247
- if not BRAVE_KEY:
248
- raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
249
-
250
- headers = {
251
- "Accept": "application/json",
252
- "Accept-Encoding": "gzip",
253
- "X-Subscription-Token": BRAVE_KEY
254
- }
255
- params = {
256
- "q": query,
257
- "count": str(count),
258
- "search_lang": "en",
259
- "country": "us",
260
- "spellcheck": "1"
261
- }
262
-
263
- for attempt in range(3):
264
- try:
265
- r = requests.get(BRAVE_IMAGE_ENDPOINT, headers=headers, params=params, timeout=15)
266
- r.raise_for_status()
267
- data = r.json()
268
-
269
- results = []
270
- for i, img in enumerate(data.get("results", [])[:count], 1):
271
- results.append({
272
- "index": i,
273
- "title": img.get("title", "Image"),
274
- "image_url": img.get("image", {}).get("url", ""),
275
- "source_url": img.get("source", ""),
276
- "width": img.get("image", {}).get("width", 0),
277
- "height": img.get("image", {}).get("height", 0)
278
- })
279
-
280
- logging.info(f"Brave image search success: {len(results)} results")
281
- return results
282
-
283
- except Exception as e:
284
- logging.error(f"Brave image search failure (attempt {attempt+1}/3): {e}")
285
- if attempt < 2:
286
- time.sleep(2)
287
-
288
- return []
289
-
290
- @st.cache_data(ttl=3600)
291
- def brave_video_search(query: str, count: int = 5):
292
- """
293
- Call the Brave Video Search API → list[dict]
294
- Returns fields: index, title, video_url, thumbnail_url, source
295
- """
296
- if not BRAVE_KEY:
297
- raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
298
-
299
- headers = {
300
- "Accept": "application/json",
301
- "Accept-Encoding": "gzip",
302
- "X-Subscription-Token": BRAVE_KEY
303
- }
304
- params = {
305
- "q": query,
306
- "count": str(count)
307
- }
308
-
309
- for attempt in range(3):
310
- try:
311
- r = requests.get(BRAVE_VIDEO_ENDPOINT, headers=headers, params=params, timeout=15)
312
- r.raise_for_status()
313
- data = r.json()
314
-
315
- results = []
316
- for i, vid in enumerate(data.get("results", [])[:count], 1):
317
- results.append({
318
- "index": i,
319
- "title": vid.get("title", "Video"),
320
- "video_url": vid.get("url", ""),
321
- "thumbnail_url": vid.get("thumbnail", {}).get("src", ""),
322
- "source": vid.get("provider", {}).get("name", "Unknown source")
323
- })
324
-
325
- logging.info(f"Brave video search success: {len(results)} results")
326
- return results
327
-
328
- except Exception as e:
329
- logging.error(f"Brave video search failure (attempt {attempt+1}/3): {e}")
330
- if attempt < 2:
331
- time.sleep(2)
332
-
333
- return []
334
-
335
- @st.cache_data(ttl=3600)
336
- def brave_news_search(query: str, count: int = 5):
337
- """
338
- Call the Brave News Search API → list[dict]
339
- Returns fields: index, title, url, description, source, date
340
- """
341
- if not BRAVE_KEY:
342
- raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
343
-
344
- headers = {
345
- "Accept": "application/json",
346
- "Accept-Encoding": "gzip",
347
- "X-Subscription-Token": BRAVE_KEY
348
- }
349
- params = {
350
- "q": query,
351
- "count": str(count)
352
- }
353
-
354
- for attempt in range(3):
355
- try:
356
- r = requests.get(BRAVE_NEWS_ENDPOINT, headers=headers, params=params, timeout=15)
357
- r.raise_for_status()
358
- data = r.json()
359
-
360
- results = []
361
- for i, news in enumerate(data.get("results", [])[:count], 1):
362
- results.append({
363
- "index": i,
364
- "title": news.get("title", "News article"),
365
- "url": news.get("url", ""),
366
- "description": news.get("description", ""),
367
- "source": news.get("source", "Unknown source"),
368
- "date": news.get("age", "Unknown date")
369
- })
370
-
371
- logging.info(f"Brave news search success: {len(results)} results")
372
- return results
373
-
374
- except Exception as e:
375
- logging.error(f"Brave news search failure (attempt {attempt+1}/3): {e}")
376
- if attempt < 2:
377
- time.sleep(2)
378
-
379
- return []
380
-
381
- def mock_results(query: str) -> str:
382
- """Fallback search results if API fails"""
383
- ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
384
- return (f"# Fallback Search Content (Generated: {ts})\n\n"
385
- f"The search API request failed. Please generate a response based on any pre-existing knowledge about '{query}'.\n\n"
386
- f"You may consider the following points:\n\n"
387
- f"- Basic concepts and importance of {query}\n"
388
- f"- Commonly known related statistics or trends\n"
389
- f"- Typical expert opinions on this subject\n"
390
- f"- Questions that readers might have\n\n"
391
- f"Note: This is fallback guidance, not real-time data.\n\n")
392
-
393
- def do_web_search(query: str) -> str:
394
- """Perform web search and format the results."""
395
- try:
396
- # Web search
397
- arts = brave_search(query, 20)
398
- if not arts:
399
- logging.warning("No search results, using fallback content")
400
- return mock_results(query)
401
-
402
- # Image search
403
- images = brave_image_search(query, 5)
404
-
405
- # Video search
406
- videos = brave_video_search(query, 2)
407
-
408
- # News search
409
- news = brave_news_search(query, 3)
410
-
411
- # Format all results
412
- result = "# Web Search Results\nUse these results to provide a comprehensive answer with multiple sources. Include relevant images, videos, and links.\n\n"
413
-
414
- # Add web results
415
- result += "## Web Results\n\n"
416
- for a in arts[:10]: # Limit to top 10 results
417
- result += f"### Result {a['index']}: {a['title']}\n\n{a['snippet']}\n\n"
418
- result += f"**Source**: [{a['displayed_link']}]({a['link']})\n\n---\n"
419
-
420
- # Add image results if available
421
- if images:
422
- result += "## Image Results\n\n"
423
- for img in images:
424
- if img.get('image_url'):
425
- result += f"![{img['title']}]({img['image_url']})\n\n"
426
- result += f"**Source**: [{img.get('source_url', 'Image source')}]({img.get('source_url', '#')})\n\n"
427
-
428
- # Add video results if available
429
- if videos:
430
- result += "## Video Results\n\n"
431
- for vid in videos:
432
- result += f"### {vid['title']}\n\n"
433
- if vid.get('thumbnail_url'):
434
- result += f"![Thumbnail]({vid['thumbnail_url']})\n\n"
435
- result += f"**Watch**: [{vid['source']}]({vid['video_url']})\n\n"
436
-
437
- # Add news results if available
438
- if news:
439
- result += "## News Results\n\n"
440
- for n in news:
441
- result += f"### {n['title']}\n\n{n['description']}\n\n"
442
- result += f"**Source**: [{n['source']}]({n['url']}) - {n['date']}\n\n---\n"
443
-
444
- return result
445
-
446
- except Exception as e:
447
- logging.error(f"Web search process failed: {str(e)}")
448
- return mock_results(query)
449
-
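# Sketch: the markdown context block produced by do_web_search() is appended
# to the user prompt, mirroring what process_input() does further below.
query = EXAMPLE_QUERIES["example3"]
search_context = do_web_search(keywords(query, top=5))
user_content = query + "\n\n" + search_context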
450
- # ──────────────────────────────── File Upload Handling ─────────────────────
451
- def process_text_file(file):
452
- """Handle text file"""
453
- try:
454
- content = file.read()
455
- file.seek(0)
456
-
457
- text = content.decode('utf-8', errors='ignore')
458
- if len(text) > 10000:
459
- text = text[:9700] + "...(truncated)..."
460
-
461
- result = f"## Text File: {file.name}\n\n"
462
- result += text
463
- return result
464
- except Exception as e:
465
- logging.error(f"Error processing text file: {str(e)}")
466
- return f"Error processing text file: {str(e)}"
467
-
468
- def process_csv_file(file):
469
- """Handle CSV file"""
470
- try:
471
- content = file.read()
472
- file.seek(0)
473
-
474
- df = pd.read_csv(io.BytesIO(content))
475
- result = f"## CSV File: {file.name}\n\n"
476
- result += f"- Rows: {len(df)}\n"
477
- result += f"- Columns: {len(df.columns)}\n"
478
- result += f"- Column Names: {', '.join(df.columns.tolist())}\n\n"
479
-
480
- result += "### Data Preview\n\n"
481
- preview_df = df.head(10)
482
- try:
483
- markdown_table = preview_df.to_markdown(index=False)
484
- if markdown_table:
485
- result += markdown_table + "\n\n"
486
- else:
487
- result += "Unable to display CSV data.\n\n"
488
- except Exception as e:
489
- logging.error(f"Markdown table conversion error: {e}")
490
- result += "Displaying data as text:\n\n"
491
- result += str(preview_df) + "\n\n"
492
-
493
- num_cols = df.select_dtypes(include=['number']).columns
494
- if len(num_cols) > 0:
495
- result += "### Basic Statistical Information\n\n"
496
- try:
497
- stats_df = df[num_cols].describe().round(2)
498
- stats_markdown = stats_df.to_markdown()
499
- if stats_markdown:
500
- result += stats_markdown + "\n\n"
501
- else:
502
- result += "Unable to display statistical information.\n\n"
503
- except Exception as e:
504
- logging.error(f"Statistical info conversion error: {e}")
505
- result += "Unable to generate statistical information.\n\n"
506
-
507
- return result
508
- except Exception as e:
509
- logging.error(f"CSV file processing error: {str(e)}")
510
- return f"Error processing CSV file: {str(e)}"
511
-
512
- def process_pdf_file(file):
513
- """Handle PDF file"""
514
- try:
515
- # Read file in bytes
516
- file_bytes = file.read()
517
- file.seek(0)
518
-
519
- # Use PyPDF2
520
- pdf_file = io.BytesIO(file_bytes)
521
- reader = PyPDF2.PdfReader(pdf_file, strict=False)
522
-
523
- # Basic info
524
- result = f"## PDF File: {file.name}\n\n"
525
- result += f"- Total pages: {len(reader.pages)}\n\n"
526
-
527
- # Extract text by page (limit to first 5 pages)
528
- max_pages = min(5, len(reader.pages))
529
- all_text = ""
530
-
531
- for i in range(max_pages):
532
- try:
533
- page = reader.pages[i]
534
- page_text = page.extract_text()
535
-
536
- current_page_text = f"### Page {i+1}\n\n"
537
- if page_text and len(page_text.strip()) > 0:
538
- # Limit to 1500 characters per page
539
- if len(page_text) > 1500:
540
- current_page_text += page_text[:1500] + "...(truncated)...\n\n"
541
- else:
542
- current_page_text += page_text + "\n\n"
543
- else:
544
- current_page_text += "(No text could be extracted from this page)\n\n"
545
-
546
- all_text += current_page_text
547
-
548
- # If total text is too long, break
549
- if len(all_text) > 8000:
550
- all_text += "...(truncating remaining pages; PDF is too large)...\n\n"
551
- break
552
-
553
- except Exception as page_err:
554
- logging.error(f"Error processing PDF page {i+1}: {str(page_err)}")
555
- all_text += f"### Page {i+1}\n\n(Error extracting content: {str(page_err)})\n\n"
556
-
557
- if len(reader.pages) > max_pages:
558
- all_text += f"\nNote: Only the first {max_pages} pages are shown out of {len(reader.pages)} total.\n\n"
559
-
560
- result += "### PDF Content\n\n" + all_text
561
- return result
562
-
563
- except Exception as e:
564
- logging.error(f"PDF file processing error: {str(e)}")
565
- return f"## PDF File: {file.name}\n\nError occurred: {str(e)}\n\nThis PDF file cannot be processed."
566
-
567
- def process_uploaded_files(files):
568
- """Combine the contents of all uploaded files into one string."""
569
- if not files:
570
- return None
571
-
572
- result = "# Uploaded File Contents\n\n"
573
- result += "Below is the content from the files provided by the user. Integrate this data as a main source of information for your response.\n\n"
574
-
575
- for file in files:
576
- try:
577
- ext = file.name.split('.')[-1].lower()
578
- if ext == 'txt':
579
- result += process_text_file(file) + "\n\n---\n\n"
580
- elif ext == 'csv':
581
- result += process_csv_file(file) + "\n\n---\n\n"
582
- elif ext == 'pdf':
583
- result += process_pdf_file(file) + "\n\n---\n\n"
584
- else:
585
- result += f"### Unsupported File: {file.name}\n\n---\n\n"
586
- except Exception as e:
587
- logging.error(f"File processing error {file.name}: {e}")
588
- result += f"### File processing error: {file.name}\n\nError: {e}\n\n---\n\n"
589
-
590
- return result
591
-
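# Sketch using a hypothetical stand-in for Streamlit's UploadedFile (the real
# app passes st.file_uploader() results directly); the handlers above only
# need .name, .read(), and .seek().
import io

class _FakeTxt(io.BytesIO):  # hypothetical test double, not part of the app
    name = "notes.txt"

combined = process_uploaded_files([_FakeTxt(b"alpha beta gamma")])
print(combined[:200] if combined else "no files")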
592
- # ──────────────────────────────── Image & Utility ─────────────────────────
593
- def extract_image_urls_from_search(image_results):
594
- """Extract valid image URLs from Brave image search results."""
595
- if not image_results:
596
- return []
597
-
598
- valid_urls = []
599
- for img in image_results:
600
- url = img.get('image_url')
601
- if url and url.startswith('http'):
602
- valid_urls.append({
603
- 'url': url,
604
- 'title': img.get('title', 'Image'),
605
- 'source': img.get('source_url', '')
606
- })
607
-
608
- return valid_urls
609
-
610
- def extract_video_data_from_search(video_results):
611
- """Extract valid video data from Brave video search results."""
612
- if not video_results:
613
- return []
614
-
615
- valid_videos = []
616
- for vid in video_results:
617
- url = vid.get('video_url')
618
- if url and url.startswith('http'):
619
- valid_videos.append({
620
- 'url': url,
621
- 'title': vid.get('title', 'Video'),
622
- 'thumbnail': vid.get('thumbnail_url', ''),
623
- 'source': vid.get('source', 'Video source')
624
- })
625
-
626
- return valid_videos
627
-
628
- def generate_image(prompt, w=768, h=768, g=3.5, steps=30, seed=3):
629
- """Image generation function."""
630
- if not prompt:
631
- return None, "Insufficient prompt"
632
- try:
633
- res = Client(IMAGE_API_URL).predict(
634
- prompt=prompt, width=w, height=h, guidance=g,
635
- inference_steps=steps, seed=seed,
636
- do_img2img=False, init_image=None,
637
- image2image_strength=0.8, resize_img=True,
638
- api_name="/generate_image"
639
- )
640
- return res[0], f"Seed: {res[1]}"
641
- except Exception as e:
642
- logging.error(e)
643
- return None, str(e)
644
-
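# Sketch: generate_image() only works when the Gradio endpoint at
# IMAGE_API_URL is reachable; res[0] is whatever handle that endpoint returns
# (typically an image file path), printed here only if non-empty.
img, caption = generate_image("a rainforest canopy at dawn, photorealistic")
if img:
    print("generated:", img, "|", caption)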
645
- def extract_image_prompt(response_text: str, topic: str):
646
- """
647
- Generate a single-line English image prompt from the response content.
648
- """
649
- client = get_openai_client()
650
-
651
- try:
652
- response = client.chat.completions.create(
653
- model="gpt-4.1-mini",
654
- messages=[
655
- {"role": "system", "content": "Generate a single-line English image prompt from the following text. Return only the prompt text, nothing else."},
656
- {"role": "user", "content": f"Topic: {topic}\n\n---\n{response_text}\n\n---"}
657
- ],
658
- temperature=1,
659
- max_tokens=80,
660
- top_p=1
661
- )
662
-
663
- return response.choices[0].message.content.strip()
664
- except Exception as e:
665
- logging.error(f"OpenAI image prompt generation error: {e}")
666
- return f"A professional photo related to {topic}, high quality"
667
-
668
- def md_to_html(md: str, title="Perplexity-like Response"):
669
- """Convert Markdown to HTML."""
670
- return f"<!DOCTYPE html><html><head><title>{title}</title><meta charset='utf-8'></head><body>{markdown.markdown(md)}</body></html>"
671
-
672
- def keywords(text: str, top=5):
673
- """Simple keyword extraction."""
674
- cleaned = re.sub(r"[^가-힣a-zA-Z0-9\s]", "", text)
675
- return " ".join(cleaned.split()[:top])
676
-
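# Sketch of the two small helpers above: trimming a query down to its first
# few tokens before searching, and exporting a markdown answer as HTML.
q = EXAMPLE_QUERIES["example2"]
print(keywords(q, top=5))   # "How does climate change affect"
html_doc = md_to_html("# Demo\n\nHello **world**", title="Demo")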
677
- # ──────────────────────────────── Streamlit UI ────────────────────────────
678
- def perplexity_app():
679
- st.title("Perplexity-like AI Assistant")
680
-
681
- # Set default session state
682
- if "ai_model" not in st.session_state:
683
- st.session_state.ai_model = "gpt-4.1-mini" # fixed model
684
- if "messages" not in st.session_state:
685
- st.session_state.messages = []
686
- if "auto_save" not in st.session_state:
687
- st.session_state.auto_save = True
688
- if "generate_image" not in st.session_state:
689
- st.session_state.generate_image = False
690
- if "web_search_enabled" not in st.session_state:
691
- st.session_state.web_search_enabled = True
692
- if "search_mode" not in st.session_state:
693
- st.session_state.search_mode = "comprehensive"
694
- if "response_style" not in st.session_state:
695
- st.session_state.response_style = "professional"
696
-
697
- # Sidebar UI
698
- sb = st.sidebar
699
- sb.title("Search Settings")
700
-
701
- sb.subheader("Response Configuration")
702
- sb.selectbox(
703
- "Search Mode",
704
- options=list(SEARCH_MODES.keys()),
705
- format_func=lambda x: SEARCH_MODES[x],
706
- key="search_mode"
707
- )
708
-
709
- sb.selectbox(
710
- "Response Style",
711
- options=list(RESPONSE_STYLES.keys()),
712
- format_func=lambda x: RESPONSE_STYLES[x],
713
- key="response_style"
714
- )
715
-
716
- # Example queries
717
- sb.subheader("Example Queries")
718
- c1, c2, c3 = sb.columns(3)
719
- if c1.button("Quantum Computing", key="ex1"):
720
- process_example(EXAMPLE_QUERIES["example1"])
721
- if c2.button("Climate Change", key="ex2"):
722
- process_example(EXAMPLE_QUERIES["example2"])
723
- if c3.button("AI Economics", key="ex3"):
724
- process_example(EXAMPLE_QUERIES["example3"])
725
-
726
- sb.subheader("Other Settings")
727
- sb.toggle("Auto Save", key="auto_save")
728
- sb.toggle("Auto Image Generation", key="generate_image")
729
-
730
- web_search_enabled = sb.toggle("Use Web Search", value=st.session_state.web_search_enabled)
731
- st.session_state.web_search_enabled = web_search_enabled
732
-
733
- if web_search_enabled:
734
- st.sidebar.info("✅ Web search results will be integrated into the response.")
735
-
736
- # Download the latest response
737
- latest_response = next(
738
- (m["content"] for m in reversed(st.session_state.messages)
739
- if m["role"] == "assistant" and m["content"].strip()),
740
- None
741
- )
742
- if latest_response:
743
- # Extract a title from the response - first heading or first line
744
- title_match = re.search(r"# (.*?)(\n|$)", latest_response)
745
- if title_match:
746
- title = title_match.group(1).strip()
747
- else:
748
- first_line = latest_response.split('\n', 1)[0].strip()
749
- title = first_line[:40] + "..." if len(first_line) > 40 else first_line
750
-
751
- sb.subheader("Download Latest Response")
752
- d1, d2 = sb.columns(2)
753
- d1.download_button("Download as Markdown", latest_response,
754
- file_name=f"{title}.md", mime="text/markdown")
755
- d2.download_button("Download as HTML", md_to_html(latest_response, title),
756
- file_name=f"{title}.html", mime="text/html")
757
-
758
- # JSON conversation record upload
759
- up = sb.file_uploader("Load Conversation History (.json)", type=["json"], key="json_uploader")
760
- if up:
761
- try:
762
- st.session_state.messages = json.load(up)
763
- sb.success("Conversation history loaded successfully")
764
- except Exception as e:
765
- sb.error(f"Failed to load: {e}")
766
-
767
- # JSON conversation record download
768
- if sb.button("Download Conversation as JSON"):
769
- sb.download_button(
770
- "Save",
771
- data=json.dumps(st.session_state.messages, ensure_ascii=False, indent=2),
772
- file_name="conversation_history.json",
773
- mime="application/json"
774
- )
775
-
776
- # File Upload
777
- st.subheader("Upload Files")
778
- uploaded_files = st.file_uploader(
779
- "Upload files to be used as reference (txt, csv, pdf)",
780
- type=["txt", "csv", "pdf"],
781
- accept_multiple_files=True,
782
- key="file_uploader"
783
- )
784
-
785
- if uploaded_files:
786
- file_count = len(uploaded_files)
787
- st.success(f"{file_count} files uploaded. They will be used as sources for your query.")
788
-
789
- with st.expander("Preview Uploaded Files", expanded=False):
790
- for idx, file in enumerate(uploaded_files):
791
- st.write(f"**File Name:** {file.name}")
792
- ext = file.name.split('.')[-1].lower()
793
-
794
- if ext == 'txt':
795
- preview = file.read(1000).decode('utf-8', errors='ignore')
796
- file.seek(0)
797
- st.text_area(
798
- f"Preview of {file.name}",
799
- preview + ("..." if len(preview) >= 1000 else ""),
800
- height=150
801
- )
802
- elif ext == 'csv':
803
- try:
804
- df = pd.read_csv(file)
805
- file.seek(0)
806
- st.write("CSV Preview (up to 5 rows)")
807
- st.dataframe(df.head(5))
808
- except Exception as e:
809
- st.error(f"CSV preview failed: {e}")
810
- elif ext == 'pdf':
811
- try:
812
- file_bytes = file.read()
813
- file.seek(0)
814
-
815
- pdf_file = io.BytesIO(file_bytes)
816
- reader = PyPDF2.PdfReader(pdf_file, strict=False)
817
-
818
- pc = len(reader.pages)
819
- st.write(f"PDF File: {pc} pages")
820
-
821
- if pc > 0:
822
- try:
823
- page_text = reader.pages[0].extract_text()
824
- preview = page_text[:500] if page_text else "(No text extracted)"
825
- st.text_area("Preview of the first page", preview + "...", height=150)
826
- except:
827
- st.warning("Failed to extract text from the first page")
828
- except Exception as e:
829
- st.error(f"PDF preview failed: {e}")
830
-
831
- if idx < file_count - 1:
832
- st.divider()
833
-
834
- # Display existing messages
835
- for m in st.session_state.messages:
836
- with st.chat_message(m["role"]):
837
- # Process markdown to allow clickable links and properly rendered content
838
- st.markdown(m["content"], unsafe_allow_html=True)
839
-
840
- # Display images if present
841
- if "images" in m and m["images"]:
842
- st.subheader("Related Images")
843
- cols = st.columns(min(3, len(m["images"])))
844
- for i, img_data in enumerate(m["images"]):
845
- col_idx = i % len(cols)
846
- with cols[col_idx]:
847
- try:
848
- img_url = img_data.get('url', '')
849
- caption = img_data.get('title', 'Related image')
850
- if img_url:
851
- st.image(img_url, caption=caption, use_column_width=True)
852
- if img_data.get('source'):
853
- st.markdown(f"[Source]({img_data['source']})")
854
- except Exception as img_err:
855
- st.warning(f"Could not display image: {img_err}")
856
-
857
- # Display videos if present
858
- if "videos" in m and m["videos"]:
859
- st.subheader("Related Videos")
860
- for video in m["videos"]:
861
- video_title = video.get('title', 'Related video')
862
- video_url = video.get('url', '')
863
- thumbnail = video.get('thumbnail', '')
864
-
865
- # Display video information with thumbnail if available
866
- if thumbnail:
867
- col1, col2 = st.columns([1, 3])
868
- with col1:
869
- try:
870
- st.image(thumbnail, width=120)
871
- except:
872
- st.write("🎬")
873
- with col2:
874
- st.markdown(f"**[{video_title}]({video_url})**")
875
- st.write(f"Source: {video.get('source', 'Unknown')}")
876
- else:
877
- st.markdown(f"🎬 **[{video_title}]({video_url})**")
878
- st.write(f"Source: {video.get('source', 'Unknown')}")
879
-
880
- # User input
881
- query = st.chat_input("Enter your query or question here.")
882
- if query:
883
- process_input(query, uploaded_files)
884
-
885
- # Add badge (links) at the bottom of the sidebar
886
- sb.markdown("---")
887
- sb.markdown("Created by [https://ginigen.com](https://ginigen.com) | [YouTube Channel](https://www.youtube.com/@ginipickaistudio)")
888
-
889
- def process_example(topic):
890
- """Process the selected example query."""
891
- process_input(topic, [])
892
-
893
- def process_input(query: str, uploaded_files):
894
- # Add user's message
895
- if not any(m["role"] == "user" and m["content"] == query for m in st.session_state.messages):
896
- st.session_state.messages.append({"role": "user", "content": query})
897
-
898
- with st.chat_message("user"):
899
- st.markdown(query)
900
-
901
- with st.chat_message("assistant"):
902
- placeholder = st.empty()
903
- message_placeholder = st.empty()
904
- full_response = ""
905
-
906
- use_web_search = st.session_state.web_search_enabled
907
- has_uploaded_files = bool(uploaded_files) and len(uploaded_files) > 0
908
-
909
- try:
910
- # Status component for displaying progress
911
- status = st.status("Preparing to answer your query...")
912
- status.update(label="Initializing client...")
913
-
914
- client = get_openai_client()
915
-
916
- # Web search
917
- search_content = None
918
- image_results = []
919
- video_results = []
920
- news_results = []
921
-
922
- if use_web_search:
923
- status.update(label="Performing web search...")
924
- with st.spinner("Searching the web..."):
925
- search_content = do_web_search(keywords(query, top=5))
926
-
927
- # Perform specific searches for media
928
- try:
929
- status.update(label="Finding images and videos...")
930
- image_results = brave_image_search(query, 5)
931
- video_results = brave_video_search(query, 2)
932
- news_results = brave_news_search(query, 3)
933
- except Exception as search_err:
934
- logging.error(f"Media search error: {search_err}")
935
-
936
- # Process uploaded files β†’ content
937
- file_content = None
938
- if has_uploaded_files:
939
- status.update(label="Processing uploaded files...")
940
- with st.spinner("Analyzing files..."):
941
- file_content = process_uploaded_files(uploaded_files)
942
-
943
- # Extract usable image and video data
944
- valid_images = extract_image_urls_from_search(image_results)
945
- valid_videos = extract_video_data_from_search(video_results)
946
-
947
- # Build system prompt
948
- status.update(label="Preparing comprehensive answer...")
949
- sys_prompt = get_system_prompt(
950
- mode=st.session_state.search_mode,
951
- style=st.session_state.response_style,
952
- include_search_results=use_web_search,
953
- include_uploaded_files=has_uploaded_files
954
- )
955
-
956
- # Prepare the OpenAI API call
957
- status.update(label="Generating response...")
958
-
959
- # Build the message list
960
- api_messages = [
961
- {"role": "system", "content": sys_prompt}
962
- ]
963
-
964
- user_content = query
965
-
966
- # If there are search results, append them to the user prompt
967
- if search_content:
968
- user_content += "\n\n" + search_content
969
-
970
- # If there is file content, append it to the user prompt
971
- if file_content:
972
- user_content += "\n\n" + file_content
973
-
974
- # Include specific image information
975
- if valid_images:
976
- user_content += "\n\n# Available Images\n"
977
- for i, img in enumerate(valid_images[:5]):
978
- user_content += f"\n{i+1}. ![{img['title']}]({img['url']})\n"
979
- if img['source']:
980
- user_content += f" Source: {img['source']}\n"
981
-
982
- # Include specific video information
983
- if valid_videos:
984
- user_content += "\n\n# Available Videos\n"
985
- for i, vid in enumerate(valid_videos[:2]):
986
- user_content += f"\n{i+1}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n"
987
-
988
- # Add the user message
989
- api_messages.append({"role": "user", "content": user_content})
990
-
991
- # OpenAI API streaming call - uses the fixed model "gpt-4.1-mini"
992
- try:
993
- # Call the API in streaming mode
994
- stream = client.chat.completions.create(
995
- model="gpt-4.1-mini", # fixed model
996
- messages=api_messages,
997
- temperature=1,
998
- max_tokens=MAX_TOKENS,
999
- top_p=1,
1000
- stream=True # enable streaming
1001
- )
1002
-
1003
- # Process the streaming response
1004
- for chunk in stream:
1005
- if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None:
1006
- content_delta = chunk.choices[0].delta.content
1007
- full_response += content_delta
1008
- message_placeholder.markdown(full_response + "▌", unsafe_allow_html=True)
1009
-
1010
- # Display the final response (remove the cursor)
1011
- message_placeholder.markdown(full_response, unsafe_allow_html=True)
1012
-
1013
- # Display related images if available
1014
- if valid_images:
1015
- st.subheader("Related Images")
1016
- image_cols = st.columns(min(3, len(valid_images)))
1017
-
1018
- for i, img_data in enumerate(valid_images):
1019
- col_idx = i % len(image_cols)
1020
- with image_cols[col_idx]:
1021
- try:
1022
- st.image(img_data['url'], caption=img_data['title'], use_column_width=True)
1023
- if img_data['source']:
1024
- st.markdown(f"[Source]({img_data['source']})")
1025
- except Exception as img_err:
1026
- st.warning(f"Could not load image: {str(img_err)}")
1027
-
1028
- # Display related videos if available
1029
- if valid_videos:
1030
- st.subheader("Related Videos")
1031
- for video in valid_videos:
1032
- video_title = video.get('title', 'Related video')
1033
- video_url = video.get('url', '')
1034
- thumbnail = video.get('thumbnail', '')
1035
-
1036
- # Display video information with thumbnail if available
1037
- if thumbnail:
1038
- col1, col2 = st.columns([1, 3])
1039
- with col1:
1040
- try:
1041
- st.image(thumbnail, width=120)
1042
- except:
1043
- st.write("🎬")
1044
- with col2:
1045
- st.markdown(f"**[{video_title}]({video_url})**")
1046
- st.write(f"Source: {video.get('source', 'Unknown')}")
1047
- else:
1048
- st.markdown(f"🎬 **[{video_title}]({video_url})**")
1049
- st.write(f"Source: {video.get('source', 'Unknown')}")
1050
-
1051
- status.update(label="Response completed!", state="complete")
1052
-
1053
- # Save the response with images and videos in the session state
1054
- st.session_state.messages.append({
1055
- "role": "assistant",
1056
- "content": full_response,
1057
- "images": valid_images,
1058
- "videos": valid_videos
1059
- })
1060
-
1061
- except Exception as api_error:
1062
- error_message = str(api_error)
1063
- logging.error(f"API error: {error_message}")
1064
- status.update(label=f"Error: {error_message}", state="error")
1065
- raise Exception(f"Response generation error: {error_message}")
1066
-
1067
- # Additional image generation if enabled
1068
- if st.session_state.generate_image and full_response:
1069
- with st.spinner("Generating custom image..."):
1070
- try:
1071
- ip = extract_image_prompt(full_response, query)
1072
- img, cap = generate_image(ip)
1073
- if img:
1074
- st.subheader("AI-Generated Image")
1075
- st.image(img, caption=cap)
1076
- except Exception as img_error:
1077
- logging.error(f"Image generation error: {str(img_error)}")
1078
- st.warning("Custom image generation failed. Using web images only.")
1079
-
1080
- # Download buttons
1081
- if full_response:
1082
- st.subheader("Download This Response")
1083
- c1, c2 = st.columns(2)
1084
- c1.download_button(
1085
- "Markdown",
1086
- data=full_response,
1087
- file_name=f"{query[:30]}.md",
1088
- mime="text/markdown"
1089
- )
1090
- c2.download_button(
1091
- "HTML",
1092
- data=md_to_html(full_response, query[:30]),
1093
- file_name=f"{query[:30]}.html",
1094
- mime="text/html"
1095
- )
1096
-
1097
- # Auto save
1098
- if st.session_state.auto_save and st.session_state.messages:
1099
- try:
1100
- fn = f"conversation_history_auto_{datetime.now():%Y%m%d_%H%M%S}.json"
1101
- with open(fn, "w", encoding="utf-8") as fp:
1102
- json.dump(st.session_state.messages, fp, ensure_ascii=False, indent=2)
1103
- except Exception as e:
1104
- logging.error(f"Auto-save failed: {e}")
1105
-
1106
- except Exception as e:
1107
- error_message = str(e)
1108
- placeholder.error(f"An error occurred: {error_message}")
1109
- logging.error(f"Process input error: {error_message}")
1110
- ans = f"An error occurred while processing your request: {error_message}"
1111
- st.session_state.messages.append({"role": "assistant", "content": ans})
1112
-
1113
-
1114
- # ──────────────────────────────── main ────────────────────────────────────
1115
- def main():
1116
- perplexity_app()
1117
-
1118
- if __name__ == "__main__":
1119
- main()