Spaces: Running on Zero
Create app.py
app.py ADDED
@@ -0,0 +1,680 @@
import spaces
import json
import subprocess
import os
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
import gradio as gr
from huggingface_hub import hf_hub_download
import tempfile
from typing import List, Tuple, Optional

# Conditional imports for the PDF processing libraries
try:
    from docling.document_converter import DocumentConverter
    DOCLING_AVAILABLE = True
except ImportError:
    DOCLING_AVAILABLE = False
    print("Docling not available, using alternative PDF processing")
try:
    import PyPDF2
    import pdfplumber
except ImportError:
    print("Warning: PDF processing libraries not fully installed")

# Get HF_TOKEN from the environment
HF_TOKEN = os.getenv("HF_TOKEN")

# Initialize global variables (important!)
llm = None
llm_model = None
document_context = ""   # stores the document context extracted from the PDF
document_filename = ""  # filename of the currently loaded document

print("Global variables initialized")
print(f"document_context initial value: '{document_context}'")
print(f"document_filename initial value: '{document_filename}'")

# Define the model name and path
MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"

# Download the model (using HF_TOKEN)
model_path = hf_hub_download(
    repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
    filename=MISTRAL_MODEL_NAME,
    local_dir="./models",
    token=HF_TOKEN
)

print(f"Downloaded model path: {model_path}")

css = """
.bubble-wrap {
    padding-top: calc(var(--spacing-xl) * 3) !important;
}
.message-row {
    justify-content: space-evenly !important;
    width: 100% !important;
    max-width: 100% !important;
    margin: calc(var(--spacing-xl)) 0 !important;
    padding: 0 calc(var(--spacing-xl) * 3) !important;
}
.flex-wrap.user {
    border-bottom-right-radius: var(--radius-lg) !important;
}
.flex-wrap.bot {
    border-bottom-left-radius: var(--radius-lg) !important;
}
.message.user {
    padding: 10px;
}
.message.bot {
    text-align: right;
    width: 100%;
    padding: 10px;
    border-radius: 10px;
}
.message-bubble-border {
    border-radius: 6px !important;
}
.message-buttons {
    justify-content: flex-end !important;
}
.message-buttons-left {
    align-self: end !important;
}
.message-buttons-bot, .message-buttons-user {
    right: 10px !important;
    left: auto !important;
    bottom: 2px !important;
}
.dark.message-bubble-border {
    border-color: #343140 !important;
}
.dark.user {
    background: #1e1c26 !important;
}
.dark.assistant.dark, .dark.pending.dark {
    background: #16141c !important;
}
.upload-container {
    margin-bottom: 20px;
    padding: 15px;
    border: 2px dashed #666;
    border-radius: 10px;
    background-color: #f0f0f0;
}
.dark .upload-container {
    background-color: #292733;
    border-color: #444;
}
"""

def get_messages_formatter_type(model_name):
    if "Mistral" in model_name or "BitSix" in model_name:
        return MessagesFormatterType.MISTRAL  # use the MISTRAL format instead of CHATML
    else:
        raise ValueError(f"Unsupported model: {model_name}")

@spaces.GPU
def convert_pdf_to_markdown(file):
    """Convert a PDF file to Markdown."""
    global document_context, document_filename

    if file is None:
        return "No file uploaded.", {}

    if not DOCLING_AVAILABLE:
        # Guard added: DOCLING_AVAILABLE is set above but was never checked here
        return "Docling is not installed, so PDF conversion is unavailable.", {"error": "docling not installed"}

    try:
        # gr.File(type="filepath") passes a plain string path; older Gradio
        # versions pass a tempfile-like object with a .name attribute
        file_path = file if isinstance(file, str) else file.name
        print(f"\n=== PDF Conversion Started ===")
        print(f"File path: {file_path}")

        # Create a DocumentConverter instance
        converter = DocumentConverter()

        # Convert the file
        result = converter.convert(file_path)

        # Export to Markdown
        markdown_content = result.document.export_to_markdown()

        # Update the document context (important!)
        document_context = markdown_content
        document_filename = os.path.basename(file_path)

        # Extract metadata
        metadata = {
            "filename": document_filename,
            "conversion_status": "success",
            "content_length": len(markdown_content),
            "preview": markdown_content[:500] + "..." if len(markdown_content) > 500 else markdown_content
        }

        print(f"PDF conversion successful!")
        print(f"Filename: {document_filename}")
        print(f"Document length: {len(markdown_content)} characters")
        print(f"Document preview (first 300 chars):\n{markdown_content[:300]}...")
        print(f"=== PDF Conversion Complete ===\n")

        # Verify and force-set the global variables
        print(f"\n=== Before setting global variables ===")
        print(f"global document_context length: {len(document_context)}")
        print(f"global document_filename: {document_filename}")

        # Force-set the globals via the globals() function
        globals()['document_context'] = markdown_content
        globals()['document_filename'] = document_filename

        print(f"\n=== After setting global variables ===")
        print(f"global document_context length: {len(globals()['document_context'])}")
        print(f"global document_filename: {globals()['document_filename']}")

        return markdown_content, metadata

    except Exception as e:
        error_msg = f"Error during PDF conversion: {str(e)}"
        print(error_msg)
        document_context = ""
        document_filename = ""
        return error_msg, {"error": str(e)}

def find_relevant_chunks(document, query, chunk_size=1500, overlap=300):
    """Find relevant chunks in the document for the given query."""
    if not document:
        return ""

    print(f"Finding relevant chunks for query: {query}")

    # Simple keyword-based search
    query_words = query.lower().split()
    chunks = []

    # Split the document into overlapping chunks
    for i in range(0, len(document), chunk_size - overlap):
        chunk = document[i:i + chunk_size]
        chunks.append((i, chunk))

    print(f"Document split into {len(chunks)} chunks")

    # Compute a relevance score for each chunk
    scored_chunks = []
    for idx, chunk in chunks:
        chunk_lower = chunk.lower()
        score = sum(1 for word in query_words if word in chunk_lower)
        if score > 0:
            scored_chunks.append((score, idx, chunk))

    # Select the top 2 chunks (memory constraint)
    scored_chunks.sort(reverse=True, key=lambda x: x[0])
    relevant_chunks = scored_chunks[:2]

    if relevant_chunks:
        result = ""
        for score, idx, chunk in relevant_chunks:
            result += f"\n[Extracted from position {idx} - relevance score: {score}]\n{chunk}\n"
        print(f"Found {len(relevant_chunks)} relevant chunks")
        return result
    else:
        # If no relevant chunks were found, return the beginning of the document
        print("No relevant chunks found, returning document beginning")
        return document[:2000]

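# A worked illustration of the scorer above (hypothetical sample values, not
# part of the app): with document = "Invoices are due June 1. Late fees apply
# after June 15." and query = "when are invoices due", each chunk earns one
# point per query word it contains ("when", "are", "invoices", "due"), the
# chunks are sorted by score, and the top two are returned prefixed with
# their character offsets. With no keyword overlap at all, the fallback is
# simply document[:2000]. Note that as written this helper is defined but not
# yet called by respond() below, which embeds a fixed-size excerpt instead.
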
@spaces.GPU(duration=120)
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    global llm, llm_model

    # Access the globals via globals()
    document_context = globals().get('document_context', '')
    document_filename = globals().get('document_filename', '')

    # Verbose logging for debugging
    print(f"\n=== RESPOND Function Started ===")
    print(f"User message: {message}")
    print(f"Document context exists: {bool(document_context)}")
    if document_context:
        print(f"Document length: {len(document_context)}")
        print(f"Document filename: {document_filename}")
        print(f"Document preview (first 100 chars): {document_context[:100]}...")
    else:
        print("Warning: document_context is empty!")
        print(f"globals() keys (first 20): {list(globals().keys())[:20]}...")

    chat_template = get_messages_formatter_type(MISTRAL_MODEL_NAME)

    # Resolve the local model file path
    model_path_local = os.path.join("./models", MISTRAL_MODEL_NAME)

    if llm is None or llm_model != MISTRAL_MODEL_NAME:
        print("Loading LLM model...")
        llm = Llama(
            model_path=model_path_local,
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=16384,  # context size
            verbose=True  # verbose logging for debugging
        )
        llm_model = MISTRAL_MODEL_NAME
        print("LLM model loaded successfully!")

    provider = LlamaCppPythonProvider(llm)

    # Start from the default system message
    system_prompt = system_message

    # If a document context exists, compose it into both the system message and
    # a document-grounded user message (note: only the system-prompt copy is
    # ultimately sent; see final_message below)
    if document_context and len(document_context) > 0:
        doc_length = len(document_context)
        print(f"Including document context in message: {doc_length} characters")

        # Add the document info to the system message as well
        system_prompt += f"\n\nCurrently loaded document: '{document_filename}'. You must reference this document content when answering all user questions."

        # Limit the document content to a reasonable size
        max_doc_length = 4000  # cap at 4000 characters
        if doc_length > max_doc_length:
            # For very long documents, include only the beginning and the end
            doc_snippet = document_context[:2000] + "\n\n[... middle content omitted ...]\n\n" + document_context[-1500:]
            enhanced_message = f"""Uploaded PDF document information:
- Filename: {document_filename}
- Document length: {doc_length} characters

Document content (excerpt):
{doc_snippet}

User question: {message}

Please answer the question based on the document above."""
        else:
            # Short documents are included in full
            enhanced_message = f"""Uploaded PDF document information:
- Filename: {document_filename}
- Document length: {doc_length} characters

Document content:
{document_context}

User question: {message}

Please answer the question based on the document above."""

        print(f"Enhanced message length: {len(enhanced_message)}")
        print(f"Message preview (first 300 chars):\n{enhanced_message[:300]}...")

        # Debug: save the composed message to a file for inspection (the
        # document excerpt is actually delivered via the system prompt below)
        with open("debug_last_message.txt", "w", encoding="utf-8") as f:
            f.write(f"=== Debug Information ===\n")
            f.write(f"Document length: {len(document_context)}\n")
            f.write(f"Filename: {document_filename}\n")
            f.write(f"User question: {message}\n")
            f.write(f"\n=== Composed message (for inspection) ===\n")
            f.write(enhanced_message)
    else:
        # No document loaded
        enhanced_message = message
        if any(keyword in message.lower() for keyword in ["document", "pdf", "upload", "file", "content", "summary", "문서", "요약"]):
            enhanced_message = f"{message}\n\n[System message: No PDF document is currently uploaded. Please upload a PDF file first.]"
            print("Document-related question but no document loaded")

        # Debug messages
        print("Warning: document_context is empty!")
        print(f"document_context type: {type(document_context)}")
        print(f"document_context value: {repr(document_context)}")
        print(f"document_filename: {document_filename}")

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Embed the document content directly in the system prompt (when a document exists)
    if document_context and len(document_context) > 0:
        doc_snippet = document_context[:3000]  # use only the first 3000 characters
        enhanced_system_prompt = f"""{system_prompt}

Currently loaded PDF document:
Filename: {document_filename}
Document content:
{doc_snippet}
{'' if len(document_context) <= 3000 else '... (remainder omitted)'}

Answer user questions based on the document content above."""

        # Keep the user message itself simple
        final_message = message
    else:
        enhanced_system_prompt = system_prompt
        final_message = enhanced_message

    agent = LlamaCppAgent(
        provider,
        system_prompt=enhanced_system_prompt,
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )

    messages = BasicChatHistory()

    # Add previous conversation turns (fixed: each entry arrives from
    # bot_response() as a dict {"user": ..., "assistant": ...}, and the
    # in-progress turn is already excluded by the caller)
    for turn in history:
        if turn.get("assistant") is not None:
            # User message
            messages.add_message({
                'role': Roles.user,
                'content': turn["user"]
            })
            # Assistant message
            messages.add_message({
                'role': Roles.assistant,
                'content': turn["assistant"]
            })

    print(f"Sending final message: {final_message}")

    # Generate a streaming response
    try:
        stream = agent.get_chat_response(
            final_message,  # use the simple message
            llm_sampling_settings=settings,
            chat_history=messages,
            returns_streaming_generator=True,
            print_output=False
        )

        outputs = ""
        for output in stream:
            outputs += output
            yield outputs
    except Exception as e:
        print(f"Error during stream generation: {e}")
        yield "Sorry, an error occurred while generating the response. Please try again."

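# Note on the streaming contract (descriptive, not new behavior): respond()
# is a generator that yields the *cumulative* response text after each token,
# not per-token deltas. A caller therefore keeps only the most recent yield,
# e.g. (sketch):
#
#   for partial in respond("Summarize the document", [], system_msg,
#                          2048, 0.3, 0.9, 40, 1.1):
#       latest = partial  # each yield supersedes the previous one
#
# bot_response() below relies on exactly this when it writes each yield into
# history[-1][1].
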
def clear_document_context():
    """Clear the document context."""
    global document_context, document_filename
    document_context = ""
    document_filename = ""
    return "Document context has been cleared. Please upload a new PDF."

def check_document_status():
    """Check the current document status."""
    global document_context, document_filename
    print(f"\n=== Document Status Check ===")
    print(f"document_context type: {type(document_context)}")
    print(f"document_context length: {len(document_context) if document_context else 0}")
    print(f"document_filename: '{document_filename}'")

    if document_context and len(document_context) > 0:
        status = f"Document loaded successfully.\nFilename: {document_filename}\nDocument length: {len(document_context):,} characters"
        print(f"Document first 100 chars: {document_context[:100]}")
        return status
    else:
        return "No document loaded. Please upload a PDF file."

# Build the Gradio interface
with gr.Blocks(theme=gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="cyan",
    neutral_hue="gray",
    font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]
).set(
    body_background_fill="#f8f9fa",
    block_background_fill="#ffffff",
    block_border_width="1px",
    block_title_background_fill="#e9ecef",
    input_background_fill="#ffffff",
    button_secondary_background_fill="#e9ecef",
    border_color_accent="#dee2e6",
    border_color_primary="#ced4da",
    background_fill_secondary="#f8f9fa",
    color_accent_soft="transparent",
    code_background_fill="#f1f3f5",
), css=css) as demo:

    gr.Markdown("# On-Premise Optimized 'LLM+RAG Model' Service by VIDraft")
    gr.Markdown("**Advanced document analysis with a state-of-the-art language model for accurate Q&A based on your PDF content**")
    gr.Markdown("**Supports both English and Korean seamlessly, with context-aware responses**")
    gr.Markdown("**How to use**: 1) Upload a PDF below → 2) Ask questions about the document → 3) Get AI-powered answers")

    # Place the chat interface at the top
    with gr.Row():
        with gr.Column():
            # Chat interface
            chatbot = gr.Chatbot(elem_id="chatbot", height=500)
            msg = gr.Textbox(
                label="Message Input",
                placeholder="Enter your question... (Upload a PDF to ask questions about its content)",
                lines=2
            )
            with gr.Row():
                submit = gr.Button("Send", variant="primary")
                clear_chat = gr.Button("Clear Chat")

    # Place the examples in the middle - both English and Korean examples included
    gr.Examples(
        examples=[
            ["What is this document about?"],
            ["Please summarize the main contents of the uploaded PDF document."],
            ["What are the key dates or deadlines mentioned in the document?"],
            ["What are the 3 most important key points in this document?"],
            ["이 문서의 주요 내용을 요약해주세요."],
            ["문서에 나온 중요한 일정이나 날짜를 알려주세요."],
            ["이 문서에서 가장 중요한 3가지 핵심 포인트는 무엇인가요?"]
        ],
        inputs=msg
    )

    # Place the PDF upload section below the chat
    with gr.Accordion("PDF Document Upload", open=True):
        with gr.Row():
            with gr.Column(scale=1):
                file_input = gr.File(
                    label="Select PDF Document",
                    file_types=[".pdf"],
                    type="filepath"
                )
                with gr.Row():
                    convert_button = gr.Button("Convert Document", variant="primary")
                    clear_button = gr.Button("Clear Document", variant="secondary")
                    test_button = gr.Button("Test Document", variant="secondary")

                status_text = gr.Textbox(
                    label="Document Status",
                    interactive=False,
                    value=check_document_status(),
                    lines=3
                )

            with gr.Column(scale=1):
                with gr.Accordion("Converted Document Preview", open=False):
                    converted_text = gr.Textbox(
                        label="Markdown Conversion Result",
                        lines=10,
                        max_lines=20,
                        interactive=False
                    )
                    metadata_output = gr.JSON(label="Metadata")

    # Place the advanced settings at the very bottom
    with gr.Accordion("Advanced Settings", open=False):
        system_message = gr.Textbox(
            value="You are an AI assistant that can answer in both English and Korean. When a PDF document is provided, analyze its content accurately and provide detailed answers. Respond in the same language as the user's question.",
            label="System Message",
            lines=3
        )
        max_tokens = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max Tokens")
        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.3, step=0.1, label="Temperature (lower = more consistent)")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.90, step=0.05, label="Top-p")
        top_k = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k")
        repeat_penalty = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")

    # Event handlers
    def user_submit(message, history):
        return "", history + [[message, None]]

    def bot_response(history, system_msg, max_tok, temp, top_p_val, top_k_val, rep_pen):
        if history and history[-1][1] is None:
            user_message = history[-1][0]

            # Debugging: check the document context state
            global document_context, document_filename
            print(f"\n=== BOT RESPONSE Started ===")
            print(f"User message: {user_message}")
            if document_context:
                print(f"Document context active: {document_filename} ({len(document_context)} chars)")
                print(f"Document preview (first 200 chars): {document_context[:200]}...")
            else:
                print("No document context")

            # Collect completed turns as {"user": ..., "assistant": ...} dicts
            previous_history = []
            for i in range(len(history) - 1):
                if history[i][1] is not None:
                    previous_history.append({
                        "user": history[i][0],
                        "assistant": history[i][1]
                    })

            print(f"Previous conversations: {len(previous_history)}")

            # Special handling when a document is loaded
            if document_context and len(document_context) > 0:
                print(f"Generating document-based response... (Document length: {len(document_context)})")

            bot_message = ""
            try:
                for token in respond(
                    user_message,
                    previous_history,
                    system_msg,
                    max_tok,
                    temp,
                    top_p_val,
                    top_k_val,
                    rep_pen
                ):
                    bot_message = token
                    history[-1][1] = bot_message
                    yield history
            except Exception as e:
                print(f"Error during response generation: {e}")
                import traceback
                traceback.print_exc()
                history[-1][1] = "Sorry, an error occurred while generating the response. Please try again."
                yield history

    # PDF conversion event
    def on_pdf_convert(file):
        """Convert the PDF and update the status."""
        global document_context, document_filename

        if file is None:
            return "", {}, "No file selected."

        markdown_content, metadata = convert_pdf_to_markdown(file)

        if "error" in metadata:
            status = f"Conversion failed: {metadata['error']}"
        else:
            # Check and set the globals one more time (via globals())
            globals()['document_context'] = markdown_content
            globals()['document_filename'] = metadata['filename']

            status = f"PDF document converted successfully!\nFilename: {metadata['filename']}\nDocument length: {metadata['content_length']:,} characters\n\nYou can now ask questions about the document content in English or Korean.\n\nExample questions:\n- What is the main topic of this document?\n- Summarize the key points\n- 이 문서의 핵심 내용을 설명해주세요"

            print(f"\nDocument loading confirmed:")
            print(f"- globals()['document_context'] length: {len(globals()['document_context'])}")
            print(f"- globals()['document_filename']: {globals()['document_filename']}")

            # Final check
            if len(globals()['document_context']) > 0:
                print("Document successfully saved to global variables!")
            else:
                print("Warning: Document not saved to global variables!")

        return markdown_content, metadata, status

    # Convert automatically on file upload
    file_input.change(
        fn=on_pdf_convert,
        inputs=[file_input],
        outputs=[converted_text, metadata_output, status_text]
    )

    # Manual convert button
    convert_button.click(
        fn=on_pdf_convert,
        inputs=[file_input],
        outputs=[converted_text, metadata_output, status_text]
    )

    # Document test function
    def test_document():
        """Inspect the currently loaded document."""
        global document_context, document_filename
        if document_context:
            test_msg = f"Document test results:\n"
            test_msg += f"Filename: {document_filename}\n"
            test_msg += f"Total length: {len(document_context):,} characters\n"
            test_msg += f"First 500 characters:\n{document_context[:500]}..."
            return test_msg
        else:
            return "No document currently loaded."

    test_button.click(
        fn=test_document,
        outputs=[status_text]
    )

    clear_button.click(
        fn=clear_document_context,
        outputs=[status_text]
    ).then(
        fn=lambda: ("", {}, check_document_status()),
        outputs=[converted_text, metadata_output, status_text]
    )

    # Chat events
    msg.submit(user_submit, [msg, chatbot], [msg, chatbot]).then(
        bot_response,
        [chatbot, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
        chatbot
    )

    submit.click(user_submit, [msg, chatbot], [msg, chatbot]).then(
        bot_response,
        [chatbot, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
        chatbot
    )

    clear_chat.click(lambda: [], None, chatbot)

if __name__ == "__main__":
    # Create the required directories
    os.makedirs("./models", exist_ok=True)

    # Check environment variables
    if not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Model download may be restricted.")
        print("To set the environment variable: export HF_TOKEN='your_huggingface_token'")

    demo.launch(
        server_name="0.0.0.0",  # accessible from the local network
        server_port=7860,
        share=False  # disabled for the on-premise environment
    )
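
# Running locally, a sketch: the package names below are assumptions inferred
# from the imports above, since this commit does not include a requirements
# file.
#
#   pip install gradio huggingface_hub llama-cpp-python llama-cpp-agent \
#               docling PyPDF2 pdfplumber spaces
#   export HF_TOKEN='your_huggingface_token'  # needed for the model download
#   python app.py                             # serves on http://0.0.0.0:7860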