Mistral-RAG-BitSix

Running on Zero

App Files Files Community

Mistral-RAG-BitSix / app.py

openfree

Rename app-backup2.py to app.py

3e1eea4 verified 5 days ago

raw

history blame contribute delete

26 kB

	import spaces
	import json
	import subprocess
	import os
	from llama_cpp import Llama
	from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
	from llama_cpp_agent.providers import LlamaCppPythonProvider
	from llama_cpp_agent.chat_history import BasicChatHistory
	from llama_cpp_agent.chat_history.messages import Roles
	import gradio as gr
	from huggingface_hub import hf_hub_download
	import tempfile
	from typing import List, Tuple, Optional

	# Optional PDF back-ends: prefer Docling; when it is missing, try to import the
	# lighter PyPDF2 / pdfplumber libraries and only warn if those are absent too.
	try:
	    from docling.document_converter import DocumentConverter
	except ImportError:
	    DOCLING_AVAILABLE = False
	    print("Docling not available, using alternative PDF processing")
	    try:
	        import PyPDF2
	        import pdfplumber
	    except ImportError:
	        print("Warning: PDF processing libraries not fully installed")
	else:
	    DOCLING_AVAILABLE = True

	# Read the Hugging Face access token from the environment (None when unset).
	HF_TOKEN = os.getenv("HF_TOKEN")

	# Module-level state shared by the handlers below.
	llm = None
	llm_model = None
	document_context = "" # text extracted from the uploaded PDF
	document_filename = "" # file name of the currently loaded document

	# Startup diagnostics showing the initial (empty) document state.
	print("전역 변수 초기화 완료")
	print(f"document_context 초기값: '{document_context}'")
	print(f"document_filename 초기값: '{document_filename}'")

	# File name of the GGUF model inside the Hugging Face repository.
	MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"

	# Download the model into ./models at import time, authenticating with HF_TOKEN.
	model_path = hf_hub_download(
	repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
	filename=MISTRAL_MODEL_NAME,
	local_dir="./models",
	token=HF_TOKEN
	)

	print(f"Downloaded model path: {model_path}")

	# Custom CSS handed to gr.Blocks(css=...): chat bubble layout, message button
	# placement, dark-mode colours, and the dashed PDF upload container.
	css = """
	.bubble-wrap {
	padding-top: calc(var(--spacing-xl) * 3) !important;
	}
	.message-row {
	justify-content: space-evenly !important;
	width: 100% !important;
	max-width: 100% !important;
	margin: calc(var(--spacing-xl)) 0 !important;
	padding: 0 calc(var(--spacing-xl) * 3) !important;
	}
	.flex-wrap.user {
	border-bottom-right-radius: var(--radius-lg) !important;
	}
	.flex-wrap.bot {
	border-bottom-left-radius: var(--radius-lg) !important;
	}
	.message.user{
	padding: 10px;
	}
	.message.bot{
	text-align: right;
	width: 100%;
	padding: 10px;
	border-radius: 10px;
	}
	.message-bubble-border {
	border-radius: 6px !important;
	}
	.message-buttons {
	justify-content: flex-end !important;
	}
	.message-buttons-left {
	align-self: end !important;
	}
	.message-buttons-bot, .message-buttons-user {
	right: 10px !important;
	left: auto !important;
	bottom: 2px !important;
	}
	.dark.message-bubble-border {
	border-color: #343140 !important;
	}
	.dark.user {
	background: #1e1c26 !important;
	}
	.dark.assistant.dark, .dark.pending.dark {
	background: #16141c !important;
	}
	.upload-container {
	margin-bottom: 20px;
	padding: 15px;
	border: 2px dashed #666;
	border-radius: 10px;
	background-color: #f0f0f0;
	}
	.dark .upload-container {
	background-color: #292733;
	border-color: #444;
	}
	"""

	def get_messages_formatter_type(model_name):
	    """Return the chat formatter for *model_name*.

	    Names containing "Mistral" or "BitSix" map to the MISTRAL formatter;
	    any other name raises ValueError.
	    """
	    is_supported = any(tag in model_name for tag in ("Mistral", "BitSix"))
	    if not is_supported:
	        raise ValueError(f"Unsupported model: {model_name}")
	    # The MISTRAL prompt format is used here rather than CHATML.
	    return MessagesFormatterType.MISTRAL

	def _extract_pdf_text_fallback(pdf_path):
	    """Extract plain text page by page with pdfplumber (used when Docling is missing)."""
	    # Imported locally so a missing package surfaces as a normal conversion error.
	    import pdfplumber

	    with pdfplumber.open(pdf_path) as pdf:
	        # extract_text() may yield None for pages without a text layer.
	        page_texts = [page.extract_text() or "" for page in pdf.pages]
	    return "\n\n".join(page_texts)


	@spaces.GPU
	def convert_pdf_to_markdown(file):
	    """Convert an uploaded PDF to text and store it as the active document.

	    Args:
	        file: Either a filesystem path (``str`` / ``os.PathLike``) or an
	            object exposing the path through a ``.name`` attribute.

	    Returns:
	        ``(content, metadata)``. On success ``metadata`` holds ``filename``,
	        ``conversion_status``, ``content_length`` and ``preview``. On failure
	        ``content`` is the error message and ``metadata`` is
	        ``{"error": <message>}``. For ``file is None`` the metadata is ``{}``.

	    Side effects:
	        Updates the module globals ``document_context`` and
	        ``document_filename`` (both reset to ``""`` on failure).
	    """
	    # NOTE(review): on_pdf_convert re-assigns these globals from the return
	    # value, presumably because @spaces.GPU may run this in another process
	    # - confirm against the ZeroGPU documentation.
	    global document_context, document_filename

	    if file is None:
	        return "파일이 업로드되지 않았습니다.", {}

	    try:
	        # Plain paths are used as-is; file-like upload objects expose the path
	        # via .name. (A str subclass carrying .name also takes the first arm.)
	        if isinstance(file, (str, os.PathLike)):
	            pdf_path = os.fspath(file)
	        else:
	            pdf_path = file.name

	        print(f"\n=== PDF 변환 시작 ===")
	        print(f"파일 경로: {pdf_path}")

	        if DOCLING_AVAILABLE:
	            conversion = DocumentConverter().convert(pdf_path)
	            markdown_content = conversion.document.export_to_markdown()
	        else:
	            # Docling failed to import at startup; use the lighter extractor
	            # instead of failing with a NameError on DocumentConverter.
	            markdown_content = _extract_pdf_text_fallback(pdf_path)

	        document_context = markdown_content
	        document_filename = os.path.basename(pdf_path)

	        if len(markdown_content) > 500:
	            preview = markdown_content[:500] + "..."
	        else:
	            preview = markdown_content
	        metadata = {
	            "filename": document_filename,
	            "conversion_status": "success",
	            "content_length": len(markdown_content),
	            "preview": preview,
	        }

	        print(f"✅ PDF 변환 성공!")
	        print(f"📄 파일명: {document_filename}")
	        print(f"📏 문서 길이: {len(markdown_content)} 문자")
	        print(f"📝 문서 시작 300자:\n{markdown_content[:300]}...")
	        print(f"=== PDF 변환 완료 ===\n")

	        return markdown_content, metadata

	    except Exception as e:
	        # UI boundary: report any conversion failure to the user instead of
	        # raising, and make sure no stale document stays active.
	        error_msg = f"PDF 변환 중 오류 발생: {str(e)}"
	        print(f"❌ {error_msg}")
	        document_context = ""
	        document_filename = ""
	        return error_msg, {"error": str(e)}

	def find_relevant_chunks(document, query, chunk_size=1500, overlap=300, top_k=2):
	    """Return the excerpts of *document* that best match *query*.

	    The document is cut into windows of ``chunk_size`` characters that
	    overlap by ``overlap`` characters. Each window is scored by how many
	    distinct, lower-cased, whitespace-separated query words occur in it as
	    substrings, and the ``top_k`` best windows are returned.

	    Args:
	        document: Full document text.
	        query: User question; split on whitespace into keywords.
	        chunk_size: Window length in characters; must be positive.
	        overlap: Characters shared by consecutive windows. Values outside
	            ``0 <= overlap < chunk_size`` are ignored (no overlap is used).
	        top_k: Maximum number of excerpts to return.

	    Returns:
	        ``""`` for an empty document; otherwise the formatted excerpts, or
	        the first 2000 characters of the document when no window matches.

	    Raises:
	        ValueError: If ``chunk_size`` is not positive.
	    """
	    if not document:
	        return ""
	    if chunk_size <= 0:
	        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

	    # A zero or negative step would crash range() or produce no windows.
	    step = chunk_size - overlap
	    if overlap < 0 or step <= 0:
	        step = chunk_size

	    print(f"관련 청크 찾기 시작 - 쿼리: {query}")

	    # Simple keyword search; a set keeps repeated words from counting twice.
	    query_words = set((query or "").lower().split())

	    scored_chunks = []
	    chunk_count = 0
	    for start in range(0, len(document), step):
	        chunk = document[start:start + chunk_size]
	        chunk_count += 1
	        chunk_lower = chunk.lower()
	        score = sum(1 for word in query_words if word in chunk_lower)
	        if score > 0:
	            scored_chunks.append((score, start, chunk))
	        # Once a window reaches the end, later windows would only repeat
	        # text this window already contains.
	        if start + chunk_size >= len(document):
	            break

	    print(f"총 {chunk_count}개의 청크로 분할됨")

	    # Stable sort: equally scored windows keep document order.
	    scored_chunks.sort(key=lambda item: item[0], reverse=True)
	    relevant_chunks = scored_chunks[:max(top_k, 0)]

	    if not relevant_chunks:
	        # Nothing matched: fall back to the beginning of the document.
	        print("관련 청크를 찾지 못함, 문서 시작 부분 반환")
	        return document[:2000]

	    print(f"{len(relevant_chunks)}개의 관련 청크 찾음")
	    return "".join(
	        f"\n[문서의 {start}번째 위치에서 발췌 - 관련도: {score}]\n{chunk}\n"
	        for score, start, chunk in relevant_chunks
	    )

	# Keywords suggesting that the user is asking about an uploaded document.
	_DOCUMENT_KEYWORDS = ("문서", "pdf", "업로드", "파일", "내용", "요약")
	# Number of leading document characters embedded in the system prompt.
	_DOC_PROMPT_CHARS = 3000


	def _history_to_chat(history):
	    """Build a BasicChatHistory from earlier turns, skipping unanswered ones.

	    Accepts ``{"user": ..., "assistant": ...}`` dicts, ``{"role": ...,
	    "content": ...}`` dicts, and ``[user, assistant]`` pairs.
	    """
	    chat = BasicChatHistory()
	    for turn in history or []:
	        if isinstance(turn, dict) and "role" in turn:
	            content = turn.get("content")
	            if content is not None:
	                role = Roles.assistant if turn["role"] == "assistant" else Roles.user
	                chat.add_message({'role': role, 'content': content})
	            continue
	        if isinstance(turn, dict):
	            user_text, assistant_text = turn.get("user"), turn.get("assistant")
	        else:
	            user_text, assistant_text = turn[0], turn[1]
	        # A turn without an answer is the pending message, not history.
	        if assistant_text is None:
	            continue
	        chat.add_message({'role': Roles.user, 'content': user_text})
	        chat.add_message({'role': Roles.assistant, 'content': assistant_text})
	    return chat


	def _build_prompts(system_message, message, doc_text, doc_name):
	    """Return ``(system_prompt, user_message)`` for the current request."""
	    if not doc_text:
	        final_message = message
	        if any(keyword in message.lower() for keyword in _DOCUMENT_KEYWORDS):
	            # Tell the model that no document is loaded for this question.
	            final_message = f"{message}\n\n[시스템 메시지: 현재 업로드된 PDF 문서가 없습니다. PDF 파일을 먼저 업로드해주세요.]"
	            print("문서 관련 질문이지만 문서가 없음")
	        return system_message, final_message

	    print(f"📄 문서 컨텍스트를 메시지에 포함합니다: {len(doc_text)} 문자")

	    # NOTE(review): only the first _DOC_PROMPT_CHARS characters reach the
	    # model; find_relevant_chunks() exists in this file but is not used here.
	    doc_snippet = doc_text[:_DOC_PROMPT_CHARS]
	    truncation_note = '' if len(doc_text) <= _DOC_PROMPT_CHARS else '... (이하 생략)'

	    # Assembled with explicit newlines so source indentation never leaks
	    # into the prompt text.
	    system_prompt = (
	        f"{system_message}"
	        f"\n\n현재 '{doc_name}' PDF 문서가 로드되어 있습니다. 사용자의 모든 질문에 대해 이 문서의 내용을 반드시 참조하여 답변하세요."
	        f"\n\n현재 로드된 PDF 문서:"
	        f"\n파일명: {doc_name}"
	        f"\n문서 내용:"
	        f"\n{doc_snippet}"
	        f"\n{truncation_note}"
	        f"\n\n위 문서의 내용을 바탕으로 사용자의 질문에 답변하세요."
	    )
	    # The document lives in the system prompt; the user message stays as typed.
	    return system_prompt, message


	@spaces.GPU(duration=120)
	def respond(
	    message,
	    history: list[dict],
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	    top_k,
	    repeat_penalty,
	):
	    """Stream the assistant's answer to *message*.

	    Args:
	        message: The user's current question.
	        history: Earlier turns; see ``_history_to_chat`` for accepted shapes.
	            Entries without an assistant reply are ignored.
	        system_message: Base system prompt configured in the UI.
	        max_tokens, temperature, top_p, top_k, repeat_penalty: Sampling
	            settings copied onto the provider's default settings.

	    Yields:
	        The accumulated response text after each streamed piece. If
	        generation fails, a single apology message is yielded instead.
	    """
	    global llm, llm_model

	    # Read the active document from module globals (empty when none loaded).
	    doc_text = globals().get('document_context', '')
	    doc_name = globals().get('document_filename', '')

	    print(f"\n=== RESPOND 함수 시작 ===")
	    print(f"사용자 메시지: {message}")
	    print(f"문서 컨텍스트 존재 여부: {bool(doc_text)}")
	    if doc_text:
	        print(f"문서 길이: {len(doc_text)}")
	        print(f"문서 파일명: {doc_name}")
	    else:
	        print("⚠️ document_context가 비어있습니다!")

	    chat_template = get_messages_formatter_type(MISTRAL_MODEL_NAME)

	    # Load the model lazily on first use and keep it for later requests.
	    if llm is None or llm_model != MISTRAL_MODEL_NAME:
	        print("LLM 모델 로딩 중...")
	        llm = Llama(
	            model_path=os.path.join("./models", MISTRAL_MODEL_NAME),
	            flash_attn=True,
	            n_gpu_layers=81,
	            n_batch=1024,
	            n_ctx=16384,  # context window size
	            verbose=True,  # detailed llama.cpp logging for debugging
	        )
	        llm_model = MISTRAL_MODEL_NAME
	        print("LLM 모델 로딩 완료!")

	    provider = LlamaCppPythonProvider(llm)

	    settings = provider.get_provider_default_settings()
	    settings.temperature = temperature
	    settings.top_k = top_k
	    settings.top_p = top_p
	    settings.max_tokens = max_tokens
	    settings.repeat_penalty = repeat_penalty
	    settings.stream = True

	    system_prompt, final_message = _build_prompts(
	        system_message, message, doc_text, doc_name
	    )

	    agent = LlamaCppAgent(
	        provider,
	        system_prompt=system_prompt,
	        predefined_messages_formatter_type=chat_template,
	        debug_output=True
	    )

	    messages = _history_to_chat(history)

	    print(f"최종 메시지 전송 중: {final_message}")

	    try:
	        stream = agent.get_chat_response(
	            final_message,
	            llm_sampling_settings=settings,
	            chat_history=messages,
	            returns_streaming_generator=True,
	            print_output=False
	        )

	        outputs = ""
	        for output in stream:
	            outputs += output
	            yield outputs
	    except Exception as e:
	        # UI boundary: log the failure and show a friendly message.
	        print(f"스트림 생성 중 오류: {e}")
	        yield "죄송합니다. 응답 생성 중 오류가 발생했습니다. 다시 시도해주세요."

	def clear_document_context():
	    """Reset the module-level document text and file name.

	    Returns:
	        The status message shown in the UI after the reset.
	    """
	    global document_context, document_filename
	    document_context = document_filename = ""
	    return "📭 문서 컨텍스트가 초기화되었습니다. 새로운 PDF를 업로드해주세요."

	def check_document_status():
	    """Describe the currently loaded document for the status box.

	    Logs the type, length and file name of the module-level document state,
	    then returns a "loaded" summary or a "nothing loaded" hint.
	    """
	    current_length = len(document_context) if document_context else 0
	    print(f"\n=== 문서 상태 확인 ===")
	    print(f"document_context 타입: {type(document_context)}")
	    print(f"document_context 길이: {current_length}")
	    print(f"document_filename: '{document_filename}'")

	    # Guard clause: nothing loaded yet.
	    if not document_context or len(document_context) <= 0:
	        return "📭 로드된 문서가 없습니다. PDF 파일을 업로드해주세요."

	    summary = (
	        f"✅ 문서가 로드되어 있습니다.\n📄 파일명: {document_filename}"
	        f"\n📏 문서 길이: {len(document_context):,} 문자"
	    )
	    print(f"문서 첫 100자: {document_context[:100]}")
	    return summary

	# Gradio interface: chat on top, example prompts, PDF upload, advanced settings.
	# NOTE(review): the nesting of the layout containers below was reconstructed
	# from a source whose indentation was lost - confirm against the deployed UI.
	with gr.Blocks(theme=gr.themes.Soft(
	    primary_hue="blue",
	    secondary_hue="cyan",
	    neutral_hue="gray",
	    font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]
	).set(
	    body_background_fill="#f8f9fa",
	    block_background_fill="#ffffff",
	    block_border_width="1px",
	    block_title_background_fill="#e9ecef",
	    input_background_fill="#ffffff",
	    button_secondary_background_fill="#e9ecef",
	    border_color_accent="#dee2e6",
	    border_color_primary="#ced4da",
	    background_fill_secondary="#f8f9fa",
	    color_accent_soft="transparent",
	    code_background_fill="#f1f3f5",
	), css=css) as demo:

	    gr.Markdown("# 온프레미스 최적화 'LLM+RAG 모델' 서비스 by VIDraft")
	    gr.Markdown("📄 PDF 문서를 업로드하면 AI가 문서 내용을 분석하여 질문에 답변합니다.")
	    gr.Markdown("💡 사용법: 1) 아래에서 PDF 업로드 → 2) 문서에 대한 질문 입력 → 3) AI가 한국어로 답변")

	    # Chat interface at the top.
	    with gr.Row():
	        with gr.Column():
	            chatbot = gr.Chatbot(elem_id="chatbot", height=500)
	            msg = gr.Textbox(
	                label="메시지 입력",
	                placeholder="질문을 입력하세요... (PDF를 업로드하면 문서 내용에 대해 질문할 수 있습니다)",
	                lines=2
	            )
	            with gr.Row():
	                submit = gr.Button("전송", variant="primary")
	                clear_chat = gr.Button("대화 초기화")

	    # Example prompts in the middle.
	    gr.Examples(
	        examples=[
	            ["이 문서는 무엇에 관한 내용인가요?"],
	            ["업로드한 PDF 문서의 주요 내용을 한국어로 요약해주세요."],
	            ["문서에 나온 일정을 알려주세요."],
	            ["문서에서 가장 중요한 3가지 핵심 포인트는 무엇인가요?"],
	            ["이 행사의 개요를 설명해주세요."]
	        ],
	        inputs=msg
	    )

	    # PDF upload section below the chat.
	    with gr.Accordion("📄 PDF 문서 업로드", open=True):
	        with gr.Row():
	            with gr.Column(scale=1):
	                file_input = gr.File(
	                    label="PDF 문서 선택",
	                    file_types=[".pdf"],
	                    type="filepath"
	                )
	                with gr.Row():
	                    convert_button = gr.Button("문서 변환", variant="primary")
	                    clear_button = gr.Button("문서 초기화", variant="secondary")
	                    test_button = gr.Button("문서 테스트", variant="secondary")

	                status_text = gr.Textbox(
	                    label="문서 상태",
	                    interactive=False,
	                    value=check_document_status(),
	                    lines=3
	                )

	            with gr.Column(scale=1):
	                with gr.Accordion("변환된 문서 미리보기", open=False):
	                    converted_text = gr.Textbox(
	                        label="Markdown 변환 결과",
	                        lines=10,
	                        max_lines=20,
	                        interactive=False
	                    )
	                    metadata_output = gr.JSON(label="메타데이터")

	    # Advanced settings at the bottom.
	    with gr.Accordion("⚙️ 고급 설정", open=False):
	        system_message = gr.Textbox(
	            value="당신은 한국어로 답변하는 AI 어시스턴트입니다. PDF 문서가 제공되면 그 내용을 정확히 분석하여 답변합니다.",
	            label="시스템 메시지",
	            lines=3
	        )
	        max_tokens = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="최대 토큰 수")
	        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.3, step=0.1, label="Temperature (낮을수록 일관성 있음)")
	        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.90, step=0.05, label="Top-p")
	        top_k = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k")
	        repeat_penalty = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")

	    def user_submit(message, history):
	        """Append the user's message as a pending turn and clear the input box."""
	        history = history or []
	        # Ignore blank submissions so the model is never asked an empty question.
	        if not message or not message.strip():
	            return "", history
	        return "", history + [[message, None]]

	    def bot_response(history, system_msg, max_tok, temp, top_p_val, top_k_val, rep_pen):
	        """Stream the answer for the pending turn into the chat history.

	        Yields the updated history after each streamed piece. Does nothing
	        when the last turn already has an answer or the history is empty.
	        """
	        if not history or history[-1][1] is not None:
	            return

	        user_message = history[-1][0]

	        print(f"\n=== BOT RESPONSE 시작 ===")
	        print(f"사용자 메시지: {user_message}")
	        if document_context:
	            print(f"📄 문서 컨텍스트 활성: {document_filename} ({len(document_context)} 문자)")
	        else:
	            print("📭 문서 컨텍스트 없음")

	        # Completed earlier turns as [user, assistant] pairs, which is the
	        # shape respond() indexes positionally.
	        previous_history = [
	            [user_text, assistant_text]
	            for user_text, assistant_text in history[:-1]
	            if assistant_text is not None
	        ]
	        print(f"이전 대화 수: {len(previous_history)}")

	        try:
	            for partial_answer in respond(
	                user_message,
	                previous_history,
	                system_msg,
	                max_tok,
	                temp,
	                top_p_val,
	                top_k_val,
	                rep_pen
	            ):
	                history[-1][1] = partial_answer
	                yield history
	        except Exception as e:
	            # UI boundary: log the traceback and show a friendly message.
	            print(f"❌ 응답 생성 중 오류: {e}")
	            import traceback
	            traceback.print_exc()
	            history[-1][1] = "죄송합니다. 응답 생성 중 오류가 발생했습니다. 다시 시도해주세요."
	            yield history

	    def on_pdf_convert(file):
	        """Convert the selected PDF and return (text, metadata, status message)."""
	        global document_context, document_filename

	        if file is None:
	            return "", {}, "❌ 파일이 선택되지 않았습니다."

	        markdown_content, metadata = convert_pdf_to_markdown(file)

	        if "error" in metadata:
	            # Mirror the converter's failure path: no stale document stays active.
	            document_context = ""
	            document_filename = ""
	            return markdown_content, metadata, f"❌ 변환 실패: {metadata['error']}"

	        # Store the result in this module's globals as well, so the chat
	        # handlers see the document regardless of where the converter ran.
	        document_context = markdown_content
	        document_filename = metadata['filename']

	        status = f"✅ PDF 문서가 성공적으로 변환되었습니다!\n📄 파일명: {metadata['filename']}\n📏 문서 길이: {metadata['content_length']:,} 문자\n\n이제 문서 내용에 대해 한국어로 질문하실 수 있습니다.\n\n예시 질문:\n- 이 문서의 주요 내용을 요약해주세요\n- 문서에 나온 핵심 개념을 설명해주세요"

	        print(f"\n✅ 문서 로드 완료 확인:")
	        print(f"- document_context 길이: {len(document_context)}")
	        print(f"- document_filename: {document_filename}")
	        if not document_context:
	            print("❌ 경고: 문서가 전역 변수에 저장되지 않았습니다!")

	        return markdown_content, metadata, status

	    def test_document():
	        """Return a short report about the currently loaded document."""
	        if not document_context:
	            return "❌ 현재 로드된 문서가 없습니다."
	        return (
	            f"✅ 문서 테스트 결과:\n"
	            f"📄 파일명: {document_filename}\n"
	            f"📏 전체 길이: {len(document_context):,} 문자\n"
	            f"📝 첫 500자:\n{document_context[:500]}..."
	        )

	    # Convert automatically on upload and on the manual convert button.
	    pdf_outputs = [converted_text, metadata_output, status_text]
	    for pdf_trigger in (file_input.change, convert_button.click):
	        pdf_trigger(fn=on_pdf_convert, inputs=[file_input], outputs=pdf_outputs)

	    test_button.click(
	        fn=test_document,
	        outputs=[status_text]
	    )

	    clear_button.click(
	        fn=clear_document_context,
	        outputs=[status_text]
	    ).then(
	        fn=lambda: ("", {}, check_document_status()),
	        outputs=pdf_outputs
	    )

	    # Pressing Enter in the textbox and clicking the send button behave the same.
	    chat_inputs = [chatbot, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty]
	    for chat_trigger in (msg.submit, submit.click):
	        chat_trigger(user_submit, [msg, chatbot], [msg, chatbot]).then(
	            bot_response,
	            chat_inputs,
	            chatbot
	        )

	    clear_chat.click(lambda: [], None, chatbot)

	if __name__ == "__main__":
	    # Make sure the model directory exists.
	    os.makedirs("./models", exist_ok=True)

	    # Warn when no Hugging Face token is configured.
	    if not HF_TOKEN:
	        for warning_line in (
	            "⚠️ 경고: HF_TOKEN이 설정되지 않았습니다. 모델 다운로드에 제한이 있을 수 있습니다.",
	            "환경 변수를 설정하려면: export HF_TOKEN='your_huggingface_token'",
	        ):
	            print(warning_line)

	    launch_options = {
	        "server_name": "0.0.0.0",  # reachable from the local network
	        "server_port": 7860,
	        "share": False,  # on-premises deployment: no public share link
	    }
	    demo.launch(**launch_options)