openfree committed on
Commit
bbebfbc
·
verified ·
1 Parent(s): 6d2ef84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +644 -252
app.py CHANGED
@@ -1,285 +1,677 @@
1
- import os
 
2
  import subprocess
3
- import streamlit as st
4
- from huggingface_hub import snapshot_download, login, HfApi
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- if "quantized_model_path" not in st.session_state:
7
- st.session_state.quantized_model_path = None
8
- if "upload_to_hf" not in st.session_state:
9
- st.session_state.upload_to_hf = False
10
 
11
- def check_directory_path(directory_name: str) -> str:
12
- if os.path.exists(directory_name):
13
- path = os.path.abspath(directory_name)
14
- return str(path)
 
15
 
16
- models_list = ['deepseek-ai/DeepSeek-R1', 'deepseek-ai/DeepSeek-V3',
17
- 'mistralai/Mistral-Small-24B-Instruct-2501', 'simplescaling/s1-32B',
18
- 'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B', 'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B',
19
- 'deepseek-ai/DeepSeek-R1-Distill-Llama-70B', 'deepseek-ai/DeepSeek-R1-Zero', 'deepseek-ai/DeepSeek-R1-Distill-Llama-8B',
20
- 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', 'm-a-p/YuE-s1-7B-anneal-en-cot', 'deepseek-ai/DeepSeek-R1-Distill-Qwen-14B',
21
- 'microsoft/phi-4', 'huihui-ai/DeepSeek-R1-Distill-Qwen-32B-abliterated', 'meta-llama/Llama-3.3-70B-Instruct',
22
- 'cognitivecomputations/Dolphin3.0-R1-Mistral-24B', 'allenai/Llama-3.1-Tulu-3-405B', 'meta-llama/Llama-3.1-8B',
23
- 'meta-llama/Llama-3.1-8B-Instruct', 'Qwen/Qwen2.5-14B-Instruct-1M', 'mistralai/Mistral-Small-24B-Base-2501',
24
- 'huihui-ai/DeepSeek-R1-Distill-Llama-70B-abliterated', 'huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2',
25
- 'Qwen/Qwen2.5-7B-Instruct-1M', 'open-thoughts/OpenThinker-7B', 'Almawave/Velvet-14B', 'cognitivecomputations/Dolphin3.0-Mistral-24B',
26
- 'Steelskull/L3.3-Damascus-R1', 'Qwen/Qwen2.5-Coder-32B-Instruct', 'huihui-ai/DeepSeek-R1-Distill-Llama-8B-abliterated',
27
- 'cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese', 'jinaai/ReaderLM-v2', 'mistralai/Mistral-7B-Instruct-v0.3',
28
- 'meta-llama/Llama-3.2-1B', 'xwen-team/Xwen-7B-Chat', 'meta-llama/Llama-3.2-3B-Instruct', 'cognitivecomputations/DeepSeek-R1-AWQ',
29
- 'HuggingFaceTB/SmolLM2-1.7B-Instruct', 'xwen-team/Xwen-72B-Chat', 'openai-community/gpt2', 'meta-llama/Llama-2-7b-chat-hf', 'google/gemma-2-2b-it',
30
- 'mistralai/Mistral-7B-v0.1', 'meta-llama/Meta-Llama-3-8B', 'mistralai/Mistral-Nemo-Instruct-2407', 'microsoft/Phi-3.5-mini-instruct',
31
- 'arcee-ai/Virtuoso-Small-v2', 'MiniMaxAI/MiniMax-Text-01', 'AtlaAI/Selene-1-Mini-Llama-3.1-8B', 'Steelskull/L3.3-Nevoria-R1-70b',
32
- 'prithivMLmods/Calcium-Opus-14B-Elite2-R1', 'pfnet/plamo-2-1b', 'huihui-ai/DeepSeek-R1-Distill-Qwen-7B-abliterated-v2', 'Vikhrmodels/QVikhr-2.5-1.5B-Instruct-SMPO',
33
- 'mistralai/Mixtral-8x7B-Instruct-v0.1', 'vikhyatk/moondream2', 'meta-llama/Meta-Llama-3-8B-Instruct', 'deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct',
34
- 'Steelskull/L3.3-MS-Nevoria-70b', 'unsloth/DeepSeek-R1-Distill-Llama-8B', 'cyberagent/DeepSeek-R1-Distill-Qwen-14B-Japanese', 'mistralai/Mistral-7B-Instruct-v0.2',
35
- 'deepseek-ai/DeepSeek-Coder-V2-Instruct', 'Qwen/Qwen2.5-32B', 'Qwen/Qwen2.5-72B-Instruct', 'allenai/Llama-3.1-Tulu-3-8B', 'SakanaAI/TinySwallow-1.5B-Instruct',
36
- 'm-a-p/YuE-s2-1B-general', 'arcee-ai/Virtuoso-Medium-v2', 'Black-Ink-Guild/Pernicious_Prophecy_70B', 'Qwen/Qwen2.5-14B', 'inflatebot/MN-12B-Mag-Mell-R1', 'Qwen/Qwen2.5-Math-1.5B',
37
- 'Qwen/Qwen2.5-Coder-7B-Instruct', 'Qwen/Qwen2.5-1.5B-Instruct', 'Qwen/QwQ-32B-Preview', 'NovaSky-AI/Sky-T1-32B-Preview', 'sometimesanotion/Lamarck-14B-v0.7',
38
- 'SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B', 'NaniDAO/deepseek-r1-qwen-2.5-32B-ablated', 'rubenroy/Zurich-14B-GCv2-5m', 'rubenroy/Geneva-12B-GCv2-5m', 'prithivMLmods/Primal-Opus-14B-Optimus-v1',
39
- 'prithivMLmods/Megatron-Opus-14B-Exp', 'prithivMLmods/Primal-Mini-3B-Exp', 'TinyLlama/TinyLlama-1.1B-Chat-v1.0', 'Qwen/Qwen2.5-0.5B-Instruct',
40
- 'Qwen/Qwen2.5-3B-Instruct', 'meta-llama/Llama-3.2-1B-Instruct', 'HuggingFaceTB/SmolLM2-135M-Instruct', 'PowerInfer/SmallThinker-3B-Preview',
41
- 'Valdemardi/DeepSeek-R1-Distill-Qwen-32B-AWQ', 'huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated', 'SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B',
42
- 'lightblue/DeepSeek-R1-Distill-Qwen-7B-Japanese', 'Ihor/Text2Graph-R1-Qwen2.5-0.5b', 'prithivMLmods/Bellatrix-Tiny-3B-R1', 'prithivMLmods/Bellatrix-Tiny-1.5B-R1', 'prithivMLmods/Megatron-Opus-14B-Stock',
43
- 'prithivMLmods/Jolt-v0.1', 'prithivMLmods/Sqweeks-7B-Instruct', 'bigscience/bloom', 'mistralai/Mistral-7B-Instruct-v0.1', 'google/gemma-2-27b-it', 'meta-llama/Llama-3.1-70B', 'Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2',
44
- 'Qwen/Qwen2.5-7B-Instruct', 'LatitudeGames/Wayfarer-12B', 'prithivMLmods/QwQ-Math-IO-500M', 'prithivMLmods/Llama-3.2-6B-AlgoCode', 'prithivMLmods/Omni-Reasoner-Merged', 'Valdemardi/DeepSeek-R1-Distill-Llama-70B-AWQ',
45
- 'silma-ai/SILMA-Kashif-2B-Instruct-v1.0', 'mkurman/Qwen2.5-14B-DeepSeek-R1-1M', 'prithivMLmods/Blaze-14B-xElite', 'prithivMLmods/Megatron-Opus-7B-Exp', 'v2ray/GPT4chan-24B', 'prithivMLmods/Elita-1', 'prithivMLmods/Viper-Coder-v0.1',
46
- 'prithivMLmods/WebMind-7B-v0.1', 'prithivMLmods/Megatron-Corpus-14B-Exp.v2', 'prithivMLmods/Feynman-Grpo-Exp', 'meta-llama/Llama-2-7b-hf', 'microsoft/phi-2', 'Ttimofeyka/MistralRP-Noromaid-NSFW-Mistral-7B-GGUF',
47
- 'google/gemma-2b', 'google/gemma-7b', 'sophosympatheia/Midnight-Miqu-70B-v1.5', 'jiviai/medX_v2', 'Alibaba-NLP/gte-Qwen2-7B-instruct', 'google/gemma-2-9b-it', 'meta-llama/Llama-Guard-3-8B', 'microsoft/Phi-3.5-vision-instruct',
48
- 'MarinaraSpaghetti/NemoMix-Unleashed-12B', 'Qwen/Qwen2.5-0.5B', 'Qwen/Qwen2.5-7B', 'Qwen/Qwen2.5-32B-Instruct', 'meta-llama/Llama-3.2-3B', 'allenai/Molmo-7B-D-0924',
49
- 'HuggingFaceTB/SmolLM2-360M-Instruct', 'Zhengyi/LLaMA-Mesh', 'ibm-granite/granite-3.1-8b-instruct', 'livekit/turn-detector', 'SakanaAI/TinySwallow-1.5B', 'saheedniyi/YarnGPT',
50
- 'ContactDoctor/Bio-Medical-Llama-3-8B-CoT-012025', 'MiniMaxAI/MiniMax-VL-01', 'prithivMLmods/Omni-Reasoner4-Merged', 'unsloth/DeepSeek-R1', 'prithivMLmods/Calcium-Opus-14B-Elite2', 'prithivMLmods/Calcium-Opus-14B-Elite3',
51
- 'prithivMLmods/Bellatrix-Tiny-0.5B', 'prithivMLmods/Calcium-Opus-14B-Elite-Stock', 'prithivMLmods/Bellatrix-Tiny-1B', 'm-a-p/YuE-s1-7B-anneal-en-icl', 'arcee-ai/Virtuoso-Lite', 'stelterlab/Mistral-Small-24B-Instruct-2501-AWQ',
52
- 'prithivMLmods/Triangulum-v2-10B', 'prithivMLmods/Bellatrix-Tiny-1B-R1', 'huihui-ai/Mistral-Small-24B-Instruct-2501-abliterated', 'rubenroy/Gilgamesh-72B', 'rubenroy/Perseus-3192B', 'Nitral-Archive/NightWing3_Virtuoso-10B-v0.2',
53
- 'ibm-granite/granite-3.2-8b-instruct-preview', 'distilbert/distilgpt2', 'deepseek-ai/deepseek-coder-33b-instruct', 'microsoft/Phi-3-mini-4k-instruct', 'mistralai/Codestral-22B-v0.1', 'NovaSearch/stella_en_1.5B_v5', 'google/gemma-2-2b',
54
- 'lmms-lab/LLaVA-Video-7B-Qwen2', 'deepseek-ai/DeepSeek-V2.5', 'Qwen/Qwen2.5-Math-7B', 'AIDC-AI/Marco-o1', 'allenai/Llama-3.1-Tulu-3-8B-SFT', 'utter-project/EuroLLM-9B-Instruct', 'tiiuae/Falcon3-1B-Instruct',
55
- 'cognitivecomputations/DeepSeek-V3-AWQ', 'prithivMLmods/LwQ-10B-Instruct', 'prithivMLmods/LwQ-30B-Instruct', 'prithivMLmods/Calcium-20B', 'unsloth/DeepSeek-R1-Distill-Qwen-32B-bnb-4bit',
56
- 'opensourcerelease/DeepSeek-R1-bf16', 'prithivMLmods/Llama-Express.1-Math', 'prithivMLmods/Llama-Express.1', 'prithivMLmods/Llama-Express.1-Tiny', 'prithivMLmods/Llama-Express.1-Merged',
57
- 'Delta-Vector/Rei-12B', 'kingabzpro/DeepSeek-R1-Medical-COT', 'prithivMLmods/Calme-Ties-78B', 'prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct', 'prithivMLmods/Calme-Ties2-78B', 'prithivMLmods/Bellatrix-Tiny-1B-v3',
58
- 'sometimesanotion/Qwenvergence-14B-v12-Prose-DS', 'TIGER-Lab/Qwen2.5-32B-Instruct-CFT', 'unsloth/Mistral-Small-24B-Instruct-2501-unsloth-bnb-4bit', 'rubenroy/Geneva-12B-GCv2-1m', 'sometimesanotion/Qwenvergence-14B-v13-Prose-DS',
59
- 'deepseek-ai/deepseek-coder-6.7b-instruct', 'deepseek-ai/deepseek-moe-16b-base', 'deepseek-ai/deepseek-moe-16b-chat', 'microsoft/Phi-3-mini-128k-instruct', 'google/gemma-2-9b', 'AI-MO/NuminaMath-7B-TIR', 'CohereForAI/c4ai-command-r-plus-08-2024',
60
- 'Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24', 'nvidia/Llama-3.1-Nemotron-70B-Instruct-HF', 'CohereForAI/aya-expanse-8b', 'HuggingFaceTB/SmolLM2-135M', 'brgx53/3Blarenegv3-ECE-PRYMMAL-Martial', 'tiiuae/Falcon3-1B-Base',
61
- 'PocketDoc/Dans-PersonalityEngine-V1.1.0-12b', 'Kaoeiri/Magnum-v4-Cydonia-vXXX-22B', 'prithivMLmods/Blaze.1-32B-Instruct', 'kyutai/helium-1-preview-2b', 'prithivMLmods/Blaze.1-27B-Preview', 'prithivMLmods/Blaze.1-27B-Reflection',
62
- 'prithivMLmods/PyThagorean-10B', 'prithivMLmods/PyThagorean-3B', 'prithivMLmods/PyThagorean-Tiny', 'unsloth/DeepSeek-R1-Distill-Llama-70B-bnb-4bit', 'unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit', 'bespokelabs/Bespoke-Stratos-32B',
63
- 'Tarek07/Progenitor-V1.1-LLaMa-70B', 'mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1', 'm-a-p/YuE-s1-7B-anneal-zh-cot', 'emredeveloper/DeepSeek-R1-Medical-COT', 'HelpingAI/HAI-SER', 'rubenroy/Geneva-12B-GCv2-10k', 'rubenroy/Geneva-12B-GCv2-50k',
64
- 'rubenroy/Geneva-12B-GCv2-100k', 'allura-org/GPT-J-6b-Disco-Elysium', 'fblgit/miniclaus-qw1.5B-UNAMGS-GRPO', 'suayptalha/Luminis-phi-4', 'EleutherAI/gpt-neo-2.7B', 'tiiuae/falcon-7b-instruct', 'deepseek-ai/deepseek-coder-1.3b-instruct',
65
- 'teknium/OpenHermes-2.5-Mistral-7B', 'maritaca-ai/sabia-7b', 'bigcode/starcoder2-3b', 'mistralai/Mixtral-8x7B-v0.1', 'Rijgersberg/GEITje-7B', 'segolilylabs/Lily-Cybersecurity-7B-v0.2', 'deepseek-ai/deepseek-coder-7b-instruct-v1.5',
66
- 'deepseek-ai/deepseek-math-7b-rl', 'SherlockAssistant/Mistral-7B-Instruct-Ukrainian', 'meta-llama/CodeLlama-7b-hf', 'databricks/dbrx-instruct', 'UnfilteredAI/Promt-generator', 'mistralai/Mixtral-8x22B-Instruct-v0.1', 'cognitivecomputations/dolphin-2.9-llama3-8b',
67
- 'ruslanmv/Medical-Llama3-8B', 'deepseek-ai/DeepSeek-V2-Chat', 'microsoft/llava-med-v1.5-mistral-7b', 'deepseek-ai/DeepSeek-V2-Lite-Chat', 'CohereForAI/aya-23-8B', 'ProbeMedicalYonseiMAILab/medllama3-v20', 'cognitivecomputations/dolphin-2.9.2-qwen2-72b',
68
- 'mlabonne/NeuralDaredevil-8B-abliterated', 'yentinglin/Llama-3-Taiwan-8B-Instruct', 'Sao10K/L3-8B-Stheno-v3.2', 'elyza/Llama-3-ELYZA-JP-8B', 'meta-llama/Llama-3.1-70B-Instruct', 'princeton-nlp/gemma-2-9b-it-SimPO', 'meta-llama/Llama-3.1-405B-Instruct',
69
- 'mistralai/Mistral-Nemo-Base-2407', 'unsloth/Meta-Llama-3.1-8B-Instruct', 'mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated', 'microsoft/maira-2', 'ContactDoctor/Bio-Medical-Llama-3-8B', 'ystemsrx/Qwen2-Boundless', 'upstage/solar-pro-preview-instruct',
70
- 'Epiculous/Violet_Twilight-v0.2', 'flowaicom/Flow-Judge-v0.1', 'Qwen/Qwen2.5-14B-Instruct', 'Qwen/Qwen2.5-Math-1.5B-Instruct', 'meta-llama/Llama-Guard-3-1B', 'google/gemma-2-2b-jpn-it', 'unsloth/Llama-3.2-1B-Instruct', 'numind/NuExtract-1.5',
71
- 'rombodawg/Rombos-LLM-V2.5-Qwen-32b', 'anthracite-org/magnum-v4-22b', 'CohereForAI/aya-expanse-32b', 'VongolaChouko/Starcannon-Unleashed-12B-v1.0', 'Qwen/Qwen2.5-Coder-14B-Instruct', 'Qwen/Qwen2.5-Coder-32B', 'SmallDoge/Doge-60M', 'MaziyarPanahi/calme-3.2-instruct-78b',
72
- 'lianghsun/Llama-3.2-Taiwan-3B-Instruct', 'allenai/Llama-3.1-Tulu-3-8B-DPO', 'allenai/Llama-3.1-Tulu-3-70B', 'knifeayumu/Cydonia-v1.3-Magnum-v4-22B', 'utter-project/EuroLLM-9B', 'Skywork/Skywork-o1-Open-Llama-3.1-8B', 'Moraliane/SAINEMO-reMIX', 'LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct',
73
- 'NousResearch/Hermes-3-Llama-3.2-3B', 'recursal/QRWKV6-32B-Instruct-Preview-v0.1', 'allenai/OLMo-2-1124-13B-Instruct', 'huihui-ai/Llama-3.3-70B-Instruct-abliterated-finetuned-GPTQ-Int8', 'WiroAI/wiroai-turkish-llm-9b', 'SmallDoge/Doge-20M', 'FreedomIntelligence/HuatuoGPT-o1-70B',
74
- 'Sao10K/70B-L3.3-Cirrus-x1', 'internlm/internlm3-8b-instruct', 'prithivMLmods/PRM-Math-7B-Reasoner', 'prithivMLmods/QwQ-LCoT2-7B-Instruct', 'netease-youdao/Confucius-o1-14B', 'unsloth/DeepSeek-R1-Zero', 'unsloth/DeepSeek-R1-BF16', 'unsloth/DeepSeek-R1-Distill-Qwen-1.5B-unsloth-bnb-4bit',
75
- 'suayptalha/Falcon3-Jessi-v0.4-7B-Slerp', 'RDson/CoderO1-DeepSeekR1-Coder-32B-Preview', 'bespokelabs/Bespoke-Stratos-7B', 'unsloth/DeepSeek-R1-Distill-Qwen-32B-unsloth-bnb-4bit', 'RWKV-Red-Team/ARWKV-7B-Preview-0.1', 'lightblue/Karasu-DPO-7B', 'Spestly/Atlas-Pro-7B-Preview-1M',
76
- 'llm-jp/llm-jp-3-13b-instruct3', 'm-a-p/YuE-s1-7B-anneal-jp-kr-cot', 'm-a-p/YuE-s1-7B-anneal-jp-kr-icl', 'm-a-p/YuE-s1-7B-anneal-zh-icl', 'huihui-ai/Qwen2.5-14B-Instruct-1M-abliterated', 'AXCXEPT/phi-4-deepseek-R1K-RL-EZO', 'grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B',
77
- 'sthenno/tempesthenno-icy-0130', 'neuralmagic/Mistral-Small-24B-Instruct-2501-FP8-Dynamic', 'Omartificial-Intelligence-Space/Arabic-DeepSeek-R1-Distill-8B', 'OddTheGreat/Badman_12B', 'MasterControlAIML/DeepSeek-R1-Strategy-Qwen-2.5-1.5b-Unstructured-To-Structured',
78
- 'rubenroy/Geneva-12B-GCv2-500k', 'bunnycore/Llama-3.2-3B-Bespoke-Thought', 'justinj92/Qwen2.5-1.5B-Thinking', 'RefalMachine/RuadaptQwen2.5-14B-Instruct', 'v2ray/GPT4chan-24B-QLoRA', 'CultriX/Qwen2.5-14B-Qwentangledv2', 'CultriX/Qwen2.5-14B-Ultimav2',
79
- 'Tarek07/Progenitor-V2.2-LLaMa-70B', 'dwetzel/DeepSeek-R1-Distill-Qwen-32B-GPTQ-INT4', 'Nitral-Archive/NightWing3-R1_Virtuoso-10B-v0.3e2', 'ucalyptus/prem-1B-grpo', 'Sakalti/Saka-14B', 'bunnycore/Qwen2.5-7B-MixStock-V0.1', 'braindao/DeepSeek-R1-Distill-Llama-8B-Uncensored',
80
- 'scb10x/llama3.1-typhoon2-deepseek-r1-70b', 'RefalMachine/RuadaptQwen2.5-14B-R1-distill-preview-v1',
81
- 'openai-community/gpt2-medium', 'openai-community/gpt2-xl', 'meta-llama/Llama-2-13b-hf', 'Trelis/Llama-2-7b-chat-hf-function-calling-v2', 'ByteWave/prompt-generator', 'HuggingFaceH4/zephyr-7b-beta', 'TheBloke/deepseek-llm-67b-chat-GPTQ', 'sarvamai/OpenHathi-7B-Hi-v0.1-Base',
82
- 'cognitivecomputations/dolphin-2.5-mixtral-8x7b',
83
- 'SanjiWatsuki/Sonya-7B', 'openchat/openchat-3.5-0106', 'ZySec-AI/SecurityLLM', 'defog/sqlcoder-70b-alpha', 'nakodanei/Blue-Orchid-2x7b', 'liuhaotian/llava-v1.6-mistral-7b', 'BioMistral/BioMistral-7B-AWQ-QGS128-W4-GEMM', 'google/gemma-2b-it', 'bigcode/starcoder2-7b',
84
- 'nbeerbower/Maidphin-Kunoichi-7B-GGUF-Q4_K_M', 'HuggingFaceH4/starchat2-15b-v0.1', 'CohereForAI/c4ai-command-r-plus',
85
- 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1', 'UnfilteredAI/UNfilteredAI-1B', 'MaziyarPanahi/WizardLM-2-7B-GGUF', 'hiieu/Meta-Llama-3-8B-Instruct-function-calling-json-mode', 'shenzhi-wang/Llama3-8B-Chinese-Chat', 'Orenguteng/Llama-3-8B-Lexi-Uncensored', 'NTQAI/Nxcode-CQ-7B-orpo',
86
- 'lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half', 'taide/Llama3-TAIDE-LX-8B-Chat-Alpha1', 'Nitral-AI/Poppy_Porpoise-0.72-L3-8B',
87
- 'WhiteRabbitNeo/Llama-3-WhiteRabbitNeo-8B-v2.0', 'marketeam/LLa-Marketing', 'microsoft/Phi-3-vision-128k-instruct', 'CohereForAI/aya-23-35B', 'shisa-ai/shisa-v1-llama3-8b', 'mistralai/Mistral-7B-v0.3', 'MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF', 'yentinglin/Llama-3-Taiwan-70B-Instruct',
88
- 'deepseek-ai/DeepSeek-Coder-V2-Lite-Base', 'Sao10K/L3-8B-Stheno-v3.3-32K', 'google/gemma-2-27b',
89
- 'Alibaba-NLP/gte-Qwen2-1.5B-instruct', 'm42-health/Llama3-Med42-8B', 'cognitivecomputations/dolphin-vision-7b', 'TheDrummer/Big-Tiger-Gemma-27B-v1', 'meta-llama/Llama-3.1-405B', 'google/shieldgemma-2b', 'amd/AMD-Llama-135m', 'unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit',
90
- 'aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored',
91
- 'NousResearch/Hermes-3-Llama-3.1-8B', 'mlabonne/TwinLlama-3.1-8B', 'ClosedCharacter/Peach-9B-8k-Roleplay', 'utter-project/EuroLLM-1.7B-Instruct', 'ai21labs/AI21-Jamba-1.5-Mini', 'Zyphra/Zamba2-2.7B-instruct', 'google/gemma-7b-aps-it', 'ifable/gemma-2-Ifable-9B', 'Qwen/Qwen2.5-1.5B',
92
- 'Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4', 'Qwen/Qwen2.5-32B-Instruct-AWQ', 'brunopio/Llama3-8B-1.58-100B-tokens-GGUF', 'anthracite-org/magnum-v4-72b', 'nvidia/Llama-3_1-Nemotron-51B-Instruct', 'unsloth/Qwen2.5-14B-Instruct-bnb-4bit', 'katanemo/Arch-Function-3B', 'allenai/Molmo-7B-O-0924',
93
- 'unsloth/Llama-3.2-1B', 'lianghsun/Llama-3.2-Taiwan-Legal-3B-Instruct', 'BSC-LT/salamandra-2b-instruct', 'Steelskull/MSM-MS-Cydrion-22B', 'Bllossom/llama-3.2-Korean-Bllossom-3B', 'sam-paech/Delirium-v1', 'fblgit/TheBeagle-v2beta-32B-MGS', 'sarvamai/sarvam-1', 'HuggingFaceTB/SmolLM2-1.7B',
94
- 'Qwen/Qwen2.5-Coder-0.5B-Instruct', 'rombodawg/Rombos-Coder-V2.5-Qwen-14b', 'Nexusflow/Athene-V2-Chat', 'FallenMerick/MN-Violet-Lotus-12B', 'EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2', 'allenai/OLMo-2-1124-7B-Instruct-preview', 'sometimesanotion/KytheraMix-7B-v0.2', 'LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct',
95
- 'LGAI-EXAONE/EXAONE-3.5-32B-Instruct', 'ibm-granite/granite-3.1-2b-instruct', 'unsloth/Llama-3.3-70B-Instruct-bnb-4bit', 'ibnzterrell/Meta-Llama-3.3-70B-Instruct-AWQ-INT4', 'Sao10K/L3.3-70B-Euryale-v2.3', 'BSC-LT/ALIA-40b', 'huihui-ai/Llama-3.3-70B-Instruct-abliterated', 'SmallDoge/Doge-20M-Instruct',
96
- 'tiiuae/Falcon3-10B-Instruct', 'winninghealth/WiNGPT-Babel', 'FreedomIntelligence/HuatuoGPT-o1-8B', 'FreedomIntelligence/HuatuoGPT-o1-72B', 'prithivMLmods/Llama-3.1-5B-Instruct', 'prithivMLmods/Llama-Thinker-3B-Preview2', 'simplescaling/step-conditional-control-old', 'ngxson/MiniThinky-v2-1B-Llama-3.2',
97
- 'unsloth/phi-4-unsloth-bnb-4bit', 'KBlueLeaf/TIPO-500M-ft', 'bunnycore/Phi-4-RP-v0', 'Rombo-Org/Rombo-LLM-V2.5-Qwen-14b', 'nbeerbower/mistral-nemo-kartoffel-12B', 'sethuiyer/Llamaverse-3.1-8B-Instruct', 'Shaleen123/llama-3.1-8b-reasoning', 'Nohobby/L3.3-Prikol-70B-v0.3', 'nvidia/AceInstruct-1.5B', 'SmallDoge/Doge-20M-checkpoint',
98
- 'carsenk/llama3.2_1b_2025_uncensored_v2', 'bunnycore/Phi-4-Model-Stock-v2', 'Shaleen123/llama-3.1-8B-chain-reasoning', 'bunnycore/Phi-4-Model-Stock-v3', 'IVentureISB/MahaKumbh-Llama3.3-70B', 'DavidLanz/Llama-3.2-Taiwan-3B-Instruct', 'SmallDoge/Doge-60M-checkpoint', 'unsloth/DeepSeek-R1-Distill-Qwen-7B-unsloth-bnb-4bit',
99
- 'unsloth/DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit', 'roleplaiapp/DeepSeek-R1-Distill-Qwen-32B-Q4_0-GGUF', 'arcee-ai/DeepSeek-R1-bf16', 'inarikami/DeepSeek-R1-Distill-Qwen-32B-AWQ', 'mlx-community/DeepSeek-R1-Distill-Llama-70B-4bit', 'prithivMLmods/QwQ-LCoT1-Merged', 'prithivMLmods/Llama-3.2-3B-Math-Oct',
100
- 'Nitral-AI/Wayfarer_Eris_Noctis-12B', 'thirdeyeai/DeepSeek-R1-Distill-Qwen-7B-uncensored', 'NovaSky-AI/Sky-T1-32B-Flash', 'SZTAKI-HLT/Llama-3.2-1B-HuAMR', 'stepenZEN/DeepSeek-R1-Distill-Qwen-1.5B-Abliterated-dpo', 'mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-1.5B-v1.0', 'prithivMLmods/Phi-4-Super-1', 'prithivMLmods/Calcium-Opus-14B-Merge',
101
- 'prithivMLmods/COCO-7B-Instruct-1M', 'prithivMLmods/Taurus-Opus-7B', 'ReadyArt/L3.3-Nevoria-R1-70b_EXL2_5.0bpw_H8', 'NickyNicky/Llama-1B-GRPO_Final', 'unsloth/Qwen2.5-14B-Instruct-1M', 'RefalMachine/RuadaptQwen2.5-7B-Lite-Beta', 'hiieu/R1_tool_call_Distill-Qwen-7B', 'fla-hub/rwkv7-1.5B-world', 'KatyTheCutie/Repose-12B', 'DoppelReflEx/MN-12B-WolFrame',
102
- 'huihui-ai/DeepSeek-R1-Distill-Qwen-7B-abliterated', 'SubtleOne/Qwen2.5-32b-Erudite-Writer', 'ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M', 'safe049/SmolTuring-8B-Instruct', 'unsloth/Mistral-Small-24B-Instruct-2501', 'unsloth/Mistral-Small-24B-Base-2501',
103
- 'llm-jp/llm-jp-3-150m-instruct3', 'llm-jp/llm-jp-3-7.2b-instruct3', 'suayptalha/Maestro-10B', 'Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors', 'OddTheGreat/Malevolent_12B.v2', 'nbeerbower/Dumpling-Qwen2.5-7B-1k-r16', 'kromeurus/L3.1-Tivir-v0.1-10B', 'suayptalha/Maestro-R1-Llama-8B', 'rubenroy/Zurich-1.5B-GCv2-50k',
104
- 'rubenroy/Zurich-1.5B-GCv2-100k', 'rubenroy/Zurich-1.5B-GCv2-1m', 'enhanceaiteam/xea-llama', 'eridai/eridaRE', 'lianghsun/Marble-3B', 'DataSoul/MwM-7B-CoT-Merge1', 'Erland/Mistral-Small-24B-Base-ChatML-2501-bnb-4bit', 'chameleon-lizard/Qwen-2.5-7B-DTF', 'Vikhrmodels/QVikhr-2.5-1.5B-Instruct-SMPO_MLX-8bit', 'RecurvAI/Recurv-Clinical-Deepseek-R1',
105
- 'Darkhn/L3.3-Damascus-R1-5.0bpw-h8-exl2', 'Vikhrmodels/QVikhr-2.5-1.5B-Instruct-SMPO_MLX-4bit', 'BarBarickoza/Dans-Picaro-MagNante-v4-v1-12b-V3', 'skzxjus/Qwen2.5-7B-1m-Open-R1-Distill', 'CultriX/Qwen2.5-14B-Ultima', 'CultriX/Enhanced-TIES-Base-v1', 'loaiabdalslam/beetelware-saudi-R1-Distill-Llama-8B',
106
- 'Triangle104/Gemmadevi-Stock-10B', 'avemio-digital/German-RAG-HERMES-MOBIUS-R1-LLAMA', 'syubraj/MedicalChat-Phi-3.5-mini-instruct', 'Xiaojian9992024/Qwen2.5-THREADRIPPER-Small', 'jpacifico/Chocolatine-2-merged-qwen25arch', 'mobiuslabsgmbh/Meta-Llama-3-8B-Instruct_4bitgs64_hqq_hf', 'pabloce/esbieta-ec-qwen-2.5-3B', 'TareksLab/Progenitor-V2.3-LLaMa-70B',
107
- 'suayptalha/Lamarckvergence-14B', 'jpacifico/Chocolatine-2-14B-Instruct-v2.0.3', 'bunnycore/DeepThinker-7B-Sce-v2',
108
- 'sometimesanotion/Qwen2.5-7B-Gordion-v0.1', 'openai-community/gpt2-large', 'openai-community/openai-gpt', 'EleutherAI/gpt-neo-1.3B', 'EleutherAI/gpt-neo-125m', 'GroNLP/gpt2-small-italian', 'LorenzoDeMattei/GePpeTto', 'Vamsi/T5_Paraphrase_Paws', 'ethzanalytics/distilgpt2-tiny-conversational', 'microsoft/DialoGPT-small', 'mrm8488/spanish-gpt2',
109
- 'shibing624/code-autocomplete-distilgpt2-python', 'EleutherAI/gpt-neox-20b', 'bigscience/bloom-560m', 'bigscience/bloom-1b7', 'rinna/japanese-gpt-neox-small', 'Langboat/bloom-1b4-zh',
110
- 'EleutherAI/polyglot-ko-1.3b', 'bigscience/bloomz', 'Gustavosta/MagicPrompt-Stable-Diffusion', 'EleutherAI/polyglot-ko-5.8b', 'bigscience/bloomz-560m', 'bigscience/bloomz-3b', 'Norod78/gpt-fluentui-flat-svg', 'EleutherAI/pythia-160m', 'EleutherAI/pythia-1b-deduped', 'EleutherAI/pythia-12b', 'medalpaca/medalpaca-7b', 'huggyllama/llama-7b',
111
- 'vicgalle/gpt2-open-instruct-v1', 'bigcode/starcoder', 'TheBloke/stable-vicuna-13B-GPTQ', 'TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ', 'bigcode/tiny_starcoder_py', 'TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ', 'Monero/WizardLM-30B-Uncensored-Guanaco-SuperCOT-30b', 'TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ', 'nomic-ai/gpt4all-falcon',
112
- 'TheBloke/Karen_theEditor_13B-GPTQ', 'TheBloke/Nous-Hermes-13B-GPTQ', 'pankajmathur/orca_alpaca_3b', 'pankajmathur/orca_mini_3b', 'TheBloke/WizardLM-13B-V1-0-Uncensored-SuperHOT-8K-GPTQ', 'TheBloke/Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-GPTQ',
113
- 'bigcode/starcoderbase-1b', 'NumbersStation/nsql-6B', 'HuggingFaceM4/idefics-80b', 'TheBloke/Pygmalion-7B-SuperHOT-8K-GPTQ', 'Maykeye/TinyLLama-v0', 'meta-llama/Llama-2-70b-hf', 'meta-llama/Llama-2-13b-chat-hf', 'meta-llama/Llama-2-70b-chat-hf', 'TheBloke/Llama-2-13B-chat-GPTQ', 'NousResearch/Llama-2-7b-chat-hf', 'TheBloke/Llama-2-70B-Chat-GPTQ', 'NousResearch/Llama-2-13b-chat-hf', 'georgesung/llama2_7b_chat_uncensored', 'NousResearch/Nous-Hermes-Llama2-13b', 'TheBloke/30B-Epsilon-GPTQ', 'TheBloke/Dolphin-Llama-13B-GPTQ', 'bigcode/octocoder', 'Qwen/Qwen-7B', 'Qwen/Qwen-7B-Chat', 'uoe-nlp/gpt-neo-125m_instruction-tuned_sni', 'TheBloke/MythoMax-L2-13B-GPTQ', 'quantumaikr/llama-2-70b-fb16-korean', 'cenkersisman/gpt2-turkish-900m', 'codellama/CodeLlama-7b-hf', 'codellama/CodeLlama-13b-hf', 'codellama/CodeLlama-13b-Python-hf', 'codellama/CodeLlama-7b-Instruct-hf', 'codellama/CodeLlama-13b-Instruct-hf', 'codellama/CodeLlama-34b-hf', 'codellama/CodeLlama-34b-Python-hf', 'codellama/CodeLlama-34b-Instruct-hf', 'tiiuae/falcon-180B', 'uukuguy/speechless-llama2-luban-orca-platypus-13b', 'TinyLlama/TinyLlama-1.1B-step-50K-105b', 'diabolic6045/itineraries_Generator', '42dot/42dot_LLM-PLM-1.3B', '42dot/42dot_LLM-SFT-1.3B', 'tiiuae/falcon-180B-chat', 'PygmalionAI/pygmalion-2-13b', 'PygmalionAI/mythalion-13b', 'microsoft/phi-1_5', 'microsoft/phi-1', 'Undi95/UndiMix-v4-13B', 'teknium/Phi-Hermes-1.3B', 'TinyLlama/TinyLlama-1.1B-Chat-v0.1', 'AdaptLLM/medicine-LLM', 'AdaptLLM/law-LLM', 'AdaptLLM/finance-LLM', 'Dans-DiscountModels/Dans-RetroRodeo-13b', 'TheBloke/30B-Epsilon-AWQ', 'TheBloke/Wizard-Vicuna-7B-Uncensored-AWQ', 'TheBloke/Xwin-LM-13B-V0.1-GPTQ', 'Duxiaoman-DI/XuanYuan-70B', 'TheBloke/storytime-13B-GPTQ', 'Qwen/Qwen-14B-Chat', 'TheBloke/Mistral-7B-v0.1-AWQ', 'TheBloke/Mistral-7B-Instruct-v0.1-AWQ', 'TheBloke/Mistral-7B-v0.1-GPTQ', 'stabilityai/stablelm-3b-4e1t', 'rmanluo/RoG', 'lizpreciatior/lzlv_70b_fp16_hf', 'Dans-Archive/Dans-TotSirocco-7b', 'basilepp19/bloom-1b7_it', 
'WisdomShell/CodeShell-7B', 'mychen76/mistral7b_ocr_to_json_v1', 'TheBloke/Athena-v4-GPTQ', 'HuggingFaceH4/zephyr-7b-alpha', 'cognitivecomputations/dolphin-2.1-mistral-7b', 'TheBloke/llava-v1.5-13B-AWQ', 'TheBloke/llava-v1.5-13B-GPTQ', 'THUDM/agentlm-7b', 'LumiOpen/Poro-34B', 'jondurbin/airoboros-m-7b-3.1.2', 'KoboldAI/LLaMA2-13B-Tiefighter-GPTQ', 'deepseek-ai/deepseek-coder-6.7b-base', 'aisingapore/sea-lion-3b', 'TRAC-MTRY/traclm-v1-3b-base', 'pfnet/plamo-13b-instruct', 'bkai-foundation-models/vietnamese-llama2-7b-40GB', 'flozi00/Mistral-7B-german-assistant-v4', 'TheBloke/zephyr-7B-beta-GPTQ', 'squarelike/Gugugo-koen-7B-V1.1', 'deepseek-ai/deepseek-coder-33b-base', 'TheBloke/Athnete-13B-GPTQ', 'TheBloke/Nethena-20B-GPTQ', 'cognitivecomputations/dolphin-2.2.1-mistral-7b', '01-ai/Yi-34B', 'TheBloke/deepseek-coder-33B-instruct-AWQ', 'alpindale/goliath-120b', 'Pclanglais/MonadGPT', 'epfl-llm/meditron-70b', 'epfl-llm/meditron-7b', 'alignment-handbook/zephyr-7b-sft-full', 'OpenLLM-France/Claire-7B-0.1', 'hakurei/mommygpt-3B', 'allenai/tulu-2-dpo-70b', 'NeverSleep/Noromaid-13b-v0.1.1', 'KoboldAI/LLaMA2-13B-Psyfighter2', 'Intel/neural-chat-7b-v3-1', 'OrionStarAI/OrionStar-Yi-34B-Chat', 'FPHam/Karen_TheEditor_V2_STRICT_Mistral_7B', 'Doctor-Shotgun/Nous-Capybara-limarpv3-34B', 'TinyLlama/TinyLlama-1.1B-Chat-v0.4', 'MohamedRashad/AceGPT-13B-chat-AWQ', 'THUDM/cogvlm-chat-hf', 'TheBloke/merlyn-education-safety-GPTQ', 'AntibodyGeneration/fine-tuned-progen2-small', 'TinyLlama/TinyLlama-1.1B-Chat-v0.6', 'OrionStarAI/OrionStar-Yi-34B-Chat-Llama', 'stabilityai/stablelm-zephyr-3b', 'FPHam/Karen_TheEditor_V2_CREATIVE_Mistral_7B', 'Jiayi-Pan/Tiny-Vicuna-1B', 'ethz-spylab/poisoned-rlhf-7b-SUDO-10', 'maywell/PiVoT-0.1-early', 'berkeley-nest/Starling-LM-7B-alpha', 'google/madlad400-8b-lm', 'SparseLLM/ReluLLaMA-7B', 'shleeeee/mistral-7b-wiki', 'ceadar-ie/FinanceConnect-13B', 'brucethemoose/CapyTessBorosYi-34B-200K-DARE-Ties-exl2-4bpw-fiction', 'TheBloke/saiga_mistral_7b-GPTQ', 
'unsloth/llama-2-7b-bnb-4bit', 'Qwen/Qwen-72B-Chat', 'mlabonne/NeuralHermes-2.5-Mistral-7B', 'TheBloke/open-llama-3b-v2-wizard-evol-instuct-v2-196k-AWQ', 'TheBloke/deepseek-llm-7B-chat-GPTQ', 'beomi/Yi-Ko-6B', 'm-a-p/ChatMusician', 'maywell/Synatra-42dot-1.3B', 'Qwen/Qwen-Audio', 'Qwen/Qwen-Audio-Chat', 'mhenrichsen/context-aware-splitter-1b-english', 'jondurbin/cinematika-7b-v0.1', 'eci-io/climategpt-7b', 'simonveitner/MathHermes-2.5-Mistral-7B', 'ise-uiuc/Magicoder-DS-6.7B', 'ise-uiuc/Magicoder-S-DS-6.7B', 'migueldeguzmandev/paperclippetertodd3', 'sophosympatheia/Rogue-Rose-103b-v0.2', 'timpal0l/Mistral-7B-v0.1-flashback-v2', 'Trelis/Llama-2-7b-chat-hf-function-calling-v3', 'togethercomputer/StripedHyena-Nous-7B', 'Trelis/deepseek-llm-67b-chat-function-calling-v3', 'meta-llama/LlamaGuard-7b', 'openaccess-ai-collective/DPOpenHermes-7B-v2', 'tokyotech-llm/Swallow-7b-instruct-hf', 'AdaptLLM/finance-chat', 'AdaptLLM/law-chat', 'Intel/neural-chat-7b-v3-3', 'Rijgersberg/GEITje-7B-chat', 'TinyLlama/TinyLlama-1.1B-intermediate-step-1195k-token-2.5T', 'TheBloke/Mistral-7B-Instruct-v0.2-AWQ', 'DaizeDong/GraphsGPT-2W', 'upstage/SOLAR-10.7B-Instruct-v1.0', 'upstage/SOLAR-10.7B-v1.0', 'w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored', 'seyabde/mistral_7b_yo_instruct', 'TheBloke/dolphin-2.5-mixtral-8x7b-GPTQ', 'joey00072/ToxicHermes-2.5-Mistral-7B', 'THUDM/cogagent-vqa-hf', 'Rijgersberg/GEITje-7B-chat-v2', 'silk-road/ChatHaruhi_RolePlaying_qwen_7b', 'AdaptLLM/finance-LLM-13B', 'bkai-foundation-models/vietnamese-llama2-7b-120GB', 'scb10x/typhoon-7b', 'Felladrin/Llama-160M-Chat-v1', 'SuperAGI/SAM', 'Nero10578/Mistral-7B-Sunda-v1.0', 'NousResearch/Nous-Hermes-2-Yi-34B', 'ericpolewski/AIRIC-The-Mistral', 'charent/Phi2-Chinese-0.2B', 'unum-cloud/uform-gen', 'unsloth/mistral-7b-bnb-4bit', 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT', 'LR-AI-Labs/vbd-llama2-7B-50b-chat', 'unsloth/codellama-34b-bnb-4bit', 'cognitivecomputations/dolphin-2.6-mistral-7b', 'unsloth/llama-2-13b-bnb-4bit', 
'OpenPipe/mistral-ft-optimized-1227', 'TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T', 'GRMenon/mental-health-mistral-7b-instructv0.2-finetuned-V2', 'sethuiyer/SynthIQ-7b', 'unsloth/zephyr-sft-bnb-4bit', 'jondurbin/bagel-34b-v0.2', 'SkunkworksAI/tinyfrank-1.4B', 'NeuralNovel/Panda-7B-v0.1', 'unsloth/tinyllama-bnb-4bit', 'NousResearch/Nous-Hermes-2-SOLAR-10.7B', 'cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser', 'Vikhrmodels/Vikhr-7b-0.1', 'nicholasKluge/TeenyTinyLlama-460m', 'jsfs11/OH-dpov2', 'Unbabel/TowerBase-7B-v0.1', 'Doctor-Shotgun/Mixtral-8x7B-Instruct-v0.1-LimaRP-ZLoss', 'WizardLMTeam/WizardCoder-33B-V1.1', 'SanjiWatsuki/Kunoichi-7B', 'Unbabel/TowerInstruct-7B-v0.1', 'WYNN747/Burmese-GPT', 'NousResearch/Genstruct-7B', 'broskicodes/simple-stories-4M', 'STEM-AI-mtl/phi-2-electrical-engineering', 'mlabonne/phixtral-2x2_8', 'ross-dev/sexyGPT-Uncensored', 'HuggingFaceM4/VLM_WebSight_finetuned', 'stabilityai/stable-code-3b', 'huskyhong/noname-ai-v2_2-light', 'aari1995/germeo-7b-laser', 'argilla/distilabeled-OpenHermes-2.5-Mistral-7B', 'fblgit/UNA-TheBeagle-7b-v1', 'cognitivecomputations/MegaDolphin-120b', 'herisan/tinyllama-mental_health_counseling_conversations', 'NeverSleep/Noromaid-7B-0.4-DPO', 'therealcyberlord/TinyLlama-1.1B-Medical', 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO', 'szymonrucinski/Curie-7B-v1', 'MaziyarPanahi/Synatra-7B-v0.3-RP-Mistral-7B-Instruct-v0.2-slerp', 'SicariusSicariiStuff/Tenebra_30B_Alpha01_FP16', 'charlesdedampierre/TopicNeuralHermes-2.5-Mistral-7B', 'CodeGPTPlus/deepseek-coder-1.3b-typescript', 'herisan/Mistral-7b-bnb-4bit_mental_health_counseling_conversations', 'Viet-Mistral/Vistral-7B-Chat', 'sophosympatheia/Midnight-Rose-70B-v1.0', 'itsskofficial/falcon-7b-blooms-taxonomy-merged', 'AI-B/UTENA-7B-NSFW-V2', 'KoboldAI/LLaMA2-13B-Estopia', 'DiscoResearch/DiscoLM_German_7b_v1', 'CallComply/zephyr-7b-beta-32k', 'mlabonne/NeuralBeagle14-7B', 'jat-project/jat', 'macadeliccc/piccolo-math-2x7b', 'Isotonic/Dolphin-5.1-7b', 
'shadowml/DareBeagle-7B', 'Karko/Proctora', 'haoranxu/ALMA-13B-R', 'yanolja/KoSOLAR-10.7B-v0.2', 'Tensoic/Kan-Llama-7B-SFT-v0.5', 'stabilityai/stablelm-2-1_6b', 'stabilityai/stablelm-2-zephyr-1_6b', 'lrds-code/boana-7b-instruct', 'vikhyatk/moondream1', 'gate369/Blurdus-7b-v0.1', 'Blizado/discolm-mfto-7b-german-v0.1', 'unsloth/mistral-7b-instruct-v0.2-bnb-4bit', 'senseable/WestLake-7B-v2', 'Qwen/Qwen1.5-0.5B', 'Qwen/Qwen1.5-1.8B', 'Qwen/Qwen1.5-7B', 'epinnock/deepseek-coder-33B-evol-feedback-v3', 'LanguageBind/MoE-LLaVA-StableLM-1.6B-4e', 'AISimplyExplained/Vakil-7B', 'RaviNaik/Llava-Phi2', 'motherduckdb/DuckDB-NSQL-7B-v0.1', 'deepseek-ai/deepseek-coder-7b-base-v1.5', 'KatyTheCutie/EstopianMaid-13B', 'abacusai/TheProfessor-155b', 'allenai/OLMo-1B', 'cfahlgren1/natural-functions', 'macadeliccc/WestLake-7B-v2-laser-truthy-dpo', 'jsfs11/WestOrcaDPO-7B-GTA', 'cckevinn/SeeClick', 'Unbabel/TowerInstruct-13B-v0.1', 'codellama/CodeLlama-70b-hf', 'codellama/CodeLlama-70b-Python-hf', 'codellama/CodeLlama-70b-Instruct-hf', 'seedboxai/KafkaLM-70B-German-V0.1', 'Qwen/Qwen1.5-7B-Chat', 'Qwen/Qwen1.5-72B-Chat', 'liuhaotian/llava-v1.6-vicuna-7b', 'liuhaotian/llava-v1.6-vicuna-13b', 'LoneStriker/Lily-Cybersecurity-7B-v0.2-8.0bpw-h8-exl2', 'Qwen/Qwen1.5-0.5B-Chat', 'unsloth/codellama-7b-bnb-4bit', 'Gille/StrangeMerges_17-7B-dare_ties', 'Gille/StrangeMerges_19-7B-dare_ties', 'Bread-AI/Crumb-13B', 'Druvith/MEDMISTRAL', 'alchemonaut/BoreanGale-70B', 'Gille/StrangeMerges_20-7B-slerp', 'PipableAI/pip-SQL-1B', 'cais/HarmBench-Llama-2-13b-cls', 'sophosympatheia/Midnight-Rose-70B-v2.0.3', 'defog/sqlcoder-7b-2', 'RUCKBReasoning/TableLLM-13b', 'RUCKBReasoning/TableLLM-7b', 'Sao10K/Fimbulvetr-11B-v2', 'nvidia/OpenMath-Mistral-7B-v0.1-hf', 'yanolja/EEVE-Korean-10.8B-v1.0', 'WhiteRabbitNeo/Trinity-33B-v1.0', 'hon9kon9ize/CantoneseLLM-6B-preview202402', 'Nitral-Archive/Pasta-Lake-7b', 'kennylam/Breeze-7B-Cantonese-v0.1', 'Unbabel/TowerInstruct-7B-v0.2', 'GritLM/GritLM-7B', 'google/gemma-7b-it', 
'ytu-ce-cosmos/turkish-gpt2-large', 'prometheus-eval/prometheus-7b-v2.0', 'NingLab/eCeLLM-M', 'PipableAI/pip-sql-1.3b', 'rhplus0831/maid-yuzu-v8', 'proxectonos/Carballo-bloom-1.3B', 'sambanovasystems/SambaLingo-Arabic-Chat', 'shahzebnaveed/StarlingHermes-2.5-Mistral-7B-slerp', 'LumiOpen/Viking-7B', 'tanamettpk/TC-instruct-DPO', 'Tann-dev/sex-chat-dirty-girlfriend', 'BioMistral/BioMistral-7B-DARE-AWQ-QGS128-W4-GEMM', 'NousResearch/Nous-Hermes-2-Mistral-7B-DPO', 'SparseLLM/prosparse-llama-2-7b', 'HuggingFaceTB/cosmo-1b', 'Efficient-Large-Model/VILA-13b', 'scb10x/typhoon-7b-instruct-02-19-2024', 'LumiOpen/Viking-33B', 'prometheus-eval/prometheus-8x7b-v2.0', 'bigcode/starcoder2-15b', 'togethercomputer/evo-1-131k-base', 'unsloth/gemma-7b-bnb-4bit', 'unsloth/gemma-2b-bnb-4bit', 'unsloth/gemma-2b-it-bnb-4bit', 'unsloth/gemma-7b-it-bnb-4bit', 'yanolja/EEVE-Korean-Instruct-10.8B-v1.0', 'yanolja/EEVE-Korean-2.8B-v1.0', 'yanolja/EEVE-Korean-Instruct-2.8B-v1.0', 'gordicaleksa/YugoGPT', 'timpal0l/Mistral-7B-v0.1-flashback-v2-instruct', 'allenai/OLMo-7B-Instruct', 'coggpt/qwen-1.5-patent-translation', 'GreatCaptainNemo/ProLLaMA', 'Felladrin/Minueza-32M-Base', 'Felladrin/Minueza-32M-Chat', 'm-a-p/OpenCodeInterpreter-DS-1.3B', 'MaziyarPanahi/LongAlpaca-13B-GGUF', 'OPI-PG/Qra-1b', 'MathGenie/MathGenie-InterLM-20B', 'MaziyarPanahi/Mistral-7B-Instruct-Aya-101', 'ENERGY-DRINK-LOVE/eeve_dpo-v3', 'Stopwolf/Tito-7B-slerp', 'MaziyarPanahi/Mistral-7B-Instruct-Aya-101-GGUF', 'PORTULAN/gervasio-7b-portuguese-ptbr-decoder', 'JinghuiLuAstronaut/DocLLM_baichuan2_7b', 'vicgalle/RoleBeagle-11B', 'HuggingFaceH4/zephyr-7b-gemma-v0.1', 'KatyTheCutie/LemonadeRP-4.5.3', 'Kooten/LemonadeRP-4.5.3-4bpw-exl2', 'sophosympatheia/Midnight-Miqu-103B-v1.0', 'soketlabs/pragna-1b', 'remyxai/SpaceLLaVA', 'Efficient-Large-Model/VILA-2.7b', 'hiyouga/Llama-2-70b-AQLM-2Bit-QLoRA-function-calling', 'occiglot/occiglot-7b-de-en-instruct', 'erythropygia/Gemma2b-Turkish-Instruction', 'state-spaces/mamba-2.8b-hf', 
'state-spaces/mamba-130m-hf', 'zamal/gemma-7b-finetuned', 'Divyanshu04/LLM3', 'yam-peleg/Hebrew-Gemma-11B', 'yam-peleg/Hebrew-Gemma-11B-Instruct', 'stabilityai/stable-code-instruct-3b', 'Gille/StrangeMerges_35-7B-slerp', 'stanford-oval/llama-7b-wikiwebquestions', 'cstr/Spaetzle-v8-7b', 'ChaoticNeutrals/BuRP_7B', 'cstr/Spaetzle-v12-7b', 'lightblue/ao-karasu-72B', 'NousResearch/Hermes-2-Pro-Mistral-7B', 'hiieu/Vistral-7B-Chat-function-calling', 'CohereForAI/c4ai-command-r-v01', 'ND911/Franken-Mistral-Merlinite-Maid', 'fhai50032/Mistral-4B', 'meta-llama/CodeLlama-7b-Python-hf', 'meta-llama/CodeLlama-7b-Instruct-hf', 'meta-llama/CodeLlama-13b-hf', 'meta-llama/CodeLlama-13b-Instruct-hf', 'ministral/Ministral-3b-instruct', 'CohereForAI/c4ai-command-r-v01-4bit', 'KissanAI/Dhenu-vision-lora-0.1', 'MaziyarPanahi/Calme-7B-Instruct-v0.2', 'icefog72/Kunokukulemonchini-7b-4.1bpw-exl2', 'ChaoticNeutrals/Infinitely-Laydiculous-7B', 'Virt-io/Nina-v2-7B', 'BAAI/bge-reranker-v2-minicpm-layerwise', 'NexaAIDev/Octopus-v2', 'jhu-clsp/FollowIR-7B', 'cais/HarmBench-Mistral-7b-val-cls', 'ezelikman/quietstar-8-ahead', 'szymonrucinski/Krakowiak-7B-v3', 'FluffyKaeloky/Midnight-Miqu-103B-v1.5', 'Nekochu/Confluence-Renegade-7B', 'fxmarty/tiny-dummy-qwen2', 'ytu-ce-cosmos/turkish-gpt2-large-750m-instruct-v0.1', 'ChaoticNeutrals/Eris_PrimeV3-Vision-7B', 'somosnlp/Sam_Diagnostic', 'google/codegemma-2b', 'google/codegemma-7b', 'google/codegemma-7b-it', 'stabilityai/stablelm-2-12b', 'unsloth/mistral-7b-v0.2-bnb-4bit', 'Praneeth/code-gemma-2b-it', 'Inv/Konstanta-V4-Alpha-7B', 'liminerity/e.star.7.b', 'Sahi19/Gemma2bLegalChatbot', 'gokaygokay/moondream-prompt', 'YanweiLi/MGM-7B', 'beomi/gemma-ko-2b', 'Anant58/Genshin-chat-ARM', 'thtskaran/sanskritayam-gpt', 'Natkituwu/Erosumika-7B-v3-7.1bpw-exl2', 'MarsupialAI/SkunkApe-14b', 'google/gemma-1.1-7b-it', 'Smuggling1710/InfinToppyKuno-DARE-7b', 'botbot-ai/CabraQwen7b', 'bsen26/113-Aspect-Emotion-Model', 'arcee-ai/Saul-Nous-Hermes-2-Mistral-7B-DPO-Ties', 
'cognitivecomputations/dolphin-2.8-mistral-7b-v02', 'ai21labs/Jamba-v0.1', 'grimjim/Mistral-Starling-merge-trial1-7B', 'mikewang/PVD-160k-Mistral-7b', 'Eurdem/Pinokio_v1.0', 'keeeeenw/MicroLlama', '1bitLLM/bitnet_b1_58-3B', '1bitLLM/bitnet_b1_58-xl', '1bitLLM/bitnet_b1_58-large', 'EdBerg/MISTRALNEURAL-7B-slerp', 'Kukedlc/Neural-4-QA-7b']
114
 
115
- # Define quantization types
116
- QUANT_TYPES = ["Q2_K", "Q3_K_l", "Q3_K_M", "Q3_K_S", "Q4_0", "Q4_1", "Q4_K_M", "Q4_K_S", "Q5_0", "Q5_1", "Q5_K_M", "Q5_K_S", "Q6_K", "Q8_0", "BF16", "F16", "F32"]
117
 
118
- QUANT_DESCRIPTIONS = {
119
- "Q2_K": "Smallest size, acceptable for simple tasks",
120
- "Q3_K_l": "Good balance for lightweight applications",
121
- "Q3_K_M": "Medium quality, good for general text generation",
122
- "Q3_K_S": "Small size, suitable for simple interactions",
123
- "Q4_0": "Legacy format, basic compression",
124
- "Q4_1": "Better than Q4_0, good general purpose",
125
- "Q4_K_M": "Recommended for most uses, good balance",
126
- "Q4_K_S": "Smaller than Q4_K_M, still good quality",
127
- "Q5_0": "Higher precision than Q4, legacy format",
128
- "Q5_1": "Improved Q5, good for complex tasks",
129
- "Q5_K_M": "High quality, larger size, good for complex reasoning",
130
- "Q5_K_S": "Balanced quality and size in Q5 family",
131
- "Q6_K": "Very high quality, larger size",
132
- "Q8_0": "Highest quality quantized, largest size",
133
- "BF16": "Brain Float 16, good for GPU inference",
134
- "F16": "Full 16-bit precision, high accuracy",
135
- "F32": "Full 32-bit precision, highest accuracy, largest size"
136
- }
137
 
138
- model_dir_path = check_directory_path("/app/llama.cpp")
139
 
140
- def download_model(hf_model_name, output_dir="/tmp/models"):
141
- """
142
- Downloads a Hugging Face model and saves it locally.
143
- """
144
- st.write(f"๐Ÿ“ฅ Downloading `{hf_model_name}` from Hugging Face...")
145
- os.makedirs(output_dir, exist_ok=True)
146
- snapshot_download(repo_id=hf_model_name, local_dir=output_dir, local_dir_use_symlinks=False)
147
- st.success("โœ… Model downloaded successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
- def convert_to_gguf(model_dir, output_file):
150
- """
151
- Converts a Hugging Face model to GGUF format.
152
- """
153
- st.write(f"๐Ÿ”„ Converting `{model_dir}` to GGUF format...")
154
- os.makedirs(os.path.dirname(output_file), exist_ok=True)
155
- cmd = [
156
- "python3", "/app/llama.cpp/convert_hf_to_gguf.py", model_dir,
157
- "--outfile", output_file
158
- ]
159
- process = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
160
- if process.returncode == 0:
161
- st.success(f"โœ… Conversion complete: `{output_file}`")
162
  else:
163
- st.error(f"โŒ Conversion failed: {process.stderr}")
164
 
165
- def quantize_llama(model_path, quantized_output_path, quant_type):
166
- """
167
- Quantizes a GGUF model.
168
- """
169
- st.write(f"โšก Quantizing `{model_path}` with `{quant_type}` precision...")
170
- os.makedirs(os.path.dirname(quantized_output_path), exist_ok=True)
171
- quantize_path = "/app/llama.cpp/build/bin/llama-quantize"
172
-
173
- cmd = [
174
- "/app/llama.cpp/build/bin/llama-quantize",
175
- model_path,
176
- quantized_output_path,
177
- quant_type
178
- ]
179
-
180
- process = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
- if process.returncode == 0:
183
- st.success(f"โœ… Quantized model saved at `{quantized_output_path}`")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  else:
185
- st.error(f"โŒ Quantization failed: {process.stderr}")
 
 
186
 
187
- def automate_llama_quantization(hf_model_name, quant_type):
188
- """
189
- Orchestrates the entire quantization process.
190
- """
191
- output_dir = "/tmp/models"
192
- gguf_file = os.path.join(output_dir, f"{hf_model_name.replace('/', '_')}.gguf")
193
- quantized_file = gguf_file.replace(".gguf", f"-{quant_type}.gguf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
- progress_bar = st.progress(0)
 
196
 
197
- # Step 1: Download
198
- st.write("### Step 1: Downloading Model")
199
- download_model(hf_model_name, output_dir)
200
- progress_bar.progress(33)
201
 
202
- # Step 2: Convert to GGUF
203
- st.write("### Step 2: Converting Model to GGUF Format")
204
- convert_to_gguf(output_dir, gguf_file)
205
- progress_bar.progress(66)
 
 
206
 
207
- # Step 3: Quantize Model
208
- st.write("### Step 3: Quantizing Model")
209
- quantize_llama(gguf_file, quantized_file, quant_type.lower())
210
- progress_bar.progress(100)
211
 
212
- st.success(f"๐ŸŽ‰ All steps completed! Quantized model available at: `{quantized_file}`")
213
- return quantized_file
214
 
215
- def upload_to_huggingface(file_path, repo_id, token):
216
- """
217
- Uploads a file to Hugging Face Hub.
218
- """
219
- try:
220
- # Log in to Hugging Face
221
- login(token=token)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
- # Initialize HfApi
224
- api = HfApi()
 
 
 
 
 
225
 
226
- # Create the repository if it doesn't exist
227
- api.create_repo(repo_id, exist_ok=True, repo_type="model")
 
 
228
 
229
- # Upload the file
230
- api.upload_file(
231
- path_or_fileobj=file_path,
232
- path_in_repo=os.path.basename(file_path),
233
- repo_id=repo_id,
234
- )
235
- st.success(f"โœ… File uploaded to Hugging Face: {repo_id}")
236
 
237
- # Reset session state and rerun
238
- st.session_state.quantized_model_path = None
239
- st.session_state.upload_to_hf = False
240
- st.rerun()
241
- except Exception as e:
242
- st.error(f"โŒ Failed to upload file: {e}")
 
243
 
244
- st.title("๐Ÿฆ™ LLaMA Model Quantization (llama.cpp)")
 
 
 
 
 
245
 
 
246
 
247
- selected_model = st.selectbox("Select the Hugging Face Model", models_list, index=None)
248
- hf_model_name = selected_model if selected_model else st.text_input("Enter Hugging Face Model (If not there in the above list)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
 
250
- selected_quant = st.selectbox(
251
- "Select Quantization Type",
252
- QUANT_TYPES,
253
- help="Hover over options to see descriptions",
254
- format_func=lambda x: f"{x} - {QUANT_DESCRIPTIONS[x]}"
255
- )
256
- start_button = st.button("๐Ÿš€ Start Quantization")
257
 
258
- if start_button:
259
- if hf_model_name and selected_quant:
260
- with st.spinner("Processing..."):
261
- st.session_state.quantized_model_path = automate_llama_quantization(hf_model_name, selected_quant)
 
 
 
 
 
 
 
 
262
  else:
263
- st.warning("Please select/enter the necessary fields.")
264
 
265
- if st.session_state.quantized_model_path:
266
- with open(st.session_state.quantized_model_path, "rb") as f:
267
- if st.download_button("โฌ‡๏ธ Download Quantized Model", f, file_name=os.path.basename(st.session_state.quantized_model_path)):
268
- st.session_state.quantized_model_path = None
269
- st.session_state.upload_to_hf = False
270
- st.rerun()
271
-
272
- # Checkbox for upload section
273
- st.session_state.upload_to_hf = st.checkbox("Upload to Hugging Face", value=st.session_state.upload_to_hf)
274
-
275
- if st.session_state.upload_to_hf:
276
- st.write("### Upload to Hugging Face")
277
- repo_id = st.text_input("Enter Hugging Face Repository ID (e.g., 'username/repo-name')")
278
- hf_token = st.text_input("Enter Hugging Face Token", type="password")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
- if st.button("๐Ÿ“ค Upload to Hugging Face"):
281
- if repo_id and hf_token:
282
- with st.spinner("Uploading..."):
283
- upload_to_huggingface(st.session_state.quantized_model_path, repo_id, hf_token)
 
 
 
 
 
 
 
 
 
 
 
 
284
  else:
285
- st.warning("Please provide a valid repository ID and Hugging Face token.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import json
3
  import subprocess
4
+ import os
5
+ from llama_cpp import Llama
6
+ from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
7
+ from llama_cpp_agent.providers import LlamaCppPythonProvider
8
+ from llama_cpp_agent.chat_history import BasicChatHistory
9
+ from llama_cpp_agent.chat_history.messages import Roles
10
+ import gradio as gr
11
+ from huggingface_hub import hf_hub_download
12
+ import tempfile
13
+ from typing import List, Tuple, Optional
14
+
15
+ # PDF ์ฒ˜๋ฆฌ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ์กฐ๊ฑด๋ถ€ import
16
+ try:
17
+ from docling.document_converter import DocumentConverter
18
+ DOCLING_AVAILABLE = True
19
+ except ImportError:
20
+ DOCLING_AVAILABLE = False
21
+ print("Docling not available, using alternative PDF processing")
22
+ try:
23
+ import PyPDF2
24
+ import pdfplumber
25
+ except ImportError:
26
+ print("Warning: PDF processing libraries not fully installed")
27
 
28
+ # ํ™˜๊ฒฝ ๋ณ€์ˆ˜์—์„œ HF_TOKEN ๊ฐ€์ ธ์˜ค๊ธฐ
29
+ HF_TOKEN = os.getenv("HF_TOKEN")
 
 
30
 
31
+ # ์ „์—ญ ๋ณ€์ˆ˜ ์ดˆ๊ธฐํ™” (์ค‘์š”!)
32
+ llm = None
33
+ llm_model = None
34
+ document_context = "" # PDF์—์„œ ์ถ”์ถœํ•œ ๋ฌธ์„œ ์ปจํ…์ŠคํŠธ ์ €์žฅ
35
+ document_filename = "" # ํ˜„์žฌ ๋กœ๋“œ๋œ ๋ฌธ์„œ์˜ ํŒŒ์ผ๋ช…
36
 
37
+ print("์ „์—ญ ๋ณ€์ˆ˜ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ")
38
+ print(f"document_context ์ดˆ๊ธฐ๊ฐ’: '{document_context}'")
39
+ print(f"document_filename ์ดˆ๊ธฐ๊ฐ’: '{document_filename}'")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ # ๋ชจ๋ธ ์ด๋ฆ„๊ณผ ๊ฒฝ๋กœ๋ฅผ ์ •์˜
42
+ MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"
43
 
44
+ # ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ (HF_TOKEN ์‚ฌ์šฉ)
45
+ model_path = hf_hub_download(
46
+ repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
47
+ filename=MISTRAL_MODEL_NAME,
48
+ local_dir="./models",
49
+ token=HF_TOKEN
50
+ )
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
+ print(f"Downloaded model path: {model_path}")
53
 
54
+ css = """
55
+ .bubble-wrap {
56
+ padding-top: calc(var(--spacing-xl) * 3) !important;
57
+ }
58
+ .message-row {
59
+ justify-content: space-evenly !important;
60
+ width: 100% !important;
61
+ max-width: 100% !important;
62
+ margin: calc(var(--spacing-xl)) 0 !important;
63
+ padding: 0 calc(var(--spacing-xl) * 3) !important;
64
+ }
65
+ .flex-wrap.user {
66
+ border-bottom-right-radius: var(--radius-lg) !important;
67
+ }
68
+ .flex-wrap.bot {
69
+ border-bottom-left-radius: var(--radius-lg) !important;
70
+ }
71
+ .message.user{
72
+ padding: 10px;
73
+ }
74
+ .message.bot{
75
+ text-align: right;
76
+ width: 100%;
77
+ padding: 10px;
78
+ border-radius: 10px;
79
+ }
80
+ .message-bubble-border {
81
+ border-radius: 6px !important;
82
+ }
83
+ .message-buttons {
84
+ justify-content: flex-end !important;
85
+ }
86
+ .message-buttons-left {
87
+ align-self: end !important;
88
+ }
89
+ .message-buttons-bot, .message-buttons-user {
90
+ right: 10px !important;
91
+ left: auto !important;
92
+ bottom: 2px !important;
93
+ }
94
+ .dark.message-bubble-border {
95
+ border-color: #343140 !important;
96
+ }
97
+ .dark.user {
98
+ background: #1e1c26 !important;
99
+ }
100
+ .dark.assistant.dark, .dark.pending.dark {
101
+ background: #16141c !important;
102
+ }
103
+ .upload-container {
104
+ margin-bottom: 20px;
105
+ padding: 15px;
106
+ border: 2px dashed #666;
107
+ border-radius: 10px;
108
+ background-color: #f0f0f0;
109
+ }
110
+ .dark .upload-container {
111
+ background-color: #292733;
112
+ border-color: #444;
113
+ }
114
+ """
115
 
116
def get_messages_formatter_type(model_name):
    """Map a model name to the chat-message formatter it requires.

    Raises:
        ValueError: if the model family is not recognized.
    """
    # Mistral-family models (including the BitSix quantization) use the
    # MISTRAL prompt format rather than CHATML.
    if any(tag in model_name for tag in ("Mistral", "BitSix")):
        return MessagesFormatterType.MISTRAL
    raise ValueError(f"Unsupported model: {model_name}")
121
 
122
@spaces.GPU
def convert_pdf_to_markdown(file):
    """Convert an uploaded PDF to Markdown and cache it as the active
    document context.

    Args:
        file: Gradio file wrapper (exposes the temp path as ``file.name``),
            or None when nothing was uploaded.

    Returns:
        tuple[str, dict]: the Markdown text (or an error message) and a
        metadata dict describing the conversion.
    """
    global document_context, document_filename

    if file is None:
        return "파일이 업로드되지 않았습니다.", {}

    # Fix: the docling import at the top of the file is optional; without
    # this guard a missing docling crashed here with a NameError on
    # DocumentConverter instead of returning a structured error.
    if not DOCLING_AVAILABLE:
        error_msg = "PDF 변환 중 오류 발생: docling 라이브러리가 설치되어 있지 않습니다."
        print(f"❌ {error_msg}")
        document_context = ""
        document_filename = ""
        return error_msg, {"error": "docling not available"}

    try:
        print(f"\n=== PDF 변환 시작 ===")
        print(f"파일 경로: {file.name}")

        converter = DocumentConverter()
        result = converter.convert(file.name)
        markdown_content = result.document.export_to_markdown()

        # Publish the extracted text as the module-level document context.
        # The `global` declaration above is sufficient; the original's extra
        # globals()[...] assignments were redundant and have been removed.
        document_context = markdown_content
        document_filename = os.path.basename(file.name)

        metadata = {
            "filename": document_filename,
            "conversion_status": "success",
            "content_length": len(markdown_content),
            "preview": markdown_content[:500] + "..." if len(markdown_content) > 500 else markdown_content
        }

        print(f"✅ PDF 변환 성공!")
        print(f"📄 파일명: {document_filename}")
        print(f"📝 문서 길이: {len(markdown_content)} 문자")
        print(f"=== PDF 변환 완료 ===\n")

        return markdown_content, metadata

    except Exception as e:
        # On failure clear the cached context so stale text is never used.
        error_msg = f"PDF 변환 중 오류 발생: {str(e)}"
        print(f"❌ {error_msg}")
        document_context = ""
        document_filename = ""
        return error_msg, {"error": str(e)}
182
 
183
def find_relevant_chunks(document, query, chunk_size=1500, overlap=300):
    """Return the document excerpts most relevant to *query*.

    The document is split into overlapping windows of ``chunk_size``
    characters; each window is scored by how many query words it contains,
    and the top two scoring windows are returned.  When nothing matches,
    the first 2000 characters of the document are returned as a fallback.
    """
    if not document:
        return ""

    print(f"관련 청크 찾기 시작 - 쿼리: {query}")

    terms = query.lower().split()
    step = chunk_size - overlap

    # Overlapping (start_offset, text) windows over the document.
    windows = [(pos, document[pos:pos + chunk_size])
               for pos in range(0, len(document), step)]

    print(f"총 {len(windows)}개의 청크로 분할됨")

    # Score each window by the number of distinct query terms it contains;
    # keep only windows with at least one hit.
    ranked = []
    for pos, text in windows:
        lowered = text.lower()
        hits = sum(1 for term in terms if term in lowered)
        if hits > 0:
            ranked.append((hits, pos, text))

    # Stable sort on score only, so equal-score windows keep document order.
    ranked.sort(key=lambda entry: entry[0], reverse=True)
    best = ranked[:2]  # top two chunks only, to limit prompt size

    if not best:
        print("관련 청크를 찾지 못함, 문서 시작 부분 반환")
        return document[:2000]

    parts = [f"\n[문서의 {pos}번째 위치에서 발췌 - 관련도: {hits}]\n{text}\n"
             for hits, pos, text in best]
    print(f"{len(best)}개의 관련 청크 찾음")
    return "".join(parts)
223
 
224
@spaces.GPU(duration=120)
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream a chat completion for *message*, grounding the answer in the
    currently loaded PDF document (if any).

    Yields the accumulated response text after each streamed token.

    NOTE(review): despite the ``list[dict]`` annotation, *history* is read
    below with tuple-style indexing (``history[i][0]`` / ``history[i][1]``),
    i.e. it is assumed to be a list of [user, assistant] pairs — confirm
    against the caller (``bot_response`` builds dicts keyed "user"/
    "assistant", which would raise KeyError here).
    """
    global llm, llm_model

    # Defensive lookup of the document state via globals(); returns ''
    # when the module-level variables have not been set.
    document_context = globals().get('document_context', '')
    document_filename = globals().get('document_filename', '')

    # Verbose debug trace of the incoming request and document state.
    print(f"\n=== RESPOND 함수 시작 ===")
    print(f"사용자 메시지: {message}")
    print(f"문서 컨텍스트 존재 여부: {bool(document_context)}")
    if document_context:
        print(f"문서 길이: {len(document_context)}")
        print(f"문서 파일명: {document_filename}")
        print(f"문서 시작 100자: {document_context[:100]}...")
    else:
        print("⚠️ document_context가 비어있습니다!")
        print(f"globals()의 키들: {list(globals().keys())[:20]}...")  # first 20 keys only

    chat_template = get_messages_formatter_type(MISTRAL_MODEL_NAME)

    # Local path of the GGUF file downloaded at import time.
    model_path_local = os.path.join("./models", MISTRAL_MODEL_NAME)

    # Lazily create the Llama instance and cache it across calls.
    if llm is None or llm_model != MISTRAL_MODEL_NAME:
        print("LLM 모델 로딩 중...")
        llm = Llama(
            model_path=model_path_local,
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=16384,  # context window size
            verbose=True  # detailed logs for debugging
        )
        llm_model = MISTRAL_MODEL_NAME
        print("LLM 모델 로딩 완료!")

    provider = LlamaCppPythonProvider(llm)

    # Start from the user-configured system message.
    korean_system_message = system_message

    # With a document loaded, build both an augmented system message and an
    # augmented user message.  (The augmented user message is only actually
    # sent when no document is present — see the second document check
    # below, which sends the plain `message` instead.)
    if document_context and len(document_context) > 0:
        doc_length = len(document_context)
        print(f"📄 문서 컨텍스트를 메시지에 포함합니다: {doc_length} 문자")

        # Tell the model a document is loaded and must be consulted.
        korean_system_message += f"\n\n현재 '{document_filename}' PDF 문서가 로드되어 있습니다. 사용자의 모든 질문에 대해 이 문서의 내용을 반드시 참조하여 답변하세요."

        # Cap the inlined document size to keep the prompt small.
        max_doc_length = 4000
        if doc_length > max_doc_length:
            # Long document: include only the head and the tail.
            doc_snippet = document_context[:2000] + "\n\n[... 중간 내용 생략 ...]\n\n" + document_context[-1500:]
            enhanced_message = f"""업로드된 PDF 문서 정보:
- 파일명: {document_filename}
- 문서 길이: {doc_length} 문자

문서 내용 (일부):
{doc_snippet}

사용자 질문: {message}

위 문서를 참고하여 한국어로 답변해주세요."""
        else:
            # Short document: include it in full.
            enhanced_message = f"""업로드된 PDF 문서 정보:
- 파일명: {document_filename}
- 문서 길이: {doc_length} 문자

문서 내용:
{document_context}

사용자 질문: {message}

위 문서를 참고하여 한국어로 답변해주세요."""

        print(f"강화된 메시지 길이: {len(enhanced_message)}")
        print(f"메시지 미리보기 (처음 300자):\n{enhanced_message[:300]}...")

        # Debug aid: persist the composed message to disk for inspection.
        with open("debug_last_message.txt", "w", encoding="utf-8") as f:
            f.write(f"=== 디버그 정보 ===\n")
            f.write(f"문서 길이: {len(document_context)}\n")
            f.write(f"파일명: {document_filename}\n")
            f.write(f"사용자 질문: {message}\n")
            f.write(f"\n=== 전송될 메시지 ===\n")
            f.write(enhanced_message)
    else:
        # No document loaded: pass the message through, adding a hint when
        # the user appears to be asking about a document.
        enhanced_message = message
        if any(keyword in message.lower() for keyword in ["문서", "pdf", "업로드", "파일", "내용", "요약"]):
            enhanced_message = f"{message}\n\n[시스템 메시지: 현재 업로드된 PDF 문서가 없습니다. PDF 파일을 먼저 업로드해주세요.]"
            print("문서 관련 질문이지만 문서가 없음")

        # Debug messages for the empty-context path.
        print("⚠️ 경고: document_context가 비어있습니다!")
        print(f"document_context 타입: {type(document_context)}")
        print(f"document_context 값: {repr(document_context)}")
        print(f"document_filename: {document_filename}")

    # Sampling settings forwarded from the UI sliders.
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Final prompt assembly: with a document present, the document text goes
    # into the system prompt and the user message is sent unmodified.
    if document_context and len(document_context) > 0:
        doc_snippet = document_context[:3000]  # first 3000 chars only
        enhanced_system_prompt = f"""{korean_system_message}

현재 로드된 PDF 문서:
파일명: {document_filename}
문서 내용:
{doc_snippet}
{'' if len(document_context) <= 3000 else '... (이하 생략)'}

위 문서의 내용을 바탕으로 사용자의 질문에 답변하세요."""

        # Send the plain user message; the context lives in the system prompt.
        final_message = message
    else:
        enhanced_system_prompt = korean_system_message
        final_message = enhanced_message

    agent = LlamaCppAgent(
        provider,
        system_prompt=enhanced_system_prompt,
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )

    messages = BasicChatHistory()

    # Replay prior turns into the agent's chat history; the last history
    # entry is the in-flight turn and is skipped.
    for i in range(0, len(history)):
        if i < len(history) - 1 and history[i][1] is not None:
            # user turn
            messages.add_message({
                'role': Roles.user,
                'content': history[i][0]
            })
            # assistant turn
            messages.add_message({
                'role': Roles.assistant,
                'content': history[i][1]
            })

    print(f"최종 메시지 전송 중: {final_message}")

    # Stream the response, yielding the running concatenation of tokens.
    try:
        stream = agent.get_chat_response(
            final_message,
            llm_sampling_settings=settings,
            chat_history=messages,
            returns_streaming_generator=True,
            print_output=False
        )

        outputs = ""
        for output in stream:
            outputs += output
            yield outputs
    except Exception as e:
        print(f"스트림 생성 중 오류: {e}")
        yield "죄송합니다. 응답 생성 중 오류가 발생했습니다. 다시 시도해주세요."
406
 
407
def clear_document_context():
    """Reset the cached PDF context so a new document can be loaded."""
    global document_context, document_filename
    document_context, document_filename = "", ""
    return "📭 문서 컨텍스트가 초기화되었습니다. 새로운 PDF를 업로드해주세요."
 
413
 
414
def check_document_status():
    """Return a human-readable summary of the currently loaded document."""
    global document_context, document_filename

    # Debug trace of the module-level document state.
    print(f"\n=== 문서 상태 확인 ===")
    print(f"document_context 타입: {type(document_context)}")
    print(f"document_context 길이: {len(document_context) if document_context else 0}")
    print(f"document_filename: '{document_filename}'")

    if not document_context:
        return "📭 로드된 문서가 없습니다. PDF 파일을 업로드해주세요."

    print(f"문서 첫 100자: {document_context[:100]}")
    return (
        f"✅ 문서가 로드되어 있습니다."
        f"\n📄 파일명: {document_filename}"
        f"\n📝 문서 길이: {len(document_context):,} 문자"
    )
428
 
429
+ # Gradio ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ
430
+ with gr.Blocks(theme=gr.themes.Soft(
431
+ primary_hue="blue",
432
+ secondary_hue="cyan",
433
+ neutral_hue="gray",
434
+ font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]
435
+ ).set(
436
+ body_background_fill="#f8f9fa",
437
+ block_background_fill="#ffffff",
438
+ block_border_width="1px",
439
+ block_title_background_fill="#e9ecef",
440
+ input_background_fill="#ffffff",
441
+ button_secondary_background_fill="#e9ecef",
442
+ border_color_accent="#dee2e6",
443
+ border_color_primary="#ced4da",
444
+ background_fill_secondary="#f8f9fa",
445
+ color_accent_soft="transparent",
446
+ code_background_fill="#f1f3f5",
447
+ ), css=css) as demo:
448
+
449
+ gr.Markdown("# ์˜จํ”„๋ ˆ๋ฏธ์Šค ์ตœ์ ํ™” 'LLM+RAG ๋ชจ๋ธ' ์„œ๋น„์Šค์Šค")
450
+ gr.Markdown("๐Ÿ“„ PDF ๋ฌธ์„œ๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด AI๊ฐ€ ๋ฌธ์„œ ๋‚ด์šฉ์„ ๋ถ„์„ํ•˜์—ฌ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•ฉ๋‹ˆ๋‹ค.")
451
+ gr.Markdown("๐Ÿ’ก ์‚ฌ์šฉ๋ฒ•: 1) ์•„๋ž˜์—์„œ PDF ์—…๋กœ๋“œ โ†’ 2) ๋ฌธ์„œ์— ๋Œ€ํ•œ ์งˆ๋ฌธ ์ž…๋ ฅ โ†’ 3) AI๊ฐ€ ํ•œ๊ตญ์–ด๋กœ ๋‹ต๋ณ€")
452
+
453
+ # ์ฑ„ํŒ… ์ธํ„ฐํŽ˜์ด์Šค๋ฅผ ์œ„์ชฝ์— ๋ฐฐ์น˜
454
+ with gr.Row():
455
+ with gr.Column():
456
+ # ์ฑ„ํŒ… ์ธํ„ฐํŽ˜์ด์Šค
457
+ chatbot = gr.Chatbot(elem_id="chatbot", height=500)
458
+ msg = gr.Textbox(
459
+ label="๋ฉ”์‹œ์ง€ ์ž…๋ ฅ",
460
+ placeholder="์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”... (PDF๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด ๋ฌธ์„œ ๋‚ด์šฉ์— ๋Œ€ํ•ด ์งˆ๋ฌธํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค)",
461
+ lines=2
462
+ )
463
+ with gr.Row():
464
+ submit = gr.Button("์ „์†ก", variant="primary")
465
+ clear_chat = gr.Button("๋Œ€ํ™” ์ดˆ๊ธฐํ™”")
466
+
467
+ # ์˜ˆ์ œ๋ฅผ ์ค‘๊ฐ„์— ๋ฐฐ์น˜
468
+ gr.Examples(
469
+ examples=[
470
+ ["์ด ๋ฌธ์„œ๋Š” ๋ฌด์—‡์— ๊ด€ํ•œ ๋‚ด์šฉ์ธ๊ฐ€์š”?"],
471
+ ["์—…๋กœ๋“œ๏ฟฝ๏ฟฝ๏ฟฝ PDF ๋ฌธ์„œ์˜ ์ฃผ์š” ๋‚ด์šฉ์„ ํ•œ๊ตญ์–ด๋กœ ์š”์•ฝํ•ด์ฃผ์„ธ์š”."],
472
+ ["๋ฌธ์„œ์— ๋‚˜์˜จ ์ผ์ •์„ ์•Œ๋ ค์ฃผ์„ธ์š”."],
473
+ ["๋ฌธ์„œ์—์„œ ๊ฐ€์žฅ ์ค‘์š”ํ•œ 3๊ฐ€์ง€ ํ•ต์‹ฌ ํฌ์ธํŠธ๋Š” ๋ฌด์—‡์ธ๊ฐ€์š”?"],
474
+ ["์ด ํ–‰์‚ฌ์˜ ๊ฐœ์š”๋ฅผ ์„ค๋ช…ํ•ด์ฃผ์„ธ์š”."]
475
+ ],
476
+ inputs=msg
477
+ )
478
+
479
+ # PDF ์—…๋กœ๋“œ ์„น์…˜์„ ์•„๋ž˜์ชฝ์— ๋ฐฐ์น˜
480
+ with gr.Accordion("๐Ÿ“„ PDF ๋ฌธ์„œ ์—…๋กœ๋“œ", open=True):
481
+ with gr.Row():
482
+ with gr.Column(scale=1):
483
+ file_input = gr.File(
484
+ label="PDF ๋ฌธ์„œ ์„ ํƒ",
485
+ file_types=[".pdf"],
486
+ type="filepath"
487
+ )
488
+ with gr.Row():
489
+ convert_button = gr.Button("๋ฌธ์„œ ๋ณ€ํ™˜", variant="primary")
490
+ clear_button = gr.Button("๋ฌธ์„œ ์ดˆ๊ธฐํ™”", variant="secondary")
491
+ test_button = gr.Button("๋ฌธ์„œ ํ…Œ์ŠคํŠธ", variant="secondary")
492
+
493
+ status_text = gr.Textbox(
494
+ label="๋ฌธ์„œ ์ƒํƒœ",
495
+ interactive=False,
496
+ value=check_document_status(),
497
+ lines=3
498
+ )
499
+
500
+ with gr.Column(scale=1):
501
+ with gr.Accordion("๋ณ€ํ™˜๋œ ๋ฌธ์„œ ๋ฏธ๋ฆฌ๋ณด๊ธฐ", open=False):
502
+ converted_text = gr.Textbox(
503
+ label="Markdown ๋ณ€ํ™˜ ๊ฒฐ๊ณผ",
504
+ lines=10,
505
+ max_lines=20,
506
+ interactive=False
507
+ )
508
+ metadata_output = gr.JSON(label="๋ฉ”ํƒ€๋ฐ์ดํ„ฐ")
509
+
510
+ # ๊ณ ๊ธ‰ ์„ค์ •์„ ๊ฐ€์žฅ ์•„๋ž˜์— ๋ฐฐ์น˜
511
+ with gr.Accordion("โš™๏ธ ๊ณ ๊ธ‰ ์„ค์ •", open=False):
512
+ system_message = gr.Textbox(
513
+ value="๋‹น์‹ ์€ ํ•œ๊ตญ์–ด๋กœ ๋‹ต๋ณ€ํ•˜๋Š” AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค. PDF ๋ฌธ์„œ๊ฐ€ ์ œ๊ณต๋˜๋ฉด ๊ทธ ๋‚ด์šฉ์„ ์ •ํ™•ํžˆ ๋ถ„์„ํ•˜์—ฌ ๋‹ต๋ณ€ํ•ฉ๋‹ˆ๋‹ค.",
514
+ label="์‹œ์Šคํ…œ ๋ฉ”์‹œ์ง€",
515
+ lines=3
516
+ )
517
+ max_tokens = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="์ตœ๋Œ€ ํ† ํฐ ์ˆ˜")
518
+ temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.3, step=0.1, label="Temperature (๋‚ฎ์„์ˆ˜๋ก ์ผ๊ด€์„ฑ ์žˆ์Œ)")
519
+ top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.90, step=0.05, label="Top-p")
520
+ top_k = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k")
521
+ repeat_penalty = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")
522
+
523
# Event handlers
def user_submit(message, history):
    """Append the new user turn (with a pending assistant slot) and clear the input box."""
    return "", [*history, [message, None]]
526
+
527
def bot_response(history, system_msg, max_tok, temp, top_p_val, top_k_val, rep_pen):
    """Stream the assistant's answer for the newest pending chat turn.

    Yields the full history after each streamed token so Gradio can
    update the chatbot incrementally.
    """
    # Only act when the last turn is a user message still awaiting an answer.
    if history and history[-1][1] is None:
        user_message = history[-1][0]

        # Debug: report whether a PDF document context is currently loaded.
        global document_context, document_filename
        print("\n=== BOT RESPONSE ์‹œ์ž‘ ===")
        print(f"์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€: {user_message}")
        if document_context:
            print(f"๐Ÿ“„ ๋ฌธ์„œ ์ปจํ…์ŠคํŠธ ํ™œ์„ฑ: {document_filename} ({len(document_context)} ๋ฌธ์ž)")
            print(f"๋ฌธ์„œ ์ฒซ 200์ž: {document_context[:200]}...")
        else:
            print("๐Ÿ“ญ ๋ฌธ์„œ ์ปจํ…์ŠคํŠธ ์—†์Œ")

        # Repack every completed earlier turn as {"user": ..., "assistant": ...};
        # the pending last turn is excluded.
        previous_history = [
            {"user": user_turn, "assistant": assistant_turn}
            for user_turn, assistant_turn in history[:-1]
            if assistant_turn is not None
        ]
        print(f"์ด์ „ ๋Œ€ํ™” ์ˆ˜: {len(previous_history)}")

        # Debug marker for the document-grounded path.
        if document_context and len(document_context) > 0:
            print(f"๐Ÿ“„ ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์‘๋‹ต ์ƒ์„ฑ ์ค‘... (๋ฌธ์„œ ๊ธธ์ด: {len(document_context)})")

        bot_message = ""
        try:
            # Each yielded token is the full response text so far.
            for token in respond(
                user_message,
                previous_history,
                system_msg,
                max_tok,
                temp,
                top_p_val,
                top_k_val,
                rep_pen,
            ):
                bot_message = token
                history[-1][1] = bot_message
                yield history
        except Exception as e:
            print(f"โŒ ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜: {e}")
            import traceback
            traceback.print_exc()
            history[-1][1] = "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”."
            yield history
577
+
578
# PDF conversion event handler
def on_pdf_convert(file):
    """Convert an uploaded PDF to Markdown and publish it as the active document context.

    Args:
        file: Filesystem path of the uploaded PDF, or None when nothing is selected.

    Returns:
        Tuple of (markdown_content, metadata, status) feeding the preview
        textbox, the metadata JSON widget, and the status textbox.
    """
    global document_context, document_filename

    if file is None:
        return "", {}, "โŒ ํŒŒ์ผ์ด ์„ ํƒ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."

    markdown_content, metadata = convert_pdf_to_markdown(file)

    if "error" in metadata:
        status = f"โŒ ๋ณ€ํ™˜ ์‹คํŒจ: {metadata['error']}"
    else:
        # The `global` declaration above makes plain assignment rebind the
        # module-level names; the original `globals()[...]` indirection was
        # redundant and has been removed.
        document_context = markdown_content
        document_filename = metadata['filename']

        status = f"โœ… PDF ๋ฌธ์„œ๊ฐ€ ์„ฑ๊ณต์ ์œผ๋กœ ๋ณ€ํ™˜๋˜์—ˆ์Šต๋‹ˆ๋‹ค!\n๐Ÿ“„ ํŒŒ์ผ๋ช…: {metadata['filename']}\n๐Ÿ“ ๋ฌธ์„œ ๊ธธ์ด: {metadata['content_length']:,} ๋ฌธ์ž\n\n์ด์ œ ๋ฌธ์„œ ๋‚ด์šฉ์— ๋Œ€ํ•ด ํ•œ๊ตญ์–ด๋กœ ์งˆ๋ฌธํ•˜์‹ค ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.\n\n์˜ˆ์‹œ ์งˆ๋ฌธ:\n- ์ด ๋ฌธ์„œ์˜ ์ฃผ์š” ๋‚ด์šฉ์„ ์š”์•ฝํ•ด์ฃผ์„ธ์š”\n- ๋ฌธ์„œ์— ๋‚˜์˜จ ํ•ต์‹ฌ ๊ฐœ๋…์„ ์„ค๋ช…ํ•ด์ฃผ์„ธ์š”"

        # Debug: confirm the context actually reached the module globals.
        print("\nโœ… ๋ฌธ์„œ ๋กœ๋“œ ์™„๋ฃŒ ํ™•์ธ:")
        print(f"- document_context ๊ธธ์ด: {len(document_context)}")
        print(f"- document_filename: {document_filename}")

        if document_context:
            print("โœ… ๋ฌธ์„œ๊ฐ€ ์„ฑ๊ณต์ ์œผ๋กœ ์ „์—ญ ๋ณ€์ˆ˜์— ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค!")
        else:
            print("โŒ ๊ฒฝ๊ณ : ๋ฌธ์„œ๊ฐ€ ์ „์—ญ ๋ณ€์ˆ˜์— ์ €์žฅ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค!")

    return markdown_content, metadata, status
608
+
609
+ # ํŒŒ์ผ ์—…๋กœ๋“œ ์‹œ ์ž๋™ ๋ณ€ํ™˜
610
+ file_input.change(
611
+ fn=on_pdf_convert,
612
+ inputs=[file_input],
613
+ outputs=[converted_text, metadata_output, status_text]
614
+ )
615
+
616
+ # ์ˆ˜๋™ ๋ณ€ํ™˜ ๋ฒ„ํŠผ
617
+ convert_button.click(
618
+ fn=on_pdf_convert,
619
+ inputs=[file_input],
620
+ outputs=[converted_text, metadata_output, status_text]
621
+ )
622
+
623
# Debug helper: summarize the currently loaded document.
def test_document():
    """Return a human-readable summary of the loaded PDF context, if any."""
    global document_context, document_filename
    if not document_context:
        return "โŒ ํ˜„์žฌ ๋กœ๋“œ๋œ ๋ฌธ์„œ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."
    return (
        "โœ… ๋ฌธ์„œ ํ…Œ์ŠคํŠธ ๊ฒฐ๊ณผ:\n"
        f"๐Ÿ“„ ํŒŒ์ผ๋ช…: {document_filename}\n"
        f"๐Ÿ“ ์ „์ฒด ๊ธธ์ด: {len(document_context):,} ๋ฌธ์ž\n"
        f"๐Ÿ“ ์ฒซ 500์ž:\n{document_context[:500]}..."
    )
635
+
636
# Document test button shows the summary in the status box.
test_button.click(
    fn=test_document,
    outputs=[status_text],
)

# Reset the document context, then clear the preview widgets.
clear_button.click(
    fn=clear_document_context,
    outputs=[status_text],
).then(
    fn=lambda: ("", {}, check_document_status()),
    outputs=[converted_text, metadata_output, status_text],
)

# Chat events: Enter key in the textbox and the send button share one pipeline —
# record the user turn, then stream the bot response.
for _send_trigger in (msg.submit, submit.click):
    _send_trigger(user_submit, [msg, chatbot], [msg, chatbot]).then(
        bot_response,
        [chatbot, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
        chatbot,
    )

# Clear-chat button empties the chatbot history.
clear_chat.click(lambda: [], None, chatbot)
663
+
664
if __name__ == "__main__":
    # Ensure the local model cache directory exists before launch.
    os.makedirs("./models", exist_ok=True)

    # Warn (but do not abort) when no Hugging Face token is configured,
    # since model downloads may then be rate-limited or blocked.
    if not HF_TOKEN:
        print("โš ๏ธ ๊ฒฝ๊ณ : HF_TOKEN์ด ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ์— ์ œํ•œ์ด ์žˆ์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.")
        print("ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋ฅผ ์„ค์ •ํ•˜๋ ค๋ฉด: export HF_TOKEN='your_huggingface_token'")

    demo.launch(
        server_name="0.0.0.0",  # reachable from the local network
        server_port=7860,
        share=False,  # on-premise deployment: no public share link
    )