Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,285 +1,677 @@
|
|
1 |
-
import
|
|
|
2 |
import subprocess
|
3 |
-
import
|
4 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
if "upload_to_hf" not in st.session_state:
|
9 |
-
st.session_state.upload_to_hf = False
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
'deepseek-ai/DeepSeek-R1-Distill-Llama-70B', 'deepseek-ai/DeepSeek-R1-Zero', 'deepseek-ai/DeepSeek-R1-Distill-Llama-8B',
|
20 |
-
'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', 'm-a-p/YuE-s1-7B-anneal-en-cot', 'deepseek-ai/DeepSeek-R1-Distill-Qwen-14B',
|
21 |
-
'microsoft/phi-4', 'huihui-ai/DeepSeek-R1-Distill-Qwen-32B-abliterated', 'meta-llama/Llama-3.3-70B-Instruct',
|
22 |
-
'cognitivecomputations/Dolphin3.0-R1-Mistral-24B', 'allenai/Llama-3.1-Tulu-3-405B', 'meta-llama/Llama-3.1-8B',
|
23 |
-
'meta-llama/Llama-3.1-8B-Instruct', 'Qwen/Qwen2.5-14B-Instruct-1M', 'mistralai/Mistral-Small-24B-Base-2501',
|
24 |
-
'huihui-ai/DeepSeek-R1-Distill-Llama-70B-abliterated', 'huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2',
|
25 |
-
'Qwen/Qwen2.5-7B-Instruct-1M', 'open-thoughts/OpenThinker-7B', 'Almawave/Velvet-14B', 'cognitivecomputations/Dolphin3.0-Mistral-24B',
|
26 |
-
'Steelskull/L3.3-Damascus-R1', 'Qwen/Qwen2.5-Coder-32B-Instruct', 'huihui-ai/DeepSeek-R1-Distill-Llama-8B-abliterated',
|
27 |
-
'cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese', 'jinaai/ReaderLM-v2', 'mistralai/Mistral-7B-Instruct-v0.3',
|
28 |
-
'meta-llama/Llama-3.2-1B', 'xwen-team/Xwen-7B-Chat', 'meta-llama/Llama-3.2-3B-Instruct', 'cognitivecomputations/DeepSeek-R1-AWQ',
|
29 |
-
'HuggingFaceTB/SmolLM2-1.7B-Instruct', 'xwen-team/Xwen-72B-Chat', 'openai-community/gpt2', 'meta-llama/Llama-2-7b-chat-hf', 'google/gemma-2-2b-it',
|
30 |
-
'mistralai/Mistral-7B-v0.1', 'meta-llama/Meta-Llama-3-8B', 'mistralai/Mistral-Nemo-Instruct-2407', 'microsoft/Phi-3.5-mini-instruct',
|
31 |
-
'arcee-ai/Virtuoso-Small-v2', 'MiniMaxAI/MiniMax-Text-01', 'AtlaAI/Selene-1-Mini-Llama-3.1-8B', 'Steelskull/L3.3-Nevoria-R1-70b',
|
32 |
-
'prithivMLmods/Calcium-Opus-14B-Elite2-R1', 'pfnet/plamo-2-1b', 'huihui-ai/DeepSeek-R1-Distill-Qwen-7B-abliterated-v2', 'Vikhrmodels/QVikhr-2.5-1.5B-Instruct-SMPO',
|
33 |
-
'mistralai/Mixtral-8x7B-Instruct-v0.1', 'vikhyatk/moondream2', 'meta-llama/Meta-Llama-3-8B-Instruct', 'deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct',
|
34 |
-
'Steelskull/L3.3-MS-Nevoria-70b', 'unsloth/DeepSeek-R1-Distill-Llama-8B', 'cyberagent/DeepSeek-R1-Distill-Qwen-14B-Japanese', 'mistralai/Mistral-7B-Instruct-v0.2',
|
35 |
-
'deepseek-ai/DeepSeek-Coder-V2-Instruct', 'Qwen/Qwen2.5-32B', 'Qwen/Qwen2.5-72B-Instruct', 'allenai/Llama-3.1-Tulu-3-8B', 'SakanaAI/TinySwallow-1.5B-Instruct',
|
36 |
-
'm-a-p/YuE-s2-1B-general', 'arcee-ai/Virtuoso-Medium-v2', 'Black-Ink-Guild/Pernicious_Prophecy_70B', 'Qwen/Qwen2.5-14B', 'inflatebot/MN-12B-Mag-Mell-R1', 'Qwen/Qwen2.5-Math-1.5B',
|
37 |
-
'Qwen/Qwen2.5-Coder-7B-Instruct', 'Qwen/Qwen2.5-1.5B-Instruct', 'Qwen/QwQ-32B-Preview', 'NovaSky-AI/Sky-T1-32B-Preview', 'sometimesanotion/Lamarck-14B-v0.7',
|
38 |
-
'SentientAGI/Dobby-Mini-Leashed-Llama-3.1-8B', 'NaniDAO/deepseek-r1-qwen-2.5-32B-ablated', 'rubenroy/Zurich-14B-GCv2-5m', 'rubenroy/Geneva-12B-GCv2-5m', 'prithivMLmods/Primal-Opus-14B-Optimus-v1',
|
39 |
-
'prithivMLmods/Megatron-Opus-14B-Exp', 'prithivMLmods/Primal-Mini-3B-Exp', 'TinyLlama/TinyLlama-1.1B-Chat-v1.0', 'Qwen/Qwen2.5-0.5B-Instruct',
|
40 |
-
'Qwen/Qwen2.5-3B-Instruct', 'meta-llama/Llama-3.2-1B-Instruct', 'HuggingFaceTB/SmolLM2-135M-Instruct', 'PowerInfer/SmallThinker-3B-Preview',
|
41 |
-
'Valdemardi/DeepSeek-R1-Distill-Qwen-32B-AWQ', 'huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated', 'SentientAGI/Dobby-Mini-Unhinged-Llama-3.1-8B',
|
42 |
-
'lightblue/DeepSeek-R1-Distill-Qwen-7B-Japanese', 'Ihor/Text2Graph-R1-Qwen2.5-0.5b', 'prithivMLmods/Bellatrix-Tiny-3B-R1', 'prithivMLmods/Bellatrix-Tiny-1.5B-R1', 'prithivMLmods/Megatron-Opus-14B-Stock',
|
43 |
-
'prithivMLmods/Jolt-v0.1', 'prithivMLmods/Sqweeks-7B-Instruct', 'bigscience/bloom', 'mistralai/Mistral-7B-Instruct-v0.1', 'google/gemma-2-27b-it', 'meta-llama/Llama-3.1-70B', 'Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2',
|
44 |
-
'Qwen/Qwen2.5-7B-Instruct', 'LatitudeGames/Wayfarer-12B', 'prithivMLmods/QwQ-Math-IO-500M', 'prithivMLmods/Llama-3.2-6B-AlgoCode', 'prithivMLmods/Omni-Reasoner-Merged', 'Valdemardi/DeepSeek-R1-Distill-Llama-70B-AWQ',
|
45 |
-
'silma-ai/SILMA-Kashif-2B-Instruct-v1.0', 'mkurman/Qwen2.5-14B-DeepSeek-R1-1M', 'prithivMLmods/Blaze-14B-xElite', 'prithivMLmods/Megatron-Opus-7B-Exp', 'v2ray/GPT4chan-24B', 'prithivMLmods/Elita-1', 'prithivMLmods/Viper-Coder-v0.1',
|
46 |
-
'prithivMLmods/WebMind-7B-v0.1', 'prithivMLmods/Megatron-Corpus-14B-Exp.v2', 'prithivMLmods/Feynman-Grpo-Exp', 'meta-llama/Llama-2-7b-hf', 'microsoft/phi-2', 'Ttimofeyka/MistralRP-Noromaid-NSFW-Mistral-7B-GGUF',
|
47 |
-
'google/gemma-2b', 'google/gemma-7b', 'sophosympatheia/Midnight-Miqu-70B-v1.5', 'jiviai/medX_v2', 'Alibaba-NLP/gte-Qwen2-7B-instruct', 'google/gemma-2-9b-it', 'meta-llama/Llama-Guard-3-8B', 'microsoft/Phi-3.5-vision-instruct',
|
48 |
-
'MarinaraSpaghetti/NemoMix-Unleashed-12B', 'Qwen/Qwen2.5-0.5B', 'Qwen/Qwen2.5-7B', 'Qwen/Qwen2.5-32B-Instruct', 'meta-llama/Llama-3.2-3B', 'allenai/Molmo-7B-D-0924',
|
49 |
-
'HuggingFaceTB/SmolLM2-360M-Instruct', 'Zhengyi/LLaMA-Mesh', 'ibm-granite/granite-3.1-8b-instruct', 'livekit/turn-detector', 'SakanaAI/TinySwallow-1.5B', 'saheedniyi/YarnGPT',
|
50 |
-
'ContactDoctor/Bio-Medical-Llama-3-8B-CoT-012025', 'MiniMaxAI/MiniMax-VL-01', 'prithivMLmods/Omni-Reasoner4-Merged', 'unsloth/DeepSeek-R1', 'prithivMLmods/Calcium-Opus-14B-Elite2', 'prithivMLmods/Calcium-Opus-14B-Elite3',
|
51 |
-
'prithivMLmods/Bellatrix-Tiny-0.5B', 'prithivMLmods/Calcium-Opus-14B-Elite-Stock', 'prithivMLmods/Bellatrix-Tiny-1B', 'm-a-p/YuE-s1-7B-anneal-en-icl', 'arcee-ai/Virtuoso-Lite', 'stelterlab/Mistral-Small-24B-Instruct-2501-AWQ',
|
52 |
-
'prithivMLmods/Triangulum-v2-10B', 'prithivMLmods/Bellatrix-Tiny-1B-R1', 'huihui-ai/Mistral-Small-24B-Instruct-2501-abliterated', 'rubenroy/Gilgamesh-72B', 'rubenroy/Perseus-3192B', 'Nitral-Archive/NightWing3_Virtuoso-10B-v0.2',
|
53 |
-
'ibm-granite/granite-3.2-8b-instruct-preview', 'distilbert/distilgpt2', 'deepseek-ai/deepseek-coder-33b-instruct', 'microsoft/Phi-3-mini-4k-instruct', 'mistralai/Codestral-22B-v0.1', 'NovaSearch/stella_en_1.5B_v5', 'google/gemma-2-2b',
|
54 |
-
'lmms-lab/LLaVA-Video-7B-Qwen2', 'deepseek-ai/DeepSeek-V2.5', 'Qwen/Qwen2.5-Math-7B', 'AIDC-AI/Marco-o1', 'allenai/Llama-3.1-Tulu-3-8B-SFT', 'utter-project/EuroLLM-9B-Instruct', 'tiiuae/Falcon3-1B-Instruct',
|
55 |
-
'cognitivecomputations/DeepSeek-V3-AWQ', 'prithivMLmods/LwQ-10B-Instruct', 'prithivMLmods/LwQ-30B-Instruct', 'prithivMLmods/Calcium-20B', 'unsloth/DeepSeek-R1-Distill-Qwen-32B-bnb-4bit',
|
56 |
-
'opensourcerelease/DeepSeek-R1-bf16', 'prithivMLmods/Llama-Express.1-Math', 'prithivMLmods/Llama-Express.1', 'prithivMLmods/Llama-Express.1-Tiny', 'prithivMLmods/Llama-Express.1-Merged',
|
57 |
-
'Delta-Vector/Rei-12B', 'kingabzpro/DeepSeek-R1-Medical-COT', 'prithivMLmods/Calme-Ties-78B', 'prithivMLmods/Qwen2.5-1.5B-DeepSeek-R1-Instruct', 'prithivMLmods/Calme-Ties2-78B', 'prithivMLmods/Bellatrix-Tiny-1B-v3',
|
58 |
-
'sometimesanotion/Qwenvergence-14B-v12-Prose-DS', 'TIGER-Lab/Qwen2.5-32B-Instruct-CFT', 'unsloth/Mistral-Small-24B-Instruct-2501-unsloth-bnb-4bit', 'rubenroy/Geneva-12B-GCv2-1m', 'sometimesanotion/Qwenvergence-14B-v13-Prose-DS',
|
59 |
-
'deepseek-ai/deepseek-coder-6.7b-instruct', 'deepseek-ai/deepseek-moe-16b-base', 'deepseek-ai/deepseek-moe-16b-chat', 'microsoft/Phi-3-mini-128k-instruct', 'google/gemma-2-9b', 'AI-MO/NuminaMath-7B-TIR', 'CohereForAI/c4ai-command-r-plus-08-2024',
|
60 |
-
'Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24', 'nvidia/Llama-3.1-Nemotron-70B-Instruct-HF', 'CohereForAI/aya-expanse-8b', 'HuggingFaceTB/SmolLM2-135M', 'brgx53/3Blarenegv3-ECE-PRYMMAL-Martial', 'tiiuae/Falcon3-1B-Base',
|
61 |
-
'PocketDoc/Dans-PersonalityEngine-V1.1.0-12b', 'Kaoeiri/Magnum-v4-Cydonia-vXXX-22B', 'prithivMLmods/Blaze.1-32B-Instruct', 'kyutai/helium-1-preview-2b', 'prithivMLmods/Blaze.1-27B-Preview', 'prithivMLmods/Blaze.1-27B-Reflection',
|
62 |
-
'prithivMLmods/PyThagorean-10B', 'prithivMLmods/PyThagorean-3B', 'prithivMLmods/PyThagorean-Tiny', 'unsloth/DeepSeek-R1-Distill-Llama-70B-bnb-4bit', 'unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit', 'bespokelabs/Bespoke-Stratos-32B',
|
63 |
-
'Tarek07/Progenitor-V1.1-LLaMa-70B', 'mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1', 'm-a-p/YuE-s1-7B-anneal-zh-cot', 'emredeveloper/DeepSeek-R1-Medical-COT', 'HelpingAI/HAI-SER', 'rubenroy/Geneva-12B-GCv2-10k', 'rubenroy/Geneva-12B-GCv2-50k',
|
64 |
-
'rubenroy/Geneva-12B-GCv2-100k', 'allura-org/GPT-J-6b-Disco-Elysium', 'fblgit/miniclaus-qw1.5B-UNAMGS-GRPO', 'suayptalha/Luminis-phi-4', 'EleutherAI/gpt-neo-2.7B', 'tiiuae/falcon-7b-instruct', 'deepseek-ai/deepseek-coder-1.3b-instruct',
|
65 |
-
'teknium/OpenHermes-2.5-Mistral-7B', 'maritaca-ai/sabia-7b', 'bigcode/starcoder2-3b', 'mistralai/Mixtral-8x7B-v0.1', 'Rijgersberg/GEITje-7B', 'segolilylabs/Lily-Cybersecurity-7B-v0.2', 'deepseek-ai/deepseek-coder-7b-instruct-v1.5',
|
66 |
-
'deepseek-ai/deepseek-math-7b-rl', 'SherlockAssistant/Mistral-7B-Instruct-Ukrainian', 'meta-llama/CodeLlama-7b-hf', 'databricks/dbrx-instruct', 'UnfilteredAI/Promt-generator', 'mistralai/Mixtral-8x22B-Instruct-v0.1', 'cognitivecomputations/dolphin-2.9-llama3-8b',
|
67 |
-
'ruslanmv/Medical-Llama3-8B', 'deepseek-ai/DeepSeek-V2-Chat', 'microsoft/llava-med-v1.5-mistral-7b', 'deepseek-ai/DeepSeek-V2-Lite-Chat', 'CohereForAI/aya-23-8B', 'ProbeMedicalYonseiMAILab/medllama3-v20', 'cognitivecomputations/dolphin-2.9.2-qwen2-72b',
|
68 |
-
'mlabonne/NeuralDaredevil-8B-abliterated', 'yentinglin/Llama-3-Taiwan-8B-Instruct', 'Sao10K/L3-8B-Stheno-v3.2', 'elyza/Llama-3-ELYZA-JP-8B', 'meta-llama/Llama-3.1-70B-Instruct', 'princeton-nlp/gemma-2-9b-it-SimPO', 'meta-llama/Llama-3.1-405B-Instruct',
|
69 |
-
'mistralai/Mistral-Nemo-Base-2407', 'unsloth/Meta-Llama-3.1-8B-Instruct', 'mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated', 'microsoft/maira-2', 'ContactDoctor/Bio-Medical-Llama-3-8B', 'ystemsrx/Qwen2-Boundless', 'upstage/solar-pro-preview-instruct',
|
70 |
-
'Epiculous/Violet_Twilight-v0.2', 'flowaicom/Flow-Judge-v0.1', 'Qwen/Qwen2.5-14B-Instruct', 'Qwen/Qwen2.5-Math-1.5B-Instruct', 'meta-llama/Llama-Guard-3-1B', 'google/gemma-2-2b-jpn-it', 'unsloth/Llama-3.2-1B-Instruct', 'numind/NuExtract-1.5',
|
71 |
-
'rombodawg/Rombos-LLM-V2.5-Qwen-32b', 'anthracite-org/magnum-v4-22b', 'CohereForAI/aya-expanse-32b', 'VongolaChouko/Starcannon-Unleashed-12B-v1.0', 'Qwen/Qwen2.5-Coder-14B-Instruct', 'Qwen/Qwen2.5-Coder-32B', 'SmallDoge/Doge-60M', 'MaziyarPanahi/calme-3.2-instruct-78b',
|
72 |
-
'lianghsun/Llama-3.2-Taiwan-3B-Instruct', 'allenai/Llama-3.1-Tulu-3-8B-DPO', 'allenai/Llama-3.1-Tulu-3-70B', 'knifeayumu/Cydonia-v1.3-Magnum-v4-22B', 'utter-project/EuroLLM-9B', 'Skywork/Skywork-o1-Open-Llama-3.1-8B', 'Moraliane/SAINEMO-reMIX', 'LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct',
|
73 |
-
'NousResearch/Hermes-3-Llama-3.2-3B', 'recursal/QRWKV6-32B-Instruct-Preview-v0.1', 'allenai/OLMo-2-1124-13B-Instruct', 'huihui-ai/Llama-3.3-70B-Instruct-abliterated-finetuned-GPTQ-Int8', 'WiroAI/wiroai-turkish-llm-9b', 'SmallDoge/Doge-20M', 'FreedomIntelligence/HuatuoGPT-o1-70B',
|
74 |
-
'Sao10K/70B-L3.3-Cirrus-x1', 'internlm/internlm3-8b-instruct', 'prithivMLmods/PRM-Math-7B-Reasoner', 'prithivMLmods/QwQ-LCoT2-7B-Instruct', 'netease-youdao/Confucius-o1-14B', 'unsloth/DeepSeek-R1-Zero', 'unsloth/DeepSeek-R1-BF16', 'unsloth/DeepSeek-R1-Distill-Qwen-1.5B-unsloth-bnb-4bit',
|
75 |
-
'suayptalha/Falcon3-Jessi-v0.4-7B-Slerp', 'RDson/CoderO1-DeepSeekR1-Coder-32B-Preview', 'bespokelabs/Bespoke-Stratos-7B', 'unsloth/DeepSeek-R1-Distill-Qwen-32B-unsloth-bnb-4bit', 'RWKV-Red-Team/ARWKV-7B-Preview-0.1', 'lightblue/Karasu-DPO-7B', 'Spestly/Atlas-Pro-7B-Preview-1M',
|
76 |
-
'llm-jp/llm-jp-3-13b-instruct3', 'm-a-p/YuE-s1-7B-anneal-jp-kr-cot', 'm-a-p/YuE-s1-7B-anneal-jp-kr-icl', 'm-a-p/YuE-s1-7B-anneal-zh-icl', 'huihui-ai/Qwen2.5-14B-Instruct-1M-abliterated', 'AXCXEPT/phi-4-deepseek-R1K-RL-EZO', 'grimjim/DeepSauerHuatuoSkywork-R1-o1-Llama-3.1-8B',
|
77 |
-
'sthenno/tempesthenno-icy-0130', 'neuralmagic/Mistral-Small-24B-Instruct-2501-FP8-Dynamic', 'Omartificial-Intelligence-Space/Arabic-DeepSeek-R1-Distill-8B', 'OddTheGreat/Badman_12B', 'MasterControlAIML/DeepSeek-R1-Strategy-Qwen-2.5-1.5b-Unstructured-To-Structured',
|
78 |
-
'rubenroy/Geneva-12B-GCv2-500k', 'bunnycore/Llama-3.2-3B-Bespoke-Thought', 'justinj92/Qwen2.5-1.5B-Thinking', 'RefalMachine/RuadaptQwen2.5-14B-Instruct', 'v2ray/GPT4chan-24B-QLoRA', 'CultriX/Qwen2.5-14B-Qwentangledv2', 'CultriX/Qwen2.5-14B-Ultimav2',
|
79 |
-
'Tarek07/Progenitor-V2.2-LLaMa-70B', 'dwetzel/DeepSeek-R1-Distill-Qwen-32B-GPTQ-INT4', 'Nitral-Archive/NightWing3-R1_Virtuoso-10B-v0.3e2', 'ucalyptus/prem-1B-grpo', 'Sakalti/Saka-14B', 'bunnycore/Qwen2.5-7B-MixStock-V0.1', 'braindao/DeepSeek-R1-Distill-Llama-8B-Uncensored',
|
80 |
-
'scb10x/llama3.1-typhoon2-deepseek-r1-70b', 'RefalMachine/RuadaptQwen2.5-14B-R1-distill-preview-v1',
|
81 |
-
'openai-community/gpt2-medium', 'openai-community/gpt2-xl', 'meta-llama/Llama-2-13b-hf', 'Trelis/Llama-2-7b-chat-hf-function-calling-v2', 'ByteWave/prompt-generator', 'HuggingFaceH4/zephyr-7b-beta', 'TheBloke/deepseek-llm-67b-chat-GPTQ', 'sarvamai/OpenHathi-7B-Hi-v0.1-Base',
|
82 |
-
'cognitivecomputations/dolphin-2.5-mixtral-8x7b',
|
83 |
-
'SanjiWatsuki/Sonya-7B', 'openchat/openchat-3.5-0106', 'ZySec-AI/SecurityLLM', 'defog/sqlcoder-70b-alpha', 'nakodanei/Blue-Orchid-2x7b', 'liuhaotian/llava-v1.6-mistral-7b', 'BioMistral/BioMistral-7B-AWQ-QGS128-W4-GEMM', 'google/gemma-2b-it', 'bigcode/starcoder2-7b',
|
84 |
-
'nbeerbower/Maidphin-Kunoichi-7B-GGUF-Q4_K_M', 'HuggingFaceH4/starchat2-15b-v0.1', 'CohereForAI/c4ai-command-r-plus',
|
85 |
-
'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1', 'UnfilteredAI/UNfilteredAI-1B', 'MaziyarPanahi/WizardLM-2-7B-GGUF', 'hiieu/Meta-Llama-3-8B-Instruct-function-calling-json-mode', 'shenzhi-wang/Llama3-8B-Chinese-Chat', 'Orenguteng/Llama-3-8B-Lexi-Uncensored', 'NTQAI/Nxcode-CQ-7B-orpo',
|
86 |
-
'lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half', 'taide/Llama3-TAIDE-LX-8B-Chat-Alpha1', 'Nitral-AI/Poppy_Porpoise-0.72-L3-8B',
|
87 |
-
'WhiteRabbitNeo/Llama-3-WhiteRabbitNeo-8B-v2.0', 'marketeam/LLa-Marketing', 'microsoft/Phi-3-vision-128k-instruct', 'CohereForAI/aya-23-35B', 'shisa-ai/shisa-v1-llama3-8b', 'mistralai/Mistral-7B-v0.3', 'MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF', 'yentinglin/Llama-3-Taiwan-70B-Instruct',
|
88 |
-
'deepseek-ai/DeepSeek-Coder-V2-Lite-Base', 'Sao10K/L3-8B-Stheno-v3.3-32K', 'google/gemma-2-27b',
|
89 |
-
'Alibaba-NLP/gte-Qwen2-1.5B-instruct', 'm42-health/Llama3-Med42-8B', 'cognitivecomputations/dolphin-vision-7b', 'TheDrummer/Big-Tiger-Gemma-27B-v1', 'meta-llama/Llama-3.1-405B', 'google/shieldgemma-2b', 'amd/AMD-Llama-135m', 'unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit',
|
90 |
-
'aifeifei798/DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored',
|
91 |
-
'NousResearch/Hermes-3-Llama-3.1-8B', 'mlabonne/TwinLlama-3.1-8B', 'ClosedCharacter/Peach-9B-8k-Roleplay', 'utter-project/EuroLLM-1.7B-Instruct', 'ai21labs/AI21-Jamba-1.5-Mini', 'Zyphra/Zamba2-2.7B-instruct', 'google/gemma-7b-aps-it', 'ifable/gemma-2-Ifable-9B', 'Qwen/Qwen2.5-1.5B',
|
92 |
-
'Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4', 'Qwen/Qwen2.5-32B-Instruct-AWQ', 'brunopio/Llama3-8B-1.58-100B-tokens-GGUF', 'anthracite-org/magnum-v4-72b', 'nvidia/Llama-3_1-Nemotron-51B-Instruct', 'unsloth/Qwen2.5-14B-Instruct-bnb-4bit', 'katanemo/Arch-Function-3B', 'allenai/Molmo-7B-O-0924',
|
93 |
-
'unsloth/Llama-3.2-1B', 'lianghsun/Llama-3.2-Taiwan-Legal-3B-Instruct', 'BSC-LT/salamandra-2b-instruct', 'Steelskull/MSM-MS-Cydrion-22B', 'Bllossom/llama-3.2-Korean-Bllossom-3B', 'sam-paech/Delirium-v1', 'fblgit/TheBeagle-v2beta-32B-MGS', 'sarvamai/sarvam-1', 'HuggingFaceTB/SmolLM2-1.7B',
|
94 |
-
'Qwen/Qwen2.5-Coder-0.5B-Instruct', 'rombodawg/Rombos-Coder-V2.5-Qwen-14b', 'Nexusflow/Athene-V2-Chat', 'FallenMerick/MN-Violet-Lotus-12B', 'EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2', 'allenai/OLMo-2-1124-7B-Instruct-preview', 'sometimesanotion/KytheraMix-7B-v0.2', 'LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct',
|
95 |
-
'LGAI-EXAONE/EXAONE-3.5-32B-Instruct', 'ibm-granite/granite-3.1-2b-instruct', 'unsloth/Llama-3.3-70B-Instruct-bnb-4bit', 'ibnzterrell/Meta-Llama-3.3-70B-Instruct-AWQ-INT4', 'Sao10K/L3.3-70B-Euryale-v2.3', 'BSC-LT/ALIA-40b', 'huihui-ai/Llama-3.3-70B-Instruct-abliterated', 'SmallDoge/Doge-20M-Instruct',
|
96 |
-
'tiiuae/Falcon3-10B-Instruct', 'winninghealth/WiNGPT-Babel', 'FreedomIntelligence/HuatuoGPT-o1-8B', 'FreedomIntelligence/HuatuoGPT-o1-72B', 'prithivMLmods/Llama-3.1-5B-Instruct', 'prithivMLmods/Llama-Thinker-3B-Preview2', 'simplescaling/step-conditional-control-old', 'ngxson/MiniThinky-v2-1B-Llama-3.2',
|
97 |
-
'unsloth/phi-4-unsloth-bnb-4bit', 'KBlueLeaf/TIPO-500M-ft', 'bunnycore/Phi-4-RP-v0', 'Rombo-Org/Rombo-LLM-V2.5-Qwen-14b', 'nbeerbower/mistral-nemo-kartoffel-12B', 'sethuiyer/Llamaverse-3.1-8B-Instruct', 'Shaleen123/llama-3.1-8b-reasoning', 'Nohobby/L3.3-Prikol-70B-v0.3', 'nvidia/AceInstruct-1.5B', 'SmallDoge/Doge-20M-checkpoint',
|
98 |
-
'carsenk/llama3.2_1b_2025_uncensored_v2', 'bunnycore/Phi-4-Model-Stock-v2', 'Shaleen123/llama-3.1-8B-chain-reasoning', 'bunnycore/Phi-4-Model-Stock-v3', 'IVentureISB/MahaKumbh-Llama3.3-70B', 'DavidLanz/Llama-3.2-Taiwan-3B-Instruct', 'SmallDoge/Doge-60M-checkpoint', 'unsloth/DeepSeek-R1-Distill-Qwen-7B-unsloth-bnb-4bit',
|
99 |
-
'unsloth/DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit', 'roleplaiapp/DeepSeek-R1-Distill-Qwen-32B-Q4_0-GGUF', 'arcee-ai/DeepSeek-R1-bf16', 'inarikami/DeepSeek-R1-Distill-Qwen-32B-AWQ', 'mlx-community/DeepSeek-R1-Distill-Llama-70B-4bit', 'prithivMLmods/QwQ-LCoT1-Merged', 'prithivMLmods/Llama-3.2-3B-Math-Oct',
|
100 |
-
'Nitral-AI/Wayfarer_Eris_Noctis-12B', 'thirdeyeai/DeepSeek-R1-Distill-Qwen-7B-uncensored', 'NovaSky-AI/Sky-T1-32B-Flash', 'SZTAKI-HLT/Llama-3.2-1B-HuAMR', 'stepenZEN/DeepSeek-R1-Distill-Qwen-1.5B-Abliterated-dpo', 'mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-1.5B-v1.0', 'prithivMLmods/Phi-4-Super-1', 'prithivMLmods/Calcium-Opus-14B-Merge',
|
101 |
-
'prithivMLmods/COCO-7B-Instruct-1M', 'prithivMLmods/Taurus-Opus-7B', 'ReadyArt/L3.3-Nevoria-R1-70b_EXL2_5.0bpw_H8', 'NickyNicky/Llama-1B-GRPO_Final', 'unsloth/Qwen2.5-14B-Instruct-1M', 'RefalMachine/RuadaptQwen2.5-7B-Lite-Beta', 'hiieu/R1_tool_call_Distill-Qwen-7B', 'fla-hub/rwkv7-1.5B-world', 'KatyTheCutie/Repose-12B', 'DoppelReflEx/MN-12B-WolFrame',
|
102 |
-
'huihui-ai/DeepSeek-R1-Distill-Qwen-7B-abliterated', 'SubtleOne/Qwen2.5-32b-Erudite-Writer', 'ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M', 'safe049/SmolTuring-8B-Instruct', 'unsloth/Mistral-Small-24B-Instruct-2501', 'unsloth/Mistral-Small-24B-Base-2501',
|
103 |
-
'llm-jp/llm-jp-3-150m-instruct3', 'llm-jp/llm-jp-3-7.2b-instruct3', 'suayptalha/Maestro-10B', 'Quazim0t0/Phi4.Turn.R1Distill_v1.5.1-Tensors', 'OddTheGreat/Malevolent_12B.v2', 'nbeerbower/Dumpling-Qwen2.5-7B-1k-r16', 'kromeurus/L3.1-Tivir-v0.1-10B', 'suayptalha/Maestro-R1-Llama-8B', 'rubenroy/Zurich-1.5B-GCv2-50k',
|
104 |
-
'rubenroy/Zurich-1.5B-GCv2-100k', 'rubenroy/Zurich-1.5B-GCv2-1m', 'enhanceaiteam/xea-llama', 'eridai/eridaRE', 'lianghsun/Marble-3B', 'DataSoul/MwM-7B-CoT-Merge1', 'Erland/Mistral-Small-24B-Base-ChatML-2501-bnb-4bit', 'chameleon-lizard/Qwen-2.5-7B-DTF', 'Vikhrmodels/QVikhr-2.5-1.5B-Instruct-SMPO_MLX-8bit', 'RecurvAI/Recurv-Clinical-Deepseek-R1',
|
105 |
-
'Darkhn/L3.3-Damascus-R1-5.0bpw-h8-exl2', 'Vikhrmodels/QVikhr-2.5-1.5B-Instruct-SMPO_MLX-4bit', 'BarBarickoza/Dans-Picaro-MagNante-v4-v1-12b-V3', 'skzxjus/Qwen2.5-7B-1m-Open-R1-Distill', 'CultriX/Qwen2.5-14B-Ultima', 'CultriX/Enhanced-TIES-Base-v1', 'loaiabdalslam/beetelware-saudi-R1-Distill-Llama-8B',
|
106 |
-
'Triangle104/Gemmadevi-Stock-10B', 'avemio-digital/German-RAG-HERMES-MOBIUS-R1-LLAMA', 'syubraj/MedicalChat-Phi-3.5-mini-instruct', 'Xiaojian9992024/Qwen2.5-THREADRIPPER-Small', 'jpacifico/Chocolatine-2-merged-qwen25arch', 'mobiuslabsgmbh/Meta-Llama-3-8B-Instruct_4bitgs64_hqq_hf', 'pabloce/esbieta-ec-qwen-2.5-3B', 'TareksLab/Progenitor-V2.3-LLaMa-70B',
|
107 |
-
'suayptalha/Lamarckvergence-14B', 'jpacifico/Chocolatine-2-14B-Instruct-v2.0.3', 'bunnycore/DeepThinker-7B-Sce-v2',
|
108 |
-
'sometimesanotion/Qwen2.5-7B-Gordion-v0.1', 'openai-community/gpt2-large', 'openai-community/openai-gpt', 'EleutherAI/gpt-neo-1.3B', 'EleutherAI/gpt-neo-125m', 'GroNLP/gpt2-small-italian', 'LorenzoDeMattei/GePpeTto', 'Vamsi/T5_Paraphrase_Paws', 'ethzanalytics/distilgpt2-tiny-conversational', 'microsoft/DialoGPT-small', 'mrm8488/spanish-gpt2',
|
109 |
-
'shibing624/code-autocomplete-distilgpt2-python', 'EleutherAI/gpt-neox-20b', 'bigscience/bloom-560m', 'bigscience/bloom-1b7', 'rinna/japanese-gpt-neox-small', 'Langboat/bloom-1b4-zh',
|
110 |
-
'EleutherAI/polyglot-ko-1.3b', 'bigscience/bloomz', 'Gustavosta/MagicPrompt-Stable-Diffusion', 'EleutherAI/polyglot-ko-5.8b', 'bigscience/bloomz-560m', 'bigscience/bloomz-3b', 'Norod78/gpt-fluentui-flat-svg', 'EleutherAI/pythia-160m', 'EleutherAI/pythia-1b-deduped', 'EleutherAI/pythia-12b', 'medalpaca/medalpaca-7b', 'huggyllama/llama-7b',
|
111 |
-
'vicgalle/gpt2-open-instruct-v1', 'bigcode/starcoder', 'TheBloke/stable-vicuna-13B-GPTQ', 'TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ', 'bigcode/tiny_starcoder_py', 'TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ', 'Monero/WizardLM-30B-Uncensored-Guanaco-SuperCOT-30b', 'TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ', 'nomic-ai/gpt4all-falcon',
|
112 |
-
'TheBloke/Karen_theEditor_13B-GPTQ', 'TheBloke/Nous-Hermes-13B-GPTQ', 'pankajmathur/orca_alpaca_3b', 'pankajmathur/orca_mini_3b', 'TheBloke/WizardLM-13B-V1-0-Uncensored-SuperHOT-8K-GPTQ', 'TheBloke/Wizard-Vicuna-13B-Uncensored-SuperHOT-8K-GPTQ',
|
113 |
-
'bigcode/starcoderbase-1b', 'NumbersStation/nsql-6B', 'HuggingFaceM4/idefics-80b', 'TheBloke/Pygmalion-7B-SuperHOT-8K-GPTQ', 'Maykeye/TinyLLama-v0', 'meta-llama/Llama-2-70b-hf', 'meta-llama/Llama-2-13b-chat-hf', 'meta-llama/Llama-2-70b-chat-hf', 'TheBloke/Llama-2-13B-chat-GPTQ', 'NousResearch/Llama-2-7b-chat-hf', 'TheBloke/Llama-2-70B-Chat-GPTQ', 'NousResearch/Llama-2-13b-chat-hf', 'georgesung/llama2_7b_chat_uncensored', 'NousResearch/Nous-Hermes-Llama2-13b', 'TheBloke/30B-Epsilon-GPTQ', 'TheBloke/Dolphin-Llama-13B-GPTQ', 'bigcode/octocoder', 'Qwen/Qwen-7B', 'Qwen/Qwen-7B-Chat', 'uoe-nlp/gpt-neo-125m_instruction-tuned_sni', 'TheBloke/MythoMax-L2-13B-GPTQ', 'quantumaikr/llama-2-70b-fb16-korean', 'cenkersisman/gpt2-turkish-900m', 'codellama/CodeLlama-7b-hf', 'codellama/CodeLlama-13b-hf', 'codellama/CodeLlama-13b-Python-hf', 'codellama/CodeLlama-7b-Instruct-hf', 'codellama/CodeLlama-13b-Instruct-hf', 'codellama/CodeLlama-34b-hf', 'codellama/CodeLlama-34b-Python-hf', 'codellama/CodeLlama-34b-Instruct-hf', 'tiiuae/falcon-180B', 'uukuguy/speechless-llama2-luban-orca-platypus-13b', 'TinyLlama/TinyLlama-1.1B-step-50K-105b', 'diabolic6045/itineraries_Generator', '42dot/42dot_LLM-PLM-1.3B', '42dot/42dot_LLM-SFT-1.3B', 'tiiuae/falcon-180B-chat', 'PygmalionAI/pygmalion-2-13b', 'PygmalionAI/mythalion-13b', 'microsoft/phi-1_5', 'microsoft/phi-1', 'Undi95/UndiMix-v4-13B', 'teknium/Phi-Hermes-1.3B', 'TinyLlama/TinyLlama-1.1B-Chat-v0.1', 'AdaptLLM/medicine-LLM', 'AdaptLLM/law-LLM', 'AdaptLLM/finance-LLM', 'Dans-DiscountModels/Dans-RetroRodeo-13b', 'TheBloke/30B-Epsilon-AWQ', 'TheBloke/Wizard-Vicuna-7B-Uncensored-AWQ', 'TheBloke/Xwin-LM-13B-V0.1-GPTQ', 'Duxiaoman-DI/XuanYuan-70B', 'TheBloke/storytime-13B-GPTQ', 'Qwen/Qwen-14B-Chat', 'TheBloke/Mistral-7B-v0.1-AWQ', 'TheBloke/Mistral-7B-Instruct-v0.1-AWQ', 'TheBloke/Mistral-7B-v0.1-GPTQ', 'stabilityai/stablelm-3b-4e1t', 'rmanluo/RoG', 'lizpreciatior/lzlv_70b_fp16_hf', 'Dans-Archive/Dans-TotSirocco-7b', 'basilepp19/bloom-1b7_it', 
'WisdomShell/CodeShell-7B', 'mychen76/mistral7b_ocr_to_json_v1', 'TheBloke/Athena-v4-GPTQ', 'HuggingFaceH4/zephyr-7b-alpha', 'cognitivecomputations/dolphin-2.1-mistral-7b', 'TheBloke/llava-v1.5-13B-AWQ', 'TheBloke/llava-v1.5-13B-GPTQ', 'THUDM/agentlm-7b', 'LumiOpen/Poro-34B', 'jondurbin/airoboros-m-7b-3.1.2', 'KoboldAI/LLaMA2-13B-Tiefighter-GPTQ', 'deepseek-ai/deepseek-coder-6.7b-base', 'aisingapore/sea-lion-3b', 'TRAC-MTRY/traclm-v1-3b-base', 'pfnet/plamo-13b-instruct', 'bkai-foundation-models/vietnamese-llama2-7b-40GB', 'flozi00/Mistral-7B-german-assistant-v4', 'TheBloke/zephyr-7B-beta-GPTQ', 'squarelike/Gugugo-koen-7B-V1.1', 'deepseek-ai/deepseek-coder-33b-base', 'TheBloke/Athnete-13B-GPTQ', 'TheBloke/Nethena-20B-GPTQ', 'cognitivecomputations/dolphin-2.2.1-mistral-7b', '01-ai/Yi-34B', 'TheBloke/deepseek-coder-33B-instruct-AWQ', 'alpindale/goliath-120b', 'Pclanglais/MonadGPT', 'epfl-llm/meditron-70b', 'epfl-llm/meditron-7b', 'alignment-handbook/zephyr-7b-sft-full', 'OpenLLM-France/Claire-7B-0.1', 'hakurei/mommygpt-3B', 'allenai/tulu-2-dpo-70b', 'NeverSleep/Noromaid-13b-v0.1.1', 'KoboldAI/LLaMA2-13B-Psyfighter2', 'Intel/neural-chat-7b-v3-1', 'OrionStarAI/OrionStar-Yi-34B-Chat', 'FPHam/Karen_TheEditor_V2_STRICT_Mistral_7B', 'Doctor-Shotgun/Nous-Capybara-limarpv3-34B', 'TinyLlama/TinyLlama-1.1B-Chat-v0.4', 'MohamedRashad/AceGPT-13B-chat-AWQ', 'THUDM/cogvlm-chat-hf', 'TheBloke/merlyn-education-safety-GPTQ', 'AntibodyGeneration/fine-tuned-progen2-small', 'TinyLlama/TinyLlama-1.1B-Chat-v0.6', 'OrionStarAI/OrionStar-Yi-34B-Chat-Llama', 'stabilityai/stablelm-zephyr-3b', 'FPHam/Karen_TheEditor_V2_CREATIVE_Mistral_7B', 'Jiayi-Pan/Tiny-Vicuna-1B', 'ethz-spylab/poisoned-rlhf-7b-SUDO-10', 'maywell/PiVoT-0.1-early', 'berkeley-nest/Starling-LM-7B-alpha', 'google/madlad400-8b-lm', 'SparseLLM/ReluLLaMA-7B', 'shleeeee/mistral-7b-wiki', 'ceadar-ie/FinanceConnect-13B', 'brucethemoose/CapyTessBorosYi-34B-200K-DARE-Ties-exl2-4bpw-fiction', 'TheBloke/saiga_mistral_7b-GPTQ', 
'unsloth/llama-2-7b-bnb-4bit', 'Qwen/Qwen-72B-Chat', 'mlabonne/NeuralHermes-2.5-Mistral-7B', 'TheBloke/open-llama-3b-v2-wizard-evol-instuct-v2-196k-AWQ', 'TheBloke/deepseek-llm-7B-chat-GPTQ', 'beomi/Yi-Ko-6B', 'm-a-p/ChatMusician', 'maywell/Synatra-42dot-1.3B', 'Qwen/Qwen-Audio', 'Qwen/Qwen-Audio-Chat', 'mhenrichsen/context-aware-splitter-1b-english', 'jondurbin/cinematika-7b-v0.1', 'eci-io/climategpt-7b', 'simonveitner/MathHermes-2.5-Mistral-7B', 'ise-uiuc/Magicoder-DS-6.7B', 'ise-uiuc/Magicoder-S-DS-6.7B', 'migueldeguzmandev/paperclippetertodd3', 'sophosympatheia/Rogue-Rose-103b-v0.2', 'timpal0l/Mistral-7B-v0.1-flashback-v2', 'Trelis/Llama-2-7b-chat-hf-function-calling-v3', 'togethercomputer/StripedHyena-Nous-7B', 'Trelis/deepseek-llm-67b-chat-function-calling-v3', 'meta-llama/LlamaGuard-7b', 'openaccess-ai-collective/DPOpenHermes-7B-v2', 'tokyotech-llm/Swallow-7b-instruct-hf', 'AdaptLLM/finance-chat', 'AdaptLLM/law-chat', 'Intel/neural-chat-7b-v3-3', 'Rijgersberg/GEITje-7B-chat', 'TinyLlama/TinyLlama-1.1B-intermediate-step-1195k-token-2.5T', 'TheBloke/Mistral-7B-Instruct-v0.2-AWQ', 'DaizeDong/GraphsGPT-2W', 'upstage/SOLAR-10.7B-Instruct-v1.0', 'upstage/SOLAR-10.7B-v1.0', 'w4r10ck/SOLAR-10.7B-Instruct-v1.0-uncensored', 'seyabde/mistral_7b_yo_instruct', 'TheBloke/dolphin-2.5-mixtral-8x7b-GPTQ', 'joey00072/ToxicHermes-2.5-Mistral-7B', 'THUDM/cogagent-vqa-hf', 'Rijgersberg/GEITje-7B-chat-v2', 'silk-road/ChatHaruhi_RolePlaying_qwen_7b', 'AdaptLLM/finance-LLM-13B', 'bkai-foundation-models/vietnamese-llama2-7b-120GB', 'scb10x/typhoon-7b', 'Felladrin/Llama-160M-Chat-v1', 'SuperAGI/SAM', 'Nero10578/Mistral-7B-Sunda-v1.0', 'NousResearch/Nous-Hermes-2-Yi-34B', 'ericpolewski/AIRIC-The-Mistral', 'charent/Phi2-Chinese-0.2B', 'unum-cloud/uform-gen', 'unsloth/mistral-7b-bnb-4bit', 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT', 'LR-AI-Labs/vbd-llama2-7B-50b-chat', 'unsloth/codellama-34b-bnb-4bit', 'cognitivecomputations/dolphin-2.6-mistral-7b', 'unsloth/llama-2-13b-bnb-4bit', 
'OpenPipe/mistral-ft-optimized-1227', 'TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T', 'GRMenon/mental-health-mistral-7b-instructv0.2-finetuned-V2', 'sethuiyer/SynthIQ-7b', 'unsloth/zephyr-sft-bnb-4bit', 'jondurbin/bagel-34b-v0.2', 'SkunkworksAI/tinyfrank-1.4B', 'NeuralNovel/Panda-7B-v0.1', 'unsloth/tinyllama-bnb-4bit', 'NousResearch/Nous-Hermes-2-SOLAR-10.7B', 'cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser', 'Vikhrmodels/Vikhr-7b-0.1', 'nicholasKluge/TeenyTinyLlama-460m', 'jsfs11/OH-dpov2', 'Unbabel/TowerBase-7B-v0.1', 'Doctor-Shotgun/Mixtral-8x7B-Instruct-v0.1-LimaRP-ZLoss', 'WizardLMTeam/WizardCoder-33B-V1.1', 'SanjiWatsuki/Kunoichi-7B', 'Unbabel/TowerInstruct-7B-v0.1', 'WYNN747/Burmese-GPT', 'NousResearch/Genstruct-7B', 'broskicodes/simple-stories-4M', 'STEM-AI-mtl/phi-2-electrical-engineering', 'mlabonne/phixtral-2x2_8', 'ross-dev/sexyGPT-Uncensored', 'HuggingFaceM4/VLM_WebSight_finetuned', 'stabilityai/stable-code-3b', 'huskyhong/noname-ai-v2_2-light', 'aari1995/germeo-7b-laser', 'argilla/distilabeled-OpenHermes-2.5-Mistral-7B', 'fblgit/UNA-TheBeagle-7b-v1', 'cognitivecomputations/MegaDolphin-120b', 'herisan/tinyllama-mental_health_counseling_conversations', 'NeverSleep/Noromaid-7B-0.4-DPO', 'therealcyberlord/TinyLlama-1.1B-Medical', 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO', 'szymonrucinski/Curie-7B-v1', 'MaziyarPanahi/Synatra-7B-v0.3-RP-Mistral-7B-Instruct-v0.2-slerp', 'SicariusSicariiStuff/Tenebra_30B_Alpha01_FP16', 'charlesdedampierre/TopicNeuralHermes-2.5-Mistral-7B', 'CodeGPTPlus/deepseek-coder-1.3b-typescript', 'herisan/Mistral-7b-bnb-4bit_mental_health_counseling_conversations', 'Viet-Mistral/Vistral-7B-Chat', 'sophosympatheia/Midnight-Rose-70B-v1.0', 'itsskofficial/falcon-7b-blooms-taxonomy-merged', 'AI-B/UTENA-7B-NSFW-V2', 'KoboldAI/LLaMA2-13B-Estopia', 'DiscoResearch/DiscoLM_German_7b_v1', 'CallComply/zephyr-7b-beta-32k', 'mlabonne/NeuralBeagle14-7B', 'jat-project/jat', 'macadeliccc/piccolo-math-2x7b', 'Isotonic/Dolphin-5.1-7b', 
'shadowml/DareBeagle-7B', 'Karko/Proctora', 'haoranxu/ALMA-13B-R', 'yanolja/KoSOLAR-10.7B-v0.2', 'Tensoic/Kan-Llama-7B-SFT-v0.5', 'stabilityai/stablelm-2-1_6b', 'stabilityai/stablelm-2-zephyr-1_6b', 'lrds-code/boana-7b-instruct', 'vikhyatk/moondream1', 'gate369/Blurdus-7b-v0.1', 'Blizado/discolm-mfto-7b-german-v0.1', 'unsloth/mistral-7b-instruct-v0.2-bnb-4bit', 'senseable/WestLake-7B-v2', 'Qwen/Qwen1.5-0.5B', 'Qwen/Qwen1.5-1.8B', 'Qwen/Qwen1.5-7B', 'epinnock/deepseek-coder-33B-evol-feedback-v3', 'LanguageBind/MoE-LLaVA-StableLM-1.6B-4e', 'AISimplyExplained/Vakil-7B', 'RaviNaik/Llava-Phi2', 'motherduckdb/DuckDB-NSQL-7B-v0.1', 'deepseek-ai/deepseek-coder-7b-base-v1.5', 'KatyTheCutie/EstopianMaid-13B', 'abacusai/TheProfessor-155b', 'allenai/OLMo-1B', 'cfahlgren1/natural-functions', 'macadeliccc/WestLake-7B-v2-laser-truthy-dpo', 'jsfs11/WestOrcaDPO-7B-GTA', 'cckevinn/SeeClick', 'Unbabel/TowerInstruct-13B-v0.1', 'codellama/CodeLlama-70b-hf', 'codellama/CodeLlama-70b-Python-hf', 'codellama/CodeLlama-70b-Instruct-hf', 'seedboxai/KafkaLM-70B-German-V0.1', 'Qwen/Qwen1.5-7B-Chat', 'Qwen/Qwen1.5-72B-Chat', 'liuhaotian/llava-v1.6-vicuna-7b', 'liuhaotian/llava-v1.6-vicuna-13b', 'LoneStriker/Lily-Cybersecurity-7B-v0.2-8.0bpw-h8-exl2', 'Qwen/Qwen1.5-0.5B-Chat', 'unsloth/codellama-7b-bnb-4bit', 'Gille/StrangeMerges_17-7B-dare_ties', 'Gille/StrangeMerges_19-7B-dare_ties', 'Bread-AI/Crumb-13B', 'Druvith/MEDMISTRAL', 'alchemonaut/BoreanGale-70B', 'Gille/StrangeMerges_20-7B-slerp', 'PipableAI/pip-SQL-1B', 'cais/HarmBench-Llama-2-13b-cls', 'sophosympatheia/Midnight-Rose-70B-v2.0.3', 'defog/sqlcoder-7b-2', 'RUCKBReasoning/TableLLM-13b', 'RUCKBReasoning/TableLLM-7b', 'Sao10K/Fimbulvetr-11B-v2', 'nvidia/OpenMath-Mistral-7B-v0.1-hf', 'yanolja/EEVE-Korean-10.8B-v1.0', 'WhiteRabbitNeo/Trinity-33B-v1.0', 'hon9kon9ize/CantoneseLLM-6B-preview202402', 'Nitral-Archive/Pasta-Lake-7b', 'kennylam/Breeze-7B-Cantonese-v0.1', 'Unbabel/TowerInstruct-7B-v0.2', 'GritLM/GritLM-7B', 'google/gemma-7b-it', 
'ytu-ce-cosmos/turkish-gpt2-large', 'prometheus-eval/prometheus-7b-v2.0', 'NingLab/eCeLLM-M', 'PipableAI/pip-sql-1.3b', 'rhplus0831/maid-yuzu-v8', 'proxectonos/Carballo-bloom-1.3B', 'sambanovasystems/SambaLingo-Arabic-Chat', 'shahzebnaveed/StarlingHermes-2.5-Mistral-7B-slerp', 'LumiOpen/Viking-7B', 'tanamettpk/TC-instruct-DPO', 'Tann-dev/sex-chat-dirty-girlfriend', 'BioMistral/BioMistral-7B-DARE-AWQ-QGS128-W4-GEMM', 'NousResearch/Nous-Hermes-2-Mistral-7B-DPO', 'SparseLLM/prosparse-llama-2-7b', 'HuggingFaceTB/cosmo-1b', 'Efficient-Large-Model/VILA-13b', 'scb10x/typhoon-7b-instruct-02-19-2024', 'LumiOpen/Viking-33B', 'prometheus-eval/prometheus-8x7b-v2.0', 'bigcode/starcoder2-15b', 'togethercomputer/evo-1-131k-base', 'unsloth/gemma-7b-bnb-4bit', 'unsloth/gemma-2b-bnb-4bit', 'unsloth/gemma-2b-it-bnb-4bit', 'unsloth/gemma-7b-it-bnb-4bit', 'yanolja/EEVE-Korean-Instruct-10.8B-v1.0', 'yanolja/EEVE-Korean-2.8B-v1.0', 'yanolja/EEVE-Korean-Instruct-2.8B-v1.0', 'gordicaleksa/YugoGPT', 'timpal0l/Mistral-7B-v0.1-flashback-v2-instruct', 'allenai/OLMo-7B-Instruct', 'coggpt/qwen-1.5-patent-translation', 'GreatCaptainNemo/ProLLaMA', 'Felladrin/Minueza-32M-Base', 'Felladrin/Minueza-32M-Chat', 'm-a-p/OpenCodeInterpreter-DS-1.3B', 'MaziyarPanahi/LongAlpaca-13B-GGUF', 'OPI-PG/Qra-1b', 'MathGenie/MathGenie-InterLM-20B', 'MaziyarPanahi/Mistral-7B-Instruct-Aya-101', 'ENERGY-DRINK-LOVE/eeve_dpo-v3', 'Stopwolf/Tito-7B-slerp', 'MaziyarPanahi/Mistral-7B-Instruct-Aya-101-GGUF', 'PORTULAN/gervasio-7b-portuguese-ptbr-decoder', 'JinghuiLuAstronaut/DocLLM_baichuan2_7b', 'vicgalle/RoleBeagle-11B', 'HuggingFaceH4/zephyr-7b-gemma-v0.1', 'KatyTheCutie/LemonadeRP-4.5.3', 'Kooten/LemonadeRP-4.5.3-4bpw-exl2', 'sophosympatheia/Midnight-Miqu-103B-v1.0', 'soketlabs/pragna-1b', 'remyxai/SpaceLLaVA', 'Efficient-Large-Model/VILA-2.7b', 'hiyouga/Llama-2-70b-AQLM-2Bit-QLoRA-function-calling', 'occiglot/occiglot-7b-de-en-instruct', 'erythropygia/Gemma2b-Turkish-Instruction', 'state-spaces/mamba-2.8b-hf', 
'state-spaces/mamba-130m-hf', 'zamal/gemma-7b-finetuned', 'Divyanshu04/LLM3', 'yam-peleg/Hebrew-Gemma-11B', 'yam-peleg/Hebrew-Gemma-11B-Instruct', 'stabilityai/stable-code-instruct-3b', 'Gille/StrangeMerges_35-7B-slerp', 'stanford-oval/llama-7b-wikiwebquestions', 'cstr/Spaetzle-v8-7b', 'ChaoticNeutrals/BuRP_7B', 'cstr/Spaetzle-v12-7b', 'lightblue/ao-karasu-72B', 'NousResearch/Hermes-2-Pro-Mistral-7B', 'hiieu/Vistral-7B-Chat-function-calling', 'CohereForAI/c4ai-command-r-v01', 'ND911/Franken-Mistral-Merlinite-Maid', 'fhai50032/Mistral-4B', 'meta-llama/CodeLlama-7b-Python-hf', 'meta-llama/CodeLlama-7b-Instruct-hf', 'meta-llama/CodeLlama-13b-hf', 'meta-llama/CodeLlama-13b-Instruct-hf', 'ministral/Ministral-3b-instruct', 'CohereForAI/c4ai-command-r-v01-4bit', 'KissanAI/Dhenu-vision-lora-0.1', 'MaziyarPanahi/Calme-7B-Instruct-v0.2', 'icefog72/Kunokukulemonchini-7b-4.1bpw-exl2', 'ChaoticNeutrals/Infinitely-Laydiculous-7B', 'Virt-io/Nina-v2-7B', 'BAAI/bge-reranker-v2-minicpm-layerwise', 'NexaAIDev/Octopus-v2', 'jhu-clsp/FollowIR-7B', 'cais/HarmBench-Mistral-7b-val-cls', 'ezelikman/quietstar-8-ahead', 'szymonrucinski/Krakowiak-7B-v3', 'FluffyKaeloky/Midnight-Miqu-103B-v1.5', 'Nekochu/Confluence-Renegade-7B', 'fxmarty/tiny-dummy-qwen2', 'ytu-ce-cosmos/turkish-gpt2-large-750m-instruct-v0.1', 'ChaoticNeutrals/Eris_PrimeV3-Vision-7B', 'somosnlp/Sam_Diagnostic', 'google/codegemma-2b', 'google/codegemma-7b', 'google/codegemma-7b-it', 'stabilityai/stablelm-2-12b', 'unsloth/mistral-7b-v0.2-bnb-4bit', 'Praneeth/code-gemma-2b-it', 'Inv/Konstanta-V4-Alpha-7B', 'liminerity/e.star.7.b', 'Sahi19/Gemma2bLegalChatbot', 'gokaygokay/moondream-prompt', 'YanweiLi/MGM-7B', 'beomi/gemma-ko-2b', 'Anant58/Genshin-chat-ARM', 'thtskaran/sanskritayam-gpt', 'Natkituwu/Erosumika-7B-v3-7.1bpw-exl2', 'MarsupialAI/SkunkApe-14b', 'google/gemma-1.1-7b-it', 'Smuggling1710/InfinToppyKuno-DARE-7b', 'botbot-ai/CabraQwen7b', 'bsen26/113-Aspect-Emotion-Model', 'arcee-ai/Saul-Nous-Hermes-2-Mistral-7B-DPO-Ties', 
'cognitivecomputations/dolphin-2.8-mistral-7b-v02', 'ai21labs/Jamba-v0.1', 'grimjim/Mistral-Starling-merge-trial1-7B', 'mikewang/PVD-160k-Mistral-7b', 'Eurdem/Pinokio_v1.0', 'keeeeenw/MicroLlama', '1bitLLM/bitnet_b1_58-3B', '1bitLLM/bitnet_b1_58-xl', '1bitLLM/bitnet_b1_58-large', 'EdBerg/MISTRALNEURAL-7B-slerp', 'Kukedlc/Neural-4-QA-7b']
|
114 |
|
115 |
-
#
|
116 |
-
|
117 |
|
118 |
-
|
119 |
-
|
120 |
-
"
|
121 |
-
|
122 |
-
"
|
123 |
-
|
124 |
-
|
125 |
-
"Q4_K_M": "Recommended for most uses, good balance",
|
126 |
-
"Q4_K_S": "Smaller than Q4_K_M, still good quality",
|
127 |
-
"Q5_0": "Higher precision than Q4, legacy format",
|
128 |
-
"Q5_1": "Improved Q5, good for complex tasks",
|
129 |
-
"Q5_K_M": "High quality, larger size, good for complex reasoning",
|
130 |
-
"Q5_K_S": "Balanced quality and size in Q5 family",
|
131 |
-
"Q6_K": "Very high quality, larger size",
|
132 |
-
"Q8_0": "Highest quality quantized, largest size",
|
133 |
-
"BF16": "Brain Float 16, good for GPU inference",
|
134 |
-
"F16": "Full 16-bit precision, high accuracy",
|
135 |
-
"F32": "Full 32-bit precision, highest accuracy, largest size"
|
136 |
-
}
|
137 |
|
138 |
-
|
139 |
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
-
def
|
150 |
-
"""
|
151 |
-
|
152 |
-
"""
|
153 |
-
st.write(f"๐ Converting `{model_dir}` to GGUF format...")
|
154 |
-
os.makedirs(os.path.dirname(output_file), exist_ok=True)
|
155 |
-
cmd = [
|
156 |
-
"python3", "/app/llama.cpp/convert_hf_to_gguf.py", model_dir,
|
157 |
-
"--outfile", output_file
|
158 |
-
]
|
159 |
-
process = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
160 |
-
if process.returncode == 0:
|
161 |
-
st.success(f"โ
Conversion complete: `{output_file}`")
|
162 |
else:
|
163 |
-
|
164 |
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
"
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
|
182 |
-
|
183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
else:
|
185 |
-
|
|
|
|
|
186 |
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
|
195 |
-
|
|
|
196 |
|
197 |
-
|
198 |
-
st.write("### Step 1: Downloading Model")
|
199 |
-
download_model(hf_model_name, output_dir)
|
200 |
-
progress_bar.progress(33)
|
201 |
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
|
|
|
|
206 |
|
207 |
-
|
208 |
-
|
209 |
-
quantize_llama(gguf_file, quantized_file, quant_type.lower())
|
210 |
-
progress_bar.progress(100)
|
211 |
|
212 |
-
|
213 |
-
return quantized_file
|
214 |
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
#
|
221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
|
223 |
-
|
224 |
-
|
|
|
|
|
|
|
|
|
|
|
225 |
|
226 |
-
|
227 |
-
|
|
|
|
|
228 |
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
)
|
235 |
-
st.success(f"โ
File uploaded to Hugging Face: {repo_id}")
|
236 |
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
|
|
243 |
|
244 |
-
|
|
|
|
|
|
|
|
|
|
|
245 |
|
|
|
246 |
|
247 |
-
|
248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
|
250 |
-
|
251 |
-
"
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
start_button = st.button("๐ Start Quantization")
|
257 |
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
else:
|
263 |
-
|
264 |
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
279 |
|
280 |
-
if
|
281 |
-
|
282 |
-
|
283 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
else:
|
285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import spaces
|
2 |
+
import json
|
3 |
import subprocess
|
4 |
+
import os
|
5 |
+
from llama_cpp import Llama
|
6 |
+
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
|
7 |
+
from llama_cpp_agent.providers import LlamaCppPythonProvider
|
8 |
+
from llama_cpp_agent.chat_history import BasicChatHistory
|
9 |
+
from llama_cpp_agent.chat_history.messages import Roles
|
10 |
+
import gradio as gr
|
11 |
+
from huggingface_hub import hf_hub_download
|
12 |
+
import tempfile
|
13 |
+
from typing import List, Tuple, Optional
|
14 |
+
|
15 |
+
# PDF ์ฒ๋ฆฌ ๋ผ์ด๋ธ๋ฌ๋ฆฌ ์กฐ๊ฑด๋ถ import
|
16 |
+
try:
|
17 |
+
from docling.document_converter import DocumentConverter
|
18 |
+
DOCLING_AVAILABLE = True
|
19 |
+
except ImportError:
|
20 |
+
DOCLING_AVAILABLE = False
|
21 |
+
print("Docling not available, using alternative PDF processing")
|
22 |
+
try:
|
23 |
+
import PyPDF2
|
24 |
+
import pdfplumber
|
25 |
+
except ImportError:
|
26 |
+
print("Warning: PDF processing libraries not fully installed")
|
27 |
|
28 |
+
# Read the Hugging Face access token from the environment (may be None;
# a warning is printed in __main__ when it is missing).
HF_TOKEN = os.getenv("HF_TOKEN")

# Module-level state shared across the Gradio callbacks below.
llm = None              # lazily-created llama_cpp.Llama instance (loaded on first chat turn)
llm_model = None        # name of the GGUF model currently loaded into `llm`
document_context = ""   # Markdown text extracted from the uploaded PDF
document_filename = ""  # base name of the currently loaded PDF file

print("์ ์ญ ๋ณ์ ์ด๊ธฐํ ์๋ฃ")
print(f"document_context ์ด๊ธฐ๊ฐ: '{document_context}'")
print(f"document_filename ์ด๊ธฐ๊ฐ: '{document_filename}'")

# GGUF model file name and its repository on the Hub.
MISTRAL_MODEL_NAME = "Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503.gguf"

# Download the model at import time (uses HF_TOKEN when set; cached under ./models).
model_path = hf_hub_download(
    repo_id="ginigen/Private-BitSix-Mistral-Small-3.1-24B-Instruct-2503",
    filename=MISTRAL_MODEL_NAME,
    local_dir="./models",
    token=HF_TOKEN
)

print(f"Downloaded model path: {model_path}")

# Custom CSS applied to the Gradio Blocks UI defined further below.
css = """
.bubble-wrap {
    padding-top: calc(var(--spacing-xl) * 3) !important;
}
.message-row {
    justify-content: space-evenly !important;
    width: 100% !important;
    max-width: 100% !important;
    margin: calc(var(--spacing-xl)) 0 !important;
    padding: 0 calc(var(--spacing-xl) * 3) !important;
}
.flex-wrap.user {
    border-bottom-right-radius: var(--radius-lg) !important;
}
.flex-wrap.bot {
    border-bottom-left-radius: var(--radius-lg) !important;
}
.message.user{
    padding: 10px;
}
.message.bot{
    text-align: right;
    width: 100%;
    padding: 10px;
    border-radius: 10px;
}
.message-bubble-border {
    border-radius: 6px !important;
}
.message-buttons {
    justify-content: flex-end !important;
}
.message-buttons-left {
    align-self: end !important;
}
.message-buttons-bot, .message-buttons-user {
    right: 10px !important;
    left: auto !important;
    bottom: 2px !important;
}
.dark.message-bubble-border {
    border-color: #343140 !important;
}
.dark.user {
    background: #1e1c26 !important;
}
.dark.assistant.dark, .dark.pending.dark {
    background: #16141c !important;
}
.upload-container {
    margin-bottom: 20px;
    padding: 15px;
    border: 2px dashed #666;
    border-radius: 10px;
    background-color: #f0f0f0;
}
.dark .upload-container {
    background-color: #292733;
    border-color: #444;
}
"""
|
115 |
|
116 |
+
def get_messages_formatter_type(model_name):
    """Map a model name to the prompt/message formatter it requires.

    Any Mistral-family model (including the BitSix quantised variant)
    uses the MISTRAL prompt format rather than CHATML.

    Raises:
        ValueError: if the model name is not recognised.
    """
    if any(tag in model_name for tag in ("Mistral", "BitSix")):
        return MessagesFormatterType.MISTRAL
    raise ValueError(f"Unsupported model: {model_name}")
|
121 |
|
122 |
+
@spaces.GPU
def convert_pdf_to_markdown(file):
    """Convert an uploaded PDF to Markdown and cache it as the chat context.

    Side effects: on success the extracted Markdown is stored in the
    module-level ``document_context`` and the file's base name in
    ``document_filename``; on failure both are reset to "".

    Args:
        file: Gradio file object exposing a ``.name`` path attribute, or None.

    Returns:
        tuple: ``(markdown_text, metadata_dict)`` on success, or
        ``(error_message, {"error": ...})`` on failure.
    """
    global document_context, document_filename

    if file is None:
        return "ํ์ผ์ด ์๋ก๋๋์ง ์์์ต๋๋ค.", {}

    # Fix: the guarded import at the top of the file sets DOCLING_AVAILABLE,
    # but it was never checked here, so a missing docling surfaced as an
    # opaque NameError.  Fail with an explicit message instead.
    if not DOCLING_AVAILABLE:
        document_context = ""
        document_filename = ""
        error_msg = "PDF conversion unavailable: the 'docling' package is not installed."
        return error_msg, {"error": "docling not installed"}

    try:
        print(f"\n=== PDF ๋ณํ ์์ ===")
        print(f"ํ์ผ ๊ฒฝ๋ก: {file.name}")

        # Convert the PDF with docling and export the result as Markdown.
        converter = DocumentConverter()
        result = converter.convert(file.name)
        markdown_content = result.document.export_to_markdown()

        # Publish the document to the module-level cache used by respond().
        # (The original additionally re-assigned via globals(); that was
        # redundant with the `global` declaration above and has been removed.)
        document_context = markdown_content
        document_filename = os.path.basename(file.name)

        metadata = {
            "filename": document_filename,
            "conversion_status": "success",
            "content_length": len(markdown_content),
            # First 500 characters, elided when the document is longer.
            "preview": markdown_content[:500] + "..." if len(markdown_content) > 500 else markdown_content
        }

        print(f"โ PDF ๋ณํ ์ฑ๊ณต!")
        print(f"๐ ํ์ผ๋ช: {document_filename}")
        print(f"๐ ๋ฌธ์ ๊ธธ์ด: {len(markdown_content)} ๋ฌธ์")
        print(f"=== PDF ๋ณํ ์๋ฃ ===\n")

        return markdown_content, metadata

    except Exception as e:
        error_msg = f"PDF ๋ณํ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"
        print(f"โ {error_msg}")
        # Reset the cache so stale context from a previous PDF is not reused.
        document_context = ""
        document_filename = ""
        return error_msg, {"error": str(e)}
|
182 |
|
183 |
+
def find_relevant_chunks(document, query, chunk_size=1500, overlap=300):
    """Return the parts of *document* most relevant to *query*.

    The document is split into overlapping windows of ``chunk_size``
    characters (stride ``chunk_size - overlap``); each window is scored by
    how many distinct query words it contains, and the two best windows
    are concatenated into the result.  When nothing matches, the first
    2000 characters of the document are returned as a fallback.
    """
    if not document:
        return ""

    print(f"๊ด๋ จ ์ฒญํฌ ์ฐพ๊ธฐ ์์ - ์ฟผ๋ฆฌ: {query}")

    terms = query.lower().split()
    stride = chunk_size - overlap
    windows = [(pos, document[pos:pos + chunk_size])
               for pos in range(0, len(document), stride)]

    print(f"์ด {len(windows)}๊ฐ์ ์ฒญํฌ๋ก ๋ถํ ๋จ")

    # Score every window by simple keyword hits; keep only positive scores.
    scored = []
    for pos, text in windows:
        lowered = text.lower()
        hits = sum(1 for term in terms if term in lowered)
        if hits > 0:
            scored.append((hits, pos, text))

    # Highest score first; stable sort keeps document order for ties.
    scored.sort(key=lambda entry: entry[0], reverse=True)
    best = scored[:2]  # top 2 windows only (memory budget)

    if not best:
        # No keyword hit anywhere: fall back to the document's head.
        print("๊ด๋ จ ์ฒญํฌ๋ฅผ ์ฐพ์ง ๋ชปํจ, ๋ฌธ์ ์์ ๋ถ๋ถ ๋ฐํ")
        return document[:2000]

    print(f"{len(best)}๊ฐ์ ๊ด๋ จ ์ฒญํฌ ์ฐพ์")
    return "".join(
        f"\n[๋ฌธ์์ {pos}๋ฒ์งธ ์์น์์ ๋ฐ์ท - ๊ด๋ จ๋: {hits}]\n{text}\n"
        for hits, pos, text in best
    )
|
223 |
|
224 |
+
@spaces.GPU(duration=120)
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
):
    """Stream a chat answer, grounding it in the cached PDF context if any.

    Args:
        message: The user's current question.
        history: Previous completed turns.  Each entry is either a
            ``{"user": ..., "assistant": ...}`` dict (as built by the UI's
            ``bot_response``) or a ``[user, assistant]`` pair.
        system_message: Base system prompt configured in the UI.
        max_tokens, temperature, top_p, top_k, repeat_penalty:
            Sampling settings forwarded to llama.cpp.

    Yields:
        str: the accumulated answer text after each streamed token.
    """
    global llm, llm_model

    # Read the shared PDF cache defensively via globals().
    document_context = globals().get('document_context', '')
    document_filename = globals().get('document_filename', '')

    # Verbose diagnostics for debugging context plumbing.
    print(f"\n=== RESPOND ํจ์ ์์ ===")
    print(f"์ฌ์ฉ์ ๋ฉ์์ง: {message}")
    print(f"๋ฌธ์ ์ปจํ์คํธ ์กด์ฌ ์ฌ๋ถ: {bool(document_context)}")
    if document_context:
        print(f"๋ฌธ์ ๊ธธ์ด: {len(document_context)}")
        print(f"๋ฌธ์ ํ์ผ๋ช: {document_filename}")
        print(f"๋ฌธ์ ์์ 100์: {document_context[:100]}...")
    else:
        print("โ ๏ธ document_context๊ฐ ๋น์ด์์ต๋๋ค!")
        print(f"globals()์ ํค๋ค: {list(globals().keys())[:20]}...")  # first 20 keys only

    chat_template = get_messages_formatter_type(MISTRAL_MODEL_NAME)

    # Local path of the GGUF file downloaded at import time.
    model_path_local = os.path.join("./models", MISTRAL_MODEL_NAME)

    # Lazily load (or reload) the model on first use.
    if llm is None or llm_model != MISTRAL_MODEL_NAME:
        print("LLM ๋ชจ๋ธ ๋ก๋ฉ ์ค...")
        llm = Llama(
            model_path=model_path_local,
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=16384,   # context window size
            verbose=True   # detailed llama.cpp logging for debugging
        )
        llm_model = MISTRAL_MODEL_NAME
        print("LLM ๋ชจ๋ธ ๋ก๋ฉ ์๋ฃ!")

    provider = LlamaCppPythonProvider(llm)

    # Start from the user-configured system message.
    korean_system_message = system_message

    # When a document is loaded, embed it in both the system message and
    # an enhanced user message (the latter is only used when no document
    # exists -- see the final_message selection below).
    if document_context and len(document_context) > 0:
        doc_length = len(document_context)
        print(f"๐ ๋ฌธ์ ์ปจํ์คํธ๋ฅผ ๋ฉ์์ง์ ํฌํจํฉ๋๋ค: {doc_length} ๋ฌธ์")

        korean_system_message += f"\n\nํ์ฌ '{document_filename}' PDF ๋ฌธ์๊ฐ ๋ก๋๋์ด ์์ต๋๋ค. ์ฌ์ฉ์์ ๋ชจ๋ ์ง๋ฌธ์ ๋ํด ์ด ๋ฌธ์์ ๋ด์ฉ์ ๋ฐ๋์ ์ฐธ์กฐํ์ฌ ๋ต๋ณํ์ธ์."

        # Cap how much document text is inlined into the message.
        max_doc_length = 4000
        if doc_length > max_doc_length:
            # Long document: include head and tail only.
            doc_snippet = document_context[:2000] + "\n\n[... ์ค๊ฐ ๋ด์ฉ ์๋ต ...]\n\n" + document_context[-1500:]
            enhanced_message = f"""์๋ก๋๋ PDF ๋ฌธ์ ์ ๋ณด:
- ํ์ผ๋ช: {document_filename}
- ๋ฌธ์ ๊ธธ์ด: {doc_length} ๋ฌธ์

๋ฌธ์ ๋ด์ฉ (์ผ๋ถ):
{doc_snippet}

์ฌ์ฉ์ ์ง๋ฌธ: {message}

์ ๋ฌธ์๋ฅผ ์ฐธ๊ณ ํ์ฌ ํ๊ตญ์ด๋ก ๋ต๋ณํด์ฃผ์ธ์."""
        else:
            # Short document: include it in full.
            enhanced_message = f"""์๋ก๋๋ PDF ๋ฌธ์ ์ ๋ณด:
- ํ์ผ๋ช: {document_filename}
- ๋ฌธ์ ๊ธธ์ด: {doc_length} ๋ฌธ์

๋ฌธ์ ๋ด์ฉ:
{document_context}

์ฌ์ฉ์ ์ง๋ฌธ: {message}

์ ๋ฌธ์๋ฅผ ์ฐธ๊ณ ํ์ฌ ํ๊ตญ์ด๋ก ๋ต๋ณํด์ฃผ์ธ์."""

        print(f"๊ฐํ๋ ๋ฉ์์ง ๊ธธ์ด: {len(enhanced_message)}")
        print(f"๋ฉ์์ง ๋ฏธ๋ฆฌ๋ณด๊ธฐ (์ฒ์ 300์):\n{enhanced_message[:300]}...")

        # Dump the final message to disk for offline inspection.
        with open("debug_last_message.txt", "w", encoding="utf-8") as f:
            f.write(f"=== ๋๋ฒ๊ทธ ์ ๋ณด ===\n")
            f.write(f"๋ฌธ์ ๊ธธ์ด: {len(document_context)}\n")
            f.write(f"ํ์ผ๋ช: {document_filename}\n")
            f.write(f"์ฌ์ฉ์ ์ง๋ฌธ: {message}\n")
            f.write(f"\n=== ์ ์ก๋ ๋ฉ์์ง ===\n")
            f.write(enhanced_message)
    else:
        # No document loaded: pass the message through, but append a hint
        # when the question appears to be about a document.
        enhanced_message = message
        if any(keyword in message.lower() for keyword in ["๋ฌธ์", "pdf", "์๋ก๋", "ํ์ผ", "๋ด์ฉ", "์์ฝ"]):
            enhanced_message = f"{message}\n\n[์์คํ ๋ฉ์์ง: ํ์ฌ ์๋ก๋๋ PDF ๋ฌธ์๊ฐ ์์ต๋๋ค. PDF ํ์ผ์ ๋จผ์ ์๋ก๋ํด์ฃผ์ธ์.]"
            print("๋ฌธ์ ๊ด๋ จ ์ง๋ฌธ์ด์ง๋ง ๋ฌธ์๊ฐ ์์")

        print("โ ๏ธ ๊ฒฝ๊ณ : document_context๊ฐ ๋น์ด์์ต๋๋ค!")
        print(f"document_context ํ์: {type(document_context)}")
        print(f"document_context ๊ฐ: {repr(document_context)}")
        print(f"document_filename: {document_filename}")

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # When a document exists, put it in the system prompt and send the raw
    # user message; otherwise send the (possibly hinted) enhanced message.
    if document_context and len(document_context) > 0:
        doc_snippet = document_context[:3000]  # first 3000 characters only
        enhanced_system_prompt = f"""{korean_system_message}

ํ์ฌ ๋ก๋๋ PDF ๋ฌธ์:
ํ์ผ๋ช: {document_filename}
๋ฌธ์ ๋ด์ฉ:
{doc_snippet}
{'' if len(document_context) <= 3000 else '... (์ดํ ์๋ต)'}

์ ๋ฌธ์์ ๋ด์ฉ์ ๋ฐํ์ผ๋ก ์ฌ์ฉ์์ ์ง๋ฌธ์ ๋ต๋ณํ์ธ์."""

        final_message = message
    else:
        enhanced_system_prompt = korean_system_message
        final_message = enhanced_message

    agent = LlamaCppAgent(
        provider,
        system_prompt=enhanced_system_prompt,
        predefined_messages_formatter_type=chat_template,
        debug_output=True
    )

    messages = BasicChatHistory()

    # Replay prior turns into the agent's chat history.
    # FIX: entries arrive as {"user": ..., "assistant": ...} dicts from the
    # UI's bot_response(), but the original code indexed them like pairs
    # (history[i][0] / history[i][1]), raising KeyError on every follow-up
    # question.  Accept both shapes.
    for i, turn in enumerate(history):
        if i >= len(history) - 1:
            continue  # skip the in-flight (current) entry, as before
        if isinstance(turn, dict):
            user_text = turn.get("user")
            assistant_text = turn.get("assistant")
        else:
            user_text, assistant_text = turn[0], turn[1]
        if assistant_text is None:
            continue
        messages.add_message({
            'role': Roles.user,
            'content': user_text
        })
        messages.add_message({
            'role': Roles.assistant,
            'content': assistant_text
        })

    print(f"์ต์ข ๋ฉ์์ง ์ ์ก ์ค: {final_message}")

    # Stream the response token by token, yielding the running text.
    try:
        stream = agent.get_chat_response(
            final_message,
            llm_sampling_settings=settings,
            chat_history=messages,
            returns_streaming_generator=True,
            print_output=False
        )

        outputs = ""
        for output in stream:
            outputs += output
            yield outputs
    except Exception as e:
        print(f"์คํธ๋ฆผ ์์ฑ ์ค ์ค๋ฅ: {e}")
        yield "์ฃ์กํฉ๋๋ค. ์๋ต ์์ฑ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. ๋ค์ ์๋ํด์ฃผ์ธ์."
|
406 |
|
407 |
+
def clear_document_context():
    """Reset the globally cached PDF context and its file name.

    Returns a status string suitable for the UI's status textbox.
    """
    global document_context, document_filename
    document_context, document_filename = "", ""
    return "๐ญ ๋ฌธ์ ์ปจํ์คํธ๊ฐ ์ด๊ธฐํ๋์์ต๋๋ค. ์๋ก์ด PDF๋ฅผ ์๋ก๋ํด์ฃผ์ธ์."
|
|
|
413 |
|
414 |
+
def check_document_status():
    """Describe the currently loaded document (if any) for the status box."""
    global document_context, document_filename

    # Diagnostic logging of the shared state.
    print(f"\n=== ๋ฌธ์ ์ํ ํ์ธ ===")
    print(f"document_context ํ์: {type(document_context)}")
    print(f"document_context ๊ธธ์ด: {len(document_context) if document_context else 0}")
    print(f"document_filename: '{document_filename}'")

    if not document_context:
        return "๐ญ ๋ก๋๋ ๋ฌธ์๊ฐ ์์ต๋๋ค. PDF ํ์ผ์ ์๋ก๋ํด์ฃผ์ธ์."

    print(f"๋ฌธ์ ์ฒซ 100์: {document_context[:100]}")
    return (
        f"โ ๋ฌธ์๊ฐ ๋ก๋๋์ด ์์ต๋๋ค."
        f"\n๐ ํ์ผ๋ช: {document_filename}"
        f"\n๐ ๋ฌธ์ ๊ธธ์ด: {len(document_context):,} ๋ฌธ์"
    )
|
428 |
|
429 |
+
# Gradio UI: theme, chat column, PDF upload panel, advanced settings and
# all event wiring live inside this single Blocks context.
with gr.Blocks(theme=gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="cyan",
    neutral_hue="gray",
    font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]
).set(
    body_background_fill="#f8f9fa",
    block_background_fill="#ffffff",
    block_border_width="1px",
    block_title_background_fill="#e9ecef",
    input_background_fill="#ffffff",
    button_secondary_background_fill="#e9ecef",
    border_color_accent="#dee2e6",
    border_color_primary="#ced4da",
    background_fill_secondary="#f8f9fa",
    color_accent_soft="transparent",
    code_background_fill="#f1f3f5",
), css=css) as demo:

    gr.Markdown("# ์จํ๋ ๋ฏธ์ค ์ต์ ํ 'LLM+RAG ๋ชจ๋ธ' ์๋น์ค์ค")
    gr.Markdown("๐ PDF ๋ฌธ์๋ฅผ ์๋ก๋ํ๋ฉด AI๊ฐ ๋ฌธ์ ๋ด์ฉ์ ๋ถ์ํ์ฌ ์ง๋ฌธ์ ๋ต๋ณํฉ๋๋ค.")
    gr.Markdown("๐ก ์ฌ์ฉ๋ฒ: 1) ์๋์์ PDF ์๋ก๋ โ 2) ๋ฌธ์์ ๋ํ ์ง๋ฌธ ์๋ ฅ โ 3) AI๊ฐ ํ๊ตญ์ด๋ก ๋ต๋ณ")

    # Chat interface placed at the top.
    with gr.Row():
        with gr.Column():
            chatbot = gr.Chatbot(elem_id="chatbot", height=500)
            msg = gr.Textbox(
                label="๋ฉ์์ง ์๋ ฅ",
                placeholder="์ง๋ฌธ์ ์๋ ฅํ์ธ์... (PDF๋ฅผ ์๋ก๋ํ๋ฉด ๋ฌธ์ ๋ด์ฉ์ ๋ํด ์ง๋ฌธํ ์ ์์ต๋๋ค)",
                lines=2
            )
            with gr.Row():
                submit = gr.Button("์ ์ก", variant="primary")
                clear_chat = gr.Button("๋ํ ์ด๊ธฐํ")

    # Example questions in the middle.
    gr.Examples(
        examples=[
            ["์ด ๋ฌธ์๋ ๋ฌด์์ ๊ดํ ๋ด์ฉ์ธ๊ฐ์?"],
            ["์๋ก๋๏ฟฝ๏ฟฝ๏ฟฝ PDF ๋ฌธ์์ ์ฃผ์ ๋ด์ฉ์ ํ๊ตญ์ด๋ก ์์ฝํด์ฃผ์ธ์."],
            ["๋ฌธ์์ ๋์จ ์ผ์ ์ ์๋ ค์ฃผ์ธ์."],
            ["๋ฌธ์์์ ๊ฐ์ฅ ์ค์ํ 3๊ฐ์ง ํต์ฌ ํฌ์ธํธ๋ ๋ฌด์์ธ๊ฐ์?"],
            ["์ด ํ์ฌ์ ๊ฐ์๋ฅผ ์ค๋ชํด์ฃผ์ธ์."]
        ],
        inputs=msg
    )

    # PDF upload section below the chat.
    with gr.Accordion("๐ PDF ๋ฌธ์ ์๋ก๋", open=True):
        with gr.Row():
            with gr.Column(scale=1):
                file_input = gr.File(
                    label="PDF ๋ฌธ์ ์ ํ",
                    file_types=[".pdf"],
                    type="filepath"
                )
                with gr.Row():
                    convert_button = gr.Button("๋ฌธ์ ๋ณํ", variant="primary")
                    clear_button = gr.Button("๋ฌธ์ ์ด๊ธฐํ", variant="secondary")
                    test_button = gr.Button("๋ฌธ์ ํ์คํธ", variant="secondary")

                status_text = gr.Textbox(
                    label="๋ฌธ์ ์ํ",
                    interactive=False,
                    value=check_document_status(),
                    lines=3
                )

            with gr.Column(scale=1):
                with gr.Accordion("๋ณํ๋ ๋ฌธ์ ๋ฏธ๋ฆฌ๋ณด๊ธฐ", open=False):
                    converted_text = gr.Textbox(
                        label="Markdown ๋ณํ ๊ฒฐ๊ณผ",
                        lines=10,
                        max_lines=20,
                        interactive=False
                    )
                    metadata_output = gr.JSON(label="๋ฉํ๋ฐ์ดํฐ")

    # Advanced sampling settings at the very bottom.
    with gr.Accordion("โ๏ธ ๊ณ ๊ธ ์ค์ ", open=False):
        system_message = gr.Textbox(
            value="๋น์ ์ ํ๊ตญ์ด๋ก ๋ต๋ณํ๋ AI ์ด์์คํดํธ์๋๋ค. PDF ๋ฌธ์๊ฐ ์ ๊ณต๋๋ฉด ๊ทธ ๋ด์ฉ์ ์ ํํ ๋ถ์ํ์ฌ ๋ต๋ณํฉ๋๋ค.",
            label="์์คํ ๋ฉ์์ง",
            lines=3
        )
        max_tokens = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="์ต๋ ํ ํฐ ์")
        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.3, step=0.1, label="Temperature (๋ฎ์์๋ก ์ผ๊ด์ฑ ์์)")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.90, step=0.05, label="Top-p")
        top_k = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k")
        repeat_penalty = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")

    # --- Event handlers -------------------------------------------------

    def user_submit(message, history):
        # Clear the textbox and append the user's turn with a pending answer.
        return "", history + [[message, None]]

    def bot_response(history, system_msg, max_tok, temp, top_p_val, top_k_val, rep_pen):
        # Generate the assistant's answer for the last pending turn,
        # streaming partial history updates back to the Chatbot.
        if history and history[-1][1] is None:
            user_message = history[-1][0]

            # Debug: inspect the shared document context state.
            global document_context, document_filename
            print(f"\n=== BOT RESPONSE ์์ ===")
            print(f"์ฌ์ฉ์ ๋ฉ์์ง: {user_message}")
            if document_context:
                print(f"๐ ๋ฌธ์ ์ปจํ์คํธ ํ์ฑ: {document_filename} ({len(document_context)} ๋ฌธ์)")
                print(f"๋ฌธ์ ์ฒซ 200์: {document_context[:200]}...")
            else:
                print("๐ญ ๋ฌธ์ ์ปจํ์คํธ ์์")

            # Re-shape prior turns as {"user": ..., "assistant": ...} dicts.
            # NOTE(review): respond() indexes history entries with [0]/[1],
            # which raises KeyError on these dicts — confirm the expected
            # history entry shape between bot_response() and respond().
            previous_history = []
            for i in range(len(history) - 1):
                if history[i][1] is not None:
                    previous_history.append({
                        "user": history[i][0],
                        "assistant": history[i][1]
                    })

            print(f"์ด์ ๋ํ ์: {len(previous_history)}")

            # Extra logging when answering with a loaded document.
            if document_context and len(document_context) > 0:
                print(f"๐ ๋ฌธ์ ๊ธฐ๋ฐ ์๋ต ์์ฑ ์ค... (๋ฌธ์ ๊ธธ์ด: {len(document_context)})")

            bot_message = ""
            try:
                # respond() is a generator; each yield is the running text.
                for token in respond(
                    user_message,
                    previous_history,
                    system_msg,
                    max_tok,
                    temp,
                    top_p_val,
                    top_k_val,
                    rep_pen
                ):
                    bot_message = token
                    history[-1][1] = bot_message
                    yield history
            except Exception as e:
                print(f"โ ์๋ต ์์ฑ ์ค ์ค๋ฅ: {e}")
                import traceback
                traceback.print_exc()
                history[-1][1] = "์ฃ์กํฉ๋๋ค. ์๋ต ์์ฑ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค. ๋ค์ ์๋ํด์ฃผ์ธ์."
                yield history

    # PDF conversion handler.
    def on_pdf_convert(file):
        # Convert the selected PDF and refresh preview/metadata/status.
        global document_context, document_filename

        if file is None:
            return "", {}, "โ ํ์ผ์ด ์ ํ๋์ง ์์์ต๋๋ค."

        markdown_content, metadata = convert_pdf_to_markdown(file)

        if "error" in metadata:
            status = f"โ ๋ณํ ์คํจ: {metadata['error']}"
        else:
            # Re-assert the module-level cache via globals() (defensive).
            globals()['document_context'] = markdown_content
            globals()['document_filename'] = metadata['filename']

            status = f"โ PDF ๋ฌธ์๊ฐ ์ฑ๊ณต์ ์ผ๋ก ๋ณํ๋์์ต๋๋ค!\n๐ ํ์ผ๋ช: {metadata['filename']}\n๐ ๋ฌธ์ ๊ธธ์ด: {metadata['content_length']:,} ๋ฌธ์\n\n์ด์ ๋ฌธ์ ๋ด์ฉ์ ๋ํด ํ๊ตญ์ด๋ก ์ง๋ฌธํ์ค ์ ์์ต๋๋ค.\n\n์์ ์ง๋ฌธ:\n- ์ด ๋ฌธ์์ ์ฃผ์ ๋ด์ฉ์ ์์ฝํด์ฃผ์ธ์\n- ๋ฌธ์์ ๋์จ ํต์ฌ ๊ฐ๋ ์ ์ค๋ชํด์ฃผ์ธ์"

            print(f"\nโ ๋ฌธ์ ๋ก๋ ์๋ฃ ํ์ธ:")
            print(f"- globals()['document_context'] ๊ธธ์ด: {len(globals()['document_context'])}")
            print(f"- globals()['document_filename']: {globals()['document_filename']}")

            # Final sanity check.
            if len(globals()['document_context']) > 0:
                print("โ ๋ฌธ์๊ฐ ์ฑ๊ณต์ ์ผ๋ก ์ ์ญ ๋ณ์์ ์ ์ฅ๋์์ต๋๋ค!")
            else:
                print("โ ๊ฒฝ๊ณ : ๋ฌธ์๊ฐ ์ ์ญ ๋ณ์์ ์ ์ฅ๋์ง ์์์ต๋๋ค!")

        return markdown_content, metadata, status

    # Auto-convert as soon as a file is selected.
    file_input.change(
        fn=on_pdf_convert,
        inputs=[file_input],
        outputs=[converted_text, metadata_output, status_text]
    )

    # Manual conversion button (same handler).
    convert_button.click(
        fn=on_pdf_convert,
        inputs=[file_input],
        outputs=[converted_text, metadata_output, status_text]
    )

    # Quick self-test of the currently loaded document.
    def test_document():
        global document_context, document_filename
        if document_context:
            test_msg = f"โ ๋ฌธ์ ํ์คํธ ๊ฒฐ๊ณผ:\n"
            test_msg += f"๐ ํ์ผ๋ช: {document_filename}\n"
            test_msg += f"๐ ์ ์ฒด ๊ธธ์ด: {len(document_context):,} ๋ฌธ์\n"
            test_msg += f"๐ ์ฒซ 500์:\n{document_context[:500]}..."
            return test_msg
        else:
            return "โ ํ์ฌ ๋ก๋๋ ๋ฌธ์๊ฐ ์์ต๋๋ค."

    test_button.click(
        fn=test_document,
        outputs=[status_text]
    )

    # Clearing the document also resets the preview and metadata panes.
    clear_button.click(
        fn=clear_document_context,
        outputs=[status_text]
    ).then(
        fn=lambda: ("", {}, check_document_status()),
        outputs=[converted_text, metadata_output, status_text]
    )

    # Chat events: Enter in the textbox or the send button.
    msg.submit(user_submit, [msg, chatbot], [msg, chatbot]).then(
        bot_response,
        [chatbot, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
        chatbot
    )

    submit.click(user_submit, [msg, chatbot], [msg, chatbot]).then(
        bot_response,
        [chatbot, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
        chatbot
    )

    clear_chat.click(lambda: [], None, chatbot)
|
663 |
+
|
664 |
+
if __name__ == "__main__":
    # Ensure the model download directory exists before launching.
    os.makedirs("./models", exist_ok=True)

    # Warn (but do not abort) when no Hugging Face token is configured.
    if not HF_TOKEN:
        print("โ ๏ธ ๊ฒฝ๊ณ : HF_TOKEN์ด ์ค์ ๋์ง ์์์ต๋๋ค. ๋ชจ๋ธ ๋ค์ด๋ก๋์ ์ ํ์ด ์์ ์ ์์ต๋๋ค.")
        print("ํ๊ฒฝ ๋ณ์๋ฅผ ์ค์ ํ๋ ค๋ฉด: export HF_TOKEN='your_huggingface_token'")

    demo.launch(
        server_name="0.0.0.0",  # reachable from the local network
        server_port=7860,
        share=False  # on-prem deployment: public share link disabled
    )
|