import os

import gradio as gr
import torch
from llm2vec import LLM2Vec
from peft import PeftModel
from transformers import AutoConfig, AutoModel, AutoTokenizer

# The memory-efficient and flash SDPA kernels can interfere with the
# bidirectional attention that LLM2Vec patches into the decoder, so fall
# back to PyTorch's default math kernel.
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

# Both keys must be present: HF_TOKEN gates the model downloads from the
# Hugging Face Hub, and GROQ_API_KEY is validated up front even though this
# script never calls the Groq API itself.
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
HF_TOKEN = os.getenv('HF_TOKEN')

if not GROQ_API_KEY or not HF_TOKEN:
    raise ValueError("GROQ_API_KEY and HF_TOKEN must be set as environment variables.")

# Load the MNTP checkpoint: tokenizer, config, and base model weights.
tokenizer = AutoTokenizer.from_pretrained("McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp")
config = AutoConfig.from_pretrained(
    "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp", trust_remote_code=True
)
model = AutoModel.from_pretrained(
    "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp",
    trust_remote_code=True,
    config=config,
    torch_dtype=torch.bfloat16,
    device_map="cuda" if torch.cuda.is_available() else "cpu",
)

# Load the MNTP LoRA weights and merge them into the base model, then stack
# the unsupervised SimCSE LoRA weights on top, following the two-step loading
# recipe from the LLM2Vec model cards.
model = PeftModel.from_pretrained(model, "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp")
model = model.merge_and_unload()
model = PeftModel.from_pretrained(
    model, "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse"
)

# Wrap the model for sentence encoding; embeddings are mean-pooled over tokens.
l2v = LLM2Vec(model, tokenizer, pooling_mode="mean", max_length=512)
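# l2v.encode accepts a list of strings and returns a torch.Tensor with one
# embedding row per input text.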


def encode_texts(input_text):
    """Embed one text per line of the textbox input."""
    # gr.Textbox passes a single string, so split it into individual texts
    # instead of iterating over it character by character.
    texts = [line.strip() for line in input_text.split("\n") if line.strip()]
    if not texts:
        return []
    embeddings = l2v.encode(texts)
    # Tensors are not JSON-serializable; convert to nested Python lists.
    return embeddings.tolist()
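
# Example (hypothetical input): encode_texts("first text\nsecond text")
# returns a list of two embedding vectors, one per non-empty line.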


# Minimal Gradio UI: a multiline textbox in, the embeddings out as JSON.
iface = gr.Interface(
    fn=encode_texts,
    inputs=gr.Textbox(lines=5, placeholder="Enter texts separated by newlines..."),
    outputs=gr.JSON(),
)

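# share=True requests a temporary public gradio.live link in addition to the
# local server; it is unnecessary when the app already runs on a hosted Space.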
iface.launch(share=True)