import shutil
import requests
import sys
from typing import Optional, List, Tuple
import json
from langchain_community.llms import HuggingFaceHub
## Loading the model to answer questions
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig

# Load the PEFT adapter config to find the base model it was fine-tuned from
peft_model_id = "Ubaidbhat/zephr_finance_finetuned"
config = PeftConfig.from_pretrained(peft_model_id)
print(config.base_model_name_or_path)
# 4-bit quantization so the base model fits in GPU memory
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Place the model on the current GPU if one is available, otherwise fall back to CPU
d_map = {"": torch.cuda.current_device()} if torch.cuda.is_available() else None

# Load the quantized base model and its tokenizer, then apply the fine-tuned LoRA adapter
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path, quantization_config=bnb_config, device_map=d_map
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(model, peft_model_id)
model = model.merge_and_unload()  # fold the adapter weights into the base model
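# Optional sanity check of the merged model (hypothetical prompt; commented out
# so the Space starts straight into the Gradio app defined below):
# inputs = tokenizer("What is portfolio diversification?", return_tensors="pt").to(model.device)
# output_ids = model.generate(**inputs, max_new_tokens=40)
# print(tokenizer.decode(output_ids[0], skip_special_tokens=True))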
## Creating the base model chain
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import pipeline
from langchain_core.output_parsers import StrOutputParser
from langchain.chains import LLMChain

# Wrap the merged model in a transformers text-generation pipeline
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    do_sample=True,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=400,
    pad_token_id=tokenizer.eos_token_id,
)

# Expose the pipeline to LangChain as an LLM
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
# Zephyr-style chat prompt: system instruction, user question, assistant turn
prompt_template = """
<|system|>
Answer the question based on your knowledge.
</s>
<|user|>
{question}
</s>
<|assistant|>
"""

prompt = PromptTemplate(
    input_variables=["question"],
    template=prompt_template,
)

# LCEL chain: prompt -> model -> plain-string output
llm_chain = prompt | llm | StrOutputParser()
def inference(question):
    llmAnswer = llm_chain.invoke({"question": question})
    llmAnswer = llmAnswer.rstrip()
    return llmAnswer
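# Hypothetical quick test of the chain (commented out; the Gradio app below is
# the intended entry point):
# print(inference("What is the difference between stocks and bonds?"))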
import gradio as gr
from langchain_core.runnables import RunnablePassthrough

def predict(question):
    return inference(question)

# Simple Gradio UI: a single question box mapped to the model's text answer
pred = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Question"),
    ],
    outputs="text",
    title="Fine-tuned Zephyr Model in the Finance Domain.",
)
pred.launch(share=True)