## Loading the model to answer questions
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig

peft_model_id = "Ubaidbhat/zephr_finance_finetuned"
config = PeftConfig.from_pretrained(peft_model_id)
print(config.base_model_name_or_path)

# 4-bit NF4 quantization so the base model fits in much less GPU memory
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Map the whole model onto the current GPU if one is available; otherwise fall back to CPU
d_map = {"": torch.cuda.current_device()} if torch.cuda.is_available() else None

model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
    device_map=d_map,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Attach the LoRA adapter, then merge it into the base weights for faster inference
model = PeftModel.from_pretrained(model, peft_model_id)
model = model.merge_and_unload()

## Creating the base model chain
from transformers import pipeline
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    do_sample=True,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=400,
    pad_token_id=tokenizer.eos_token_id,
)

llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Zephyr-style chat template: system / user / assistant turns
prompt_template = """
<|system|>
Answer the question based on your knowledge.
<|user|>
{question}
<|assistant|>
"""

prompt = PromptTemplate(
    input_variables=["question"],
    template=prompt_template,
)

# LCEL chain: prompt -> LLM -> plain-string output
llm_chain = prompt | llm | StrOutputParser()


def inference(question):
    llm_answer = llm_chain.invoke({"question": question})
    return llm_answer.rstrip()


## Gradio UI
import gradio as gr


def predict(question):
    return inference(question)


pred = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Question"),
    ],
    outputs="text",
    title="Finetuned Zephyr Model in the Finance Domain.",
)
pred.launch(share=True)
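
# A minimal sanity-check sketch: query the chain directly, without the Gradio UI.
# The question string below is just an illustration; run these lines in place of
# pred.launch(share=True) above (launch() blocks until the server is stopped).
#
#   print(inference("What is the difference between a stock and a bond?"))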