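"""Streamlit demo for the AbdulHadi806/Llama-2-7b-finetuned-with-QLoRa model."""
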
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
model_name = "AbdulHadi806/Llama-2-7b-finetuned-with-QLoRa"

@st.cache_resource
def load_model_and_tokenizer(model_name):
    # Load the fine-tuned model and tokenizer once; st.cache_resource
    # caches them so Streamlit reruns do not reload the weights.
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer
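
# Note: loading a 7B-parameter model in full precision takes on the order of
# 28 GB of memory (7B params x 4 bytes). A hedged alternative, assuming torch
# and the accelerate package are installed, is half precision with automatic
# device placement:
#   import torch
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name, torch_dtype=torch.float16, device_map="auto"
#   )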
model, tokenizer = load_model_and_tokenizer(model_name)

# Generate a response for the given topic
def generate_response(topic):
    # Wrap the topic in Llama 2 instruction tags so the fine-tuned chat
    # model sees the prompt format it was trained on
    input_text = f"[INST] {topic} [/INST]"
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    # Generate up to 500 tokens (prompt included); no_repeat_ngram_size=2
    # prevents the model from repeating any bigram verbatim
    output = model.generate(input_ids, max_length=500, num_return_sequences=1, no_repeat_ngram_size=2)
    # Decode the output tokens back to text, dropping special tokens like <s>
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text

# Streamlit app
def main():
    st.title("Llama 2 Fine-Tuned Demo with QLoRa")
    # Sidebar input for the topic
    topic = st.sidebar.text_input("Enter your topic", "a crazy person driving a car")
    # Generate button
    if st.sidebar.button("Generate Response"):
        with st.spinner("Generating response..."):
            response = generate_response(topic)
        st.subheader(f"Generated response on '{topic}':")
        st.write(response)

if __name__ == "__main__":
    main()
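
# To launch the demo locally (assuming this file is saved as app.py),
# use the standard Streamlit CLI:
#   streamlit run app.py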