bhaskartripathi committed
Commit 3ce52c0 · 1 Parent(s): d56a9b5

Update app.py

Files changed (1):
  1. app.py +33 -22
app.py CHANGED
@@ -1,20 +1,14 @@
-from peft import PeftModel
-from transformers import LLaMATokenizer, LLaMAForCausalLM, GenerationConfig
-import torch
-n_gpus = torch.cuda.device_count()
-max_memory = {i: max_memory for i in range(n_gpus)}
 
-print(f'Max memory : {max_memory}')
+from peft import PeftModel
+from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
 
-tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")
-max_memory = '40GB'
 
-model = LLaMAForCausalLM.from_pretrained(
+tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
+model = LlamaForCausalLM.from_pretrained(
     "decapoda-research/llama-7b-hf",
     load_in_8bit=True,
-    device_map="auto",max_memory=max_memory
+    device_map="auto",
 )
-
 model = PeftModel.from_pretrained(model, "tloen/alpaca-lora-7b")
 
 def generate_prompt(instruction, input=None):
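
Note on the deleted multi-GPU block above: max_memory = {i: max_memory for i in range(n_gpus)} reads max_memory inside its own defining comprehension, before the later max_memory = '40GB' assignment, so it would raise NameError at import time. For reference, a minimal working sketch of a per-GPU memory map, assuming the "40GB" cap the old code intended (variable names here are illustrative, not from the commit):

    import torch

    # Define the per-device cap first, then build the device -> cap map.
    per_gpu_cap = "40GB"  # illustrative value, mirrors the deleted code
    n_gpus = torch.cuda.device_count()
    max_memory = {i: per_gpu_cap for i in range(n_gpus)}

    # from_pretrained accepts such a map alongside device_map="auto":
    # model = LlamaForCausalLM.from_pretrained(
    #     "decapoda-research/llama-7b-hf",
    #     load_in_8bit=True,
    #     device_map="auto",
    #     max_memory=max_memory,
    # )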
@@ -57,11 +51,24 @@ def evaluate(instruction, input=None):
     output = tokenizer.decode(s)
     print("Response:", output.split("### Response:")[1].strip())
 
-import gradio as gr
+import streamlit as st
 from peft import PeftModel
-from transformers import LLaMATokenizer, LLaMAForCausalLM, GenerationConfig
+from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
+
+model_name = 'bhaskar/LLaMA-7B-peft'
+tokenizer = LlamaTokenizer.from_pretrained(model_name)
+model = LlamaForCausalLM.from_pretrained(model_name).cuda()
+generation_config = GenerationConfig(
+    do_sample=True,
+    max_length=1024,
+    top_p=0.9,
+    temperature=1.0,
+    no_repeat_ngram_size=3,
+    num_return_sequences=1,
+)
 
-import gradio as gr
+def generate_prompt(instruction):
+    return f"### Instruction: {instruction}\n\n### Response:"
 
 def evaluate1(instruction):
     prompt = generate_prompt(instruction)
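
Two notes on this hunk. First, the module now loads two models: the 8-bit decapoda-research/alpaca-lora pair at the top of the file, then a second full-precision model from 'bhaskar/LLaMA-7B-peft' here, which rebinds model and tokenizer so later calls use the second model. Second, the body of evaluate1 that consumes generation_config falls outside the diff context (new lines 75-84 are not shown). A hedged reconstruction of that elided body, inferred only from the visible tokenizer.decode(s) and split("### Response:") lines, might look like:

    import torch

    # Hypothetical sketch of the elided evaluate1 body; everything except
    # generate_prompt, tokenizer, model, and generation_config is assumed.
    def evaluate1(instruction):
        prompt = generate_prompt(instruction)
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
        with torch.no_grad():
            generation_output = model.generate(
                input_ids=input_ids,
                generation_config=generation_config,
            )
        s = generation_output[0]  # single sequence (num_return_sequences=1)
        output = tokenizer.decode(s)
        return output.split("### Response:")[1].strip()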
@@ -78,11 +85,15 @@ def evaluate1(instruction):
     output = tokenizer.decode(s)
     return output.split("### Response:")[1].strip()
 
-inputs = gr.inputs.Textbox(lines=5, label="Instruction")
-outputs = gr.outputs.Textbox(label="Response")
-title = "LLaMA-7B Language Model"
-description = "This is a LLaMA-7B language model fine-tuned on various text datasets to generate text for a given task. It was trained in PyTorch and is capable of generating high-quality, coherent text similar to human writing. The model is highly versatile and can be used for a variety of tasks, including text completion, summarization, and translation."
-copyright = "Copyright Bhaskar Tripathi (2023)"
-
-gr.Interface(evaluate1, inputs, outputs, title=title, description=description, footer=copyright, flag=False).launch()
-
+def main():
+    st.set_page_config(page_title="LLaMA-7B Language Model")
+    st.title("LLaMA-7B Language Model")
+    st.write("This is a LLaMA-7B language model fine-tuned on various text datasets to generate text for a given task. It was trained in PyTorch and is capable of generating high-quality, coherent text similar to human writing. The model is highly versatile and can be used for a variety of tasks, including text completion, summarization, and translation.")
+    instruction = st.text_area("Instruction", height=200)
+    if st.button("Generate Response"):
+        with st.spinner("Generating response..."):
+            output = evaluate1(instruction)
+            st.write(output)
+
+if __name__ == "__main__":
+    main()
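
With Gradio replaced by Streamlit, the app is no longer launched by executing the script's gr.Interface(...).launch() call directly; under the usual Streamlit workflow (an assumption about deployment, not stated in the commit) it would be started from the CLI, for example:

    pip install streamlit transformers peft accelerate bitsandbytes
    streamlit run app.py

accelerate and bitsandbytes back the device_map="auto" and load_in_8bit=True options used in the first hunk.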