bhaskartripathi committed on
Commit 90f74fc · 1 Parent(s): 2706075

Create app.py

Files changed (1)
  app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
+ from peft import PeftModel
+ from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
+
+ # Load the base LLaMA-7B weights in 8-bit (requires bitsandbytes) and apply the Alpaca-LoRA adapter.
+ tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
+ model = LlamaForCausalLM.from_pretrained(
+     "decapoda-research/llama-7b-hf",
+     load_in_8bit=True,
+     device_map="auto",
+ )
+ model = PeftModel.from_pretrained(model, "tloen/alpaca-lora-7b")
+
+ def generate_prompt(instruction, input=None):
+     if input:
+         return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. Answer step by step.
+
+ ### Instruction:
+ {instruction}
+
+ ### Input:
+ {input}
+
+ ### Response:"""
+     else:
+         return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request. Answer step by step.
+
+ ### Instruction:
+ {instruction}
+
+ ### Response:"""
+
+ generation_config = GenerationConfig(
+     temperature=0.1,
+     top_p=0.75,
+     num_beams=4,
+ )
+
+ # Print the model's response for a given instruction (and optional input).
+ def evaluate(instruction, input=None):
+     prompt = generate_prompt(instruction, input)
+     inputs = tokenizer(prompt, return_tensors="pt")
+     input_ids = inputs["input_ids"].cuda()
+     generation_output = model.generate(
+         input_ids=input_ids,
+         generation_config=generation_config,
+         return_dict_in_generate=True,
+         output_scores=True,
+         max_new_tokens=256
+     )
+     for s in generation_output.sequences:
+         output = tokenizer.decode(s)
+         print("Response:", output.split("### Response:")[1].strip())
+
+ import gradio as gr
+
+ # Return the model's response as a string, for use as the Gradio callback.
+ def evaluate1(instruction):
+     prompt = generate_prompt(instruction)
+     inputs = tokenizer(prompt, return_tensors="pt")
+     input_ids = inputs["input_ids"].cuda()
+     generation_output = model.generate(
+         input_ids=input_ids,
+         generation_config=generation_config,
+         return_dict_in_generate=True,
+         output_scores=True,
+         max_new_tokens=256
+     )
+     for s in generation_output.sequences:
+         output = tokenizer.decode(s)
+         return output.split("### Response:")[1].strip()
+
+ inputs = gr.Textbox(lines=5, label="Instruction")
+ outputs = gr.Textbox(label="Response")
+ title = "LLaMA-7B Language Model"
+ description = "This is a LLaMA-7B language model fine-tuned on various text datasets to generate text for a given task. It was trained using PyTorch and is capable of generating high-quality, coherent text that is similar to human writing. The model is highly versatile and can be used for a variety of tasks, including text completion, summarization, and translation."
+ copyright = "Copyright Bhaskar Tripathi (2023)"
+
+ # gr.Interface has no "footer" or "flag" keyword; show the copyright via "article" and disable flagging.
+ gr.Interface(
+     evaluate1,
+     inputs,
+     outputs,
+     title=title,
+     description=description,
+     article=copyright,
+     allow_flagging="never",
+ ).launch()
+
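A minimal usage sketch of the evaluate1 callback defined above, called directly instead of through the Gradio UI. It assumes the model and LoRA adapter loaded successfully onto a CUDA-capable GPU; the instruction text is only an illustrative placeholder.

    # Hypothetical direct call, bypassing the Gradio interface.
    print(evaluate1("Explain, step by step, how gradient descent updates model weights."))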