kwabs22 committed
Commit 136e821 · Parent(s): e9869cf

Will it load?

Files changed (2)
  1. app.py +26 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,26 @@
+ import gradio as gr
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+
+ # Load model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16")
+ model = AutoModelForCausalLM.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16", torch_dtype=torch.float16)
+ model = model.to("cuda" if torch.cuda.is_available() else "cpu")  # Move the model to GPU only if one is available
+
+ # Define a function for generating text from a prompt
+ def generate_text(prompt):
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)  # Tokenize input and move to the model's device
+     outputs = model.generate(**inputs, max_new_tokens=100)  # Generate up to 100 new tokens, passing the attention mask too
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)  # Decode and return the text
+
+ # Create Gradio Interface
+ interface = gr.Interface(
+     fn=generate_text,  # Function that handles text generation
+     inputs="text",  # Input is a text box
+     outputs="text",  # Output is a text box
+     title="Meta-Llama-3.1-70B Text Generation",
+     description="Enter a prompt and generate text using Meta-Llama-3.1-70B.",
+ )
+
+ # Launch the Gradio app
+ interface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ transformers
+ gradio
+ torch