Leo8613 committed on
Commit
1b3fa16
·
verified ·
1 Parent(s): 36476e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -33
app.py CHANGED
@@ -1,60 +1,61 @@
 
1
  import gradio as gr
2
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
3
 
4
- # Load the model and tokenizer
5
- tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B")
6
- model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-1B")
7
 
8
- # Use a pipeline for text generation
9
- text_gen_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 
 
 
 
 
 
 
10
 
11
- # Text generation function with repetition penalty and no_repeat_ngram_size
12
- def generate_text(prompt, max_length=50, temperature=0.7, top_p=0.9, top_k=50, repetition_penalty=1.2, no_repeat_ngram_size=3):
 
 
 
 
 
 
 
 
 
13
  generated_text = text_gen_pipeline(prompt,
14
  max_length=max_length,
15
  temperature=temperature,
16
  top_p=top_p,
17
  top_k=top_k,
18
- repetition_penalty=repetition_penalty, # Penalty to avoid repetitions
19
- no_repeat_ngram_size=no_repeat_ngram_size, # Avoid repeating n-grams
20
  num_return_sequences=1)
21
  return generated_text[0]['generated_text']
22
 
23
- # Gradio Interface
24
  with gr.Blocks() as demo:
25
  gr.Markdown("## Text Generation with Llama 3.2 - 1B")
 
26
 
27
- # Input box for user prompt
28
  prompt_input = gr.Textbox(label="Input (Prompt)", placeholder="Enter your prompt here...")
29
-
30
- # Slider for maximum text length
31
- max_length_input = gr.Slider(minimum=10, maximum=200, value=50, step=10, label="Maximum Length")
32
-
33
- # Slider for temperature (controls creativity)
34
- temperature_input = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature (creativity)")
35
-
36
- # Slider for top_p (nucleus sampling)
37
  top_p_input = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.1, label="Top-p (nucleus sampling)")
 
 
 
38
 
39
- # Slider for top_k (controls diversity)
40
- top_k_input = gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top-k (sampling diversity)")
41
-
42
- # Slider for repetition penalty
43
- repetition_penalty_input = gr.Slider(minimum=1.0, maximum=2.0, value=1.2, step=0.1, label="Repetition Penalty")
44
-
45
- # Slider for no_repeat_ngram_size
46
- no_repeat_ngram_size_input = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="No Repeat N-Gram Size")
47
-
48
- # Output box for the generated text
49
  output_text = gr.Textbox(label="Generated Text")
50
-
51
- # Submit button
52
  generate_button = gr.Button("Generate")
53
 
54
- # Action on button click
55
  generate_button.click(generate_text,
56
  inputs=[prompt_input, max_length_input, temperature_input, top_p_input, top_k_input, repetition_penalty_input, no_repeat_ngram_size_input],
57
  outputs=output_text)
58
 
59
- # Launch the app
60
  demo.launch()
 
1
+ # Step 2: Import necessary libraries
2
  import gradio as gr
3
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
4
 
5
+ # Step 3: Load the model and tokenizer
6
+ model_name = "unsloth/Llama-3.2-1B"
 
7
 
8
+ try:
9
+ # Attempt to load the tokenizer and model
10
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
11
+ model = AutoModelForCausalLM.from_pretrained(model_name)
12
+ print(f"Successfully loaded model: {model_name}")
13
+ except Exception as e:
14
+ # Handle errors and notify the user
15
+ print(f"Error loading model or tokenizer: {e}")
16
+ tokenizer = None
17
+ model = None
18
 
19
+ # Step 4: Use a pipeline for text generation if model is loaded
20
+ if model is not None and tokenizer is not None:
21
+ text_gen_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
22
+ else:
23
+ text_gen_pipeline = None
24
+
25
+ # Step 5: Define the text generation function
26
+ def generate_text(prompt, max_length=40, temperature=0.8, top_p=0.9, top_k=40, repetition_penalty=1.5, no_repeat_ngram_size=4):
27
+ if text_gen_pipeline is None:
28
+ return "Model not loaded. Please check the model name or try a different one."
29
+
30
  generated_text = text_gen_pipeline(prompt,
31
  max_length=max_length,
32
  temperature=temperature,
33
  top_p=top_p,
34
  top_k=top_k,
35
+ repetition_penalty=repetition_penalty,
36
+ no_repeat_ngram_size=no_repeat_ngram_size,
37
  num_return_sequences=1)
38
  return generated_text[0]['generated_text']
39
 
40
+ # Step 6: Set up the Gradio interface
41
  with gr.Blocks() as demo:
42
  gr.Markdown("## Text Generation with Llama 3.2 - 1B")
43
+ gr.Markdown("For more details, check out this [Google Colab notebook](https://colab.research.google.com/drive/1TCyQNWMQzsjit_z3-0jHCQYfFTpawh8r#scrollTo=5-6MhJj0ZVpk).")
44
 
 
45
  prompt_input = gr.Textbox(label="Input (Prompt)", placeholder="Enter your prompt here...")
46
+ max_length_input = gr.Slider(minimum=10, maximum=200, value=40, step=10, label="Maximum Length")
47
+ temperature_input = gr.Slider(minimum=0.1, maximum=1.0, value=0.8, step=0.1, label="Temperature (creativity)")
 
 
 
 
 
 
48
  top_p_input = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.1, label="Top-p (nucleus sampling)")
49
+ top_k_input = gr.Slider(minimum=1, maximum=100, value=40, step=1, label="Top-k (sampling diversity)")
50
+ repetition_penalty_input = gr.Slider(minimum=1.0, maximum=2.0, value=1.5, step=0.1, label="Repetition Penalty")
51
+ no_repeat_ngram_size_input = gr.Slider(minimum=1, maximum=10, value=4, step=1, label="No Repeat N-Gram Size")
52
 
 
 
 
 
 
 
 
 
 
 
53
  output_text = gr.Textbox(label="Generated Text")
 
 
54
  generate_button = gr.Button("Generate")
55
 
 
56
  generate_button.click(generate_text,
57
  inputs=[prompt_input, max_length_input, temperature_input, top_p_input, top_k_input, repetition_penalty_input, no_repeat_ngram_size_input],
58
  outputs=output_text)
59
 
60
+ # Step 7: Launch the app
61
  demo.launch()