ThomasBlumet committed

Commit 03071d5 · 1 Parent(s): 38d893e

new try with new app

Files changed (3):
  1. Dockerfile +8 -8
  2. app.py +82 -76
  3. requirements.txt +5 -4
Dockerfile CHANGED
@@ -1,10 +1,10 @@
  # For more information, please refer to https://aka.ms/vscode-docker-python
- FROM python:3.10-slim
+ #FROM python:3.10-slim

- #FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
+ FROM nvidia/cuda:11.7.0-cudnn8-devel-ubuntu22.04

  # Install Python and pip
- #RUN apt-get update && apt-get install -y python3-pip python3-venv
+ RUN apt-get update && apt-get install -y python3-pip python3-venv

  # Where we'll copy the code
  WORKDIR /code
@@ -13,15 +13,15 @@ WORKDIR /code
  COPY ./requirements.txt /code/requirements.txt

  # Install pip requirements without venv
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+ #RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

  # Create a virtual environment and install pip requirements
- # RUN python3 -m venv /code/venv
- # RUN /code/venv/bin/pip install --no-cache-dir --upgrade pip
- # RUN /code/venv/bin/pip install --no-cache-dir --upgrade -r /code/requirements.txt
+ RUN python3 -m venv /code/venv
+ RUN /code/venv/bin/pip install --no-cache-dir --upgrade pip
+ RUN /code/venv/bin/pip install --no-cache-dir --upgrade -r /code/requirements.txt

  # Set the PATH to include the virtual environment's bin directory
- # ENV PATH="/code/venv/bin:$PATH"
+ ENV PATH="/code/venv/bin:$PATH"

  # Creates a non-root user with an explicit UID
  # For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers
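The switch to the nvidia/cuda base image plus the /code/venv virtual environment can be sanity-checked from inside a running container. The snippet below is only a sketch under assumptions not in the commit (the file name check_gpu.py and the docker run flags are illustrative); it verifies that the venv interpreter is first on PATH and that torch sees the GPU:

# check_gpu.py -- hypothetical helper, not part of this commit.
# Run inside the built image, e.g. `docker run --gpus all <image> python3 check_gpu.py`.
import sys
import torch

# ENV PATH="/code/venv/bin:$PATH" should make the venv interpreter the default one.
print("interpreter:", sys.executable)            # expected: /code/venv/bin/python3
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("device:", torch.cuda.get_device_name(0))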
app.py CHANGED
@@ -1,80 +1,86 @@
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
- from transformers.utils import logging
  import gradio as gr
- #import spaces
-
- # Define the logger instance for the transformers library
- logger = logging.get_logger("transformers")
-
- # Load the model and tokenizer
- model_name = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ" #"openai-community/gpt2" or "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ" or "TheBloke/Llama-2-7B-Chat-GGML" or "TheBloke/zephyr-7B-beta-GPTQ"
- tokenizer = AutoTokenizer.from_pretrained(model_name,use_fast=True)
- model = AutoModelForCausalLM.from_pretrained(model_name,device_map="auto",trust_remote_code=False,revision="main")
- #tokenizer.pad_token_id = tokenizer.eos_token_id
-
- #transfer model on GPU
- #model.to("cuda")
- pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
-                 max_new_tokens=512,
-                 do_sample=True,
-                 temperature=0.7,
-                 top_p=0.95,
-                 top_k=40,
-                 repetition_penalty=1.1)
-
- # Generate text using the model and tokenizer
- #@spaces.GPU(duration=60)
- def generate_text(input_text):
-     #input_ids = tokenizer.encode(input_text, return_tensors="pt")#.to("cuda")
-     #attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
-     #output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)# attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
-     #output = model.generate(input_ids) #, attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
-     #return tokenizer.decode(output[0])
-     return pipe(input_text)[0]["generated_text"]
-
- interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",title="TeLLMyStory",description="Enter your story idea and the model will generate the story based on it.")
- interface.launch()
-
-
- # Example of disabling Exllama backend (if applicable in your configuration)
- #config = {"disable_exllama": True}
- #model.config.update(config)
-
- # def generate_text(prompt):
- #     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512, padding="max_length")
- #     summary_ids = model.generate(inputs["input_ids"], max_new_tokens=512, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
- #     return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
-
- # #for training the model after the data is collected
- # #model.save_pretrained("model")
- # #tokenizer.save_pretrained("model")
-
- # #for the app functions
-
- # def show_output_text(message):
- #     history.append((message,""))
- #     story = generate_text(message)
- #     history[-1] = (message,story)
- #     return story
-
- # def clear_textbox():
- #     return None,None
-
- # # Create the input interface with Gradio
-
- # with gr.Blocks() as demo:
- #     gr.Markdown("TeLLMyStory chatbot")
- #     with gr.Row():
- #         input_text = gr.Textbox(label="Enter your story idea here", placeholder="Once upon a time...")
- #         clear_button = gr.Button("Clear",variant="secondary")
- #         submit_button = gr.Button("Submit", variant="primary")
-
- #     with gr.Row():
- #         gr.Markdown("And see the story take shape here")
- #         output_text = gr.Textbox(label="History")

- #     submit_button.click(fn=show_output_text, inputs=input_text,outputs=output_text)
- #     clear_button.click(fn=clear_textbox,outputs=[input_text,output_text])
- # # Launch the interface

+ #last version of app.py
+ from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig
+ import torch
+ import optimum
+ import auto_gptq
  import gradio as gr
+ import time
+
+ device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
+
+ model_name = "TheBloke/zephyr-7B-beta-GPTQ"
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name,use_fast=True,padding_side="left")
+ quantization_config_loading = GPTQConfig(
+     bits=4,
+     group_size=128,
+     disable_exllama=False)
+ model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config_loading, device_map="auto")
+ model = model.to(device)
+
+ def generate_text(input_text,max_new_tokens=512,top_k=50,top_p=0.95,temperature=0.7,no_grad=False):
+     tokenizer.pad_token_id = tokenizer.eos_token_id
+     input_ids = tokenizer.encode(input_text, padding=True, return_tensors="pt").to(device)
+     attention_mask = input_ids.ne(tokenizer.pad_token_id).long().to(device)
+     output = None
+     if no_grad:
+         with torch.no_grad():
+             output = model.generate(input_ids, attention_mask=attention_mask, max_new_tokens=max_new_tokens, top_k=top_k, top_p=top_p, temperature=temperature,do_sample=True)
+     else:
+         output = model.generate(input_ids, attention_mask=attention_mask, max_new_tokens=max_new_tokens, top_k=top_k, top_p=top_p, temperature=temperature,do_sample=True)
+     return tokenizer.decode(output[0][input_ids['input_ids'].shape[1]:], skip_special_tokens=True)
+
+
+ time_story = 0
+
+ def generate_response(input,history: list[tuple[str, str]],max_tokens, temperature, top_p):
+     messages=[]
+     for val in history:
+         # Directly access content using "content" key
+         messages.extend([{"role": "user", "content": val.get("content")}, {"role": "assistant", "content": val.get("content")}]) if val else None
+
+     messages.append({"role": "user", "content": input})
+     print(f'Time to generate the story: {time_story}')
+     start = time.time()
+     output = generate_text(input,max_new_tokens=max_tokens, top_p=top_p, temperature=temperature)
+     end = time.time()
+     time_story= end-start
+     history.append((input,output))
+     yield output
+
+ #define the chat interface
+ title = "TeLLMyStory"
+ description = "A LLM for stories generation aiming the reinforcement of the controllability aspect"
+ theme = gr.Theme.from_hub("Yntec/HaleyCH_Theme_Yellow_Blue")
+ examples=[["Once upon a time a witch named Malefique was against the wedding of her daughter with the son of the king of the nearby kingdom."],
+           ["Once upon a time an ice-cream met a spoon and they fell in love"],
+           ["The neverending day began with a beautiful sunshine and an AI robot which was seeking humans on the desert Earth."]]
+
+ demo = gr.ChatInterface(
+     generate_response,
+     type="messages",
+     title=title,
+     description=description,
+     theme=theme,
+     examples=examples,
+     additional_inputs=[
+         gr.Slider(minimum=1, maximum=2048, value=100, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=0.95,
+             step=0.05,
+             label="Top-p (nucleus sampling)",
+         ),
+     ],

+     stop_btn="Stop",
+     delete_cache=[60,60],
+     show_progress="full",
+     save_history=True,
+ )

+ if __name__ == "__main__":
+     demo.launch(share=True,debug=True)
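The core of the new app is the generate_text helper: encode the prompt, sample a continuation, then decode only the tokens that come after the prompt. Below is a minimal standalone sketch of that flow, assuming the tokenizer, model and device objects created above; the prompt string and token budget are illustrative, and the prompt length is taken from input_ids.shape[1] because tokenizer.encode returns a plain tensor:

# Sketch, not part of the commit: exercise the encode -> generate -> decode flow.
prompt = "Once upon a time"                      # illustrative prompt
tokenizer.pad_token_id = tokenizer.eos_token_id  # mirror the committed code: pad with EOS
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
attention_mask = input_ids.ne(tokenizer.pad_token_id).long().to(device)

output = model.generate(
    input_ids,
    attention_mask=attention_mask,
    max_new_tokens=64,        # illustrative budget
    do_sample=True,
    top_k=50,
    top_p=0.95,
    temperature=0.7,
)

# tokenizer.encode returned a tensor, so the prompt length is input_ids.shape[1];
# decoding from that index onward keeps only the newly generated continuation.
story = tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True)
print(story)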
requirements.txt CHANGED
@@ -1,8 +1,9 @@
- #--extra-index-url https://download.pytorch.org/whl/cu118
- #torch==2.0.1+cu118
+ huggingface_hub
+ --extra-index-url https://download.pytorch.org/whl/cu117
  torch
  transformers
  gradio
- huggingface_hub
  optimum
- auto-gptq
+ accelerate
+ --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu117/
+ auto-gptq
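The two --extra-index-url lines are what point pip at the CUDA 11.7 builds of torch and auto-gptq, matching the nvidia/cuda:11.7.0 base image in the Dockerfile. A quick check that the resolved wheels are the CUDA ones could look like the sketch below, run inside the image; the version strings mentioned in comments are expectations, not pins in the commit:

# Sketch: confirm that the CUDA 11.7 wheels pulled via the extra index URLs resolved.
import torch
import accelerate
import auto_gptq  # the import alone fails if the cu117 auto-gptq wheel did not install

print("torch:", torch.__version__)                 # a ...+cu117 build is expected
print("torch built against CUDA:", torch.version.cuda)
print("accelerate:", accelerate.__version__)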