Ethavanol committed on
Commit
19dd5dc
·
2 Parent(s): 5fca041 4b4390c
Files changed (3) hide show
  1. Dockerfile +18 -4
  2. app.py +11 -11
  3. requirements.txt +3 -1
Dockerfile CHANGED
@@ -1,13 +1,27 @@
1
  # For more information, please refer to https://aka.ms/vscode-docker-python
2
- FROM python:3.10-slim
 
 
 
 
 
 
 
 
3
 
4
  # Where we'll copy the code
5
  WORKDIR /code
6
 
7
  # Copy the current directory contents into the container at /code
8
  COPY ./requirements.txt /code/requirements.txt
9
- # Install pip requirements
10
- RUN pip install -r /code/requirements.txt
 
 
 
 
 
 
11
 
12
  # Creates a non-root user with an explicit UID
13
  # For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers
@@ -30,4 +44,4 @@ COPY --chown=user . $HOME/app
30
  # Expose port 7860
31
  EXPOSE 7860
32
  ENV GRADIO_SERVER_NAME="0.0.0.0"
33
- CMD ["python", "app.py"]
 
1
  # For more information, please refer to https://aka.ms/vscode-docker-python
2
+ #FROM python:3.10-slim
3
+ FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
4
+
5
+ #nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04
6
+
7
+ #ENV DEBIAN_FRONTEND=noninteractive
8
+
9
+ # Install Python and pip
10
+ RUN apt-get update && apt-get install -y python3-pip python3-venv
11
 
12
  # Where we'll copy the code
13
  WORKDIR /code
14
 
15
  # Copy the current directory contents into the container at /code
16
  COPY ./requirements.txt /code/requirements.txt
17
+
18
+ # Install pip requirements without venv
19
+ #RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
20
+
21
+ # Create a virtual environment and install pip requirements
22
+ RUN python3 -m venv /code/venv
23
+ RUN /code/venv/bin/pip install --no-cache-dir --upgrade pip
24
+ RUN /code/venv/bin/pip install --no-cache-dir --upgrade -r /code/requirements.txt
25
 
26
  # Creates a non-root user with an explicit UID
27
  # For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers
 
44
  # Expose port 7860
45
  EXPOSE 7860
46
  ENV GRADIO_SERVER_NAME="0.0.0.0"
47
+ CMD ["python", "app.py"]
app.py CHANGED
@@ -14,23 +14,23 @@ model = AutoModelForCausalLM.from_pretrained(model_name,device_map="auto",trust_
14
 
15
  #transfer model on GPU
16
  #model.to("cuda")
17
- pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
18
- max_new_tokens=512,
19
- do_sample=True,
20
- temperature=0.7,
21
- top_p=0.95,
22
- top_k=40,
23
- repetition_penalty=1.1)
24
 
25
  # Generate text using the model and tokenizer
26
  #@spaces.GPU(duration=60)
27
  def generate_text(input_text):
28
- #input_ids = tokenizer.encode(input_text, return_tensors="pt")#.to("cuda")
29
  #attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
30
- #output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)# attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
31
  #output = model.generate(input_ids) #, attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
32
- #return tokenizer.decode(output[0])
33
- return pipe(input_text)[0]["generated_text"]
34
 
35
  interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",title="TeLLMyStory",description="Enter your story idea and the model will generate the story based on it.")
36
  interface.launch()
 
14
 
15
  #transfer model on GPU
16
  #model.to("cuda")
17
+ # pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
18
+ # max_new_tokens=512,
19
+ # do_sample=True,
20
+ # temperature=0.7,
21
+ # top_p=0.95,
22
+ # top_k=40,
23
+ # repetition_penalty=1.1)
24
 
25
  # Generate text using the model and tokenizer
26
  #@spaces.GPU(duration=60)
27
  def generate_text(input_text):
28
+ input_ids = tokenizer.encode(input_text, return_tensors="pt")#.to("cuda")
29
  #attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
30
+ output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)# attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
31
  #output = model.generate(input_ids) #, attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
32
+ return tokenizer.decode(output[0])
33
+ #return pipe(input_text)[0]["generated_text"]
34
 
35
  interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",title="TeLLMyStory",description="Enter your story idea and the model will generate the story based on it.")
36
  interface.launch()
requirements.txt CHANGED
@@ -1,6 +1,8 @@
 
1
  transformers
2
- torch
3
  gradio
 
4
  huggingface_hub
5
  optimum
6
  #for running auto-gptq
 
1
+ --extra-index-url https://download.pytorch.org/whl/cu118
2
  transformers
3
+ torch==2.0.1+cu118
4
  gradio
5
+ #spaces
6
  huggingface_hub
7
  optimum
8
  #for running auto-gptq