merge
Browse files- Dockerfile +18 -4
- app.py +11 -11
- requirements.txt +3 -1
Dockerfile
CHANGED
@@ -1,13 +1,27 @@
|
|
1 |
# For more information, please refer to https://aka.ms/vscode-docker-python
|
2 |
-
FROM python:3.10-slim
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
# Where we'll copy the code
|
5 |
WORKDIR /code
|
6 |
|
7 |
# Copy the current directory contents into the container at /code
|
8 |
COPY ./requirements.txt /code/requirements.txt
|
9 |
-
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
# Creates a non-root user with an explicit UID
|
13 |
# For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers
|
@@ -30,4 +44,4 @@ COPY --chown=user . $HOME/app
|
|
30 |
# Expose port 7860
|
31 |
EXPOSE 7860
|
32 |
ENV GRADIO_SERVER_NAME="0.0.0.0"
|
33 |
-
CMD ["python", "app.py"]
|
|
|
1 |
# For more information, please refer to https://aka.ms/vscode-docker-python
#FROM python:3.10-slim
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
#nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04

#ENV DEBIAN_FRONTEND=noninteractive

# Install Python and pip.
# --no-install-recommends avoids pulling optional packages into the image;
# removing /var/lib/apt/lists keeps the apt cache out of the final layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends python3-pip python3-venv \
    && rm -rf /var/lib/apt/lists/*

# Where we'll copy the code
WORKDIR /code

# Copy the current directory contents into the container at /code
COPY ./requirements.txt /code/requirements.txt

# Install pip requirements without venv
#RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Create a virtual environment and install pip requirements
# (kept separate from the system Python that apt manages).
RUN python3 -m venv /code/venv
RUN /code/venv/bin/pip install --no-cache-dir --upgrade pip
RUN /code/venv/bin/pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Creates a non-root user with an explicit UID
# For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers
|
|
|
44 |
# Expose port 7860
EXPOSE 7860
ENV GRADIO_SERVER_NAME="0.0.0.0"
# Fix: run the app with the virtualenv interpreter. The Ubuntu base image
# provides only `python3` (no `python` binary), and every requirement was
# installed into /code/venv — so `CMD ["python", "app.py"]` either fails to
# start or misses all dependencies.
CMD ["/code/venv/bin/python", "app.py"]
|
app.py
CHANGED
@@ -14,23 +14,23 @@ model = AutoModelForCausalLM.from_pretrained(model_name,device_map="auto",trust_
|
|
14 |
|
15 |
#transfer model on GPU
|
16 |
#model.to("cuda")
|
17 |
-
pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
|
25 |
# Generate text using the model and tokenizer
#@spaces.GPU(duration=60)
def generate_text(input_text):
    """Run the module-level text-generation pipeline on *input_text*.

    Returns the generated string from the first (and only) candidate
    produced by the pipeline.
    """
    candidates = pipe(input_text)
    first_candidate = candidates[0]
    return first_candidate["generated_text"]
|
34 |
|
35 |
interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",title="TeLLMyStory",description="Enter your story idea and the model will generate the story based on it.")
|
36 |
interface.launch()
|
|
|
14 |
|
15 |
#transfer model on GPU
|
16 |
#model.to("cuda")
|
17 |
+
# pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
|
18 |
+
# max_new_tokens=512,
|
19 |
+
# do_sample=True,
|
20 |
+
# temperature=0.7,
|
21 |
+
# top_p=0.95,
|
22 |
+
# top_k=40,
|
23 |
+
# repetition_penalty=1.1)
|
24 |
|
25 |
# Generate text using the model and tokenizer
#@spaces.GPU(duration=60)
def generate_text(input_text):
    """Generate story text for *input_text* with the module-level model/tokenizer.

    Returns the decoded generation — prompt included, as ``model.generate``
    returns the full sequence — with special tokens stripped.
    """
    # Encode the prompt; left on CPU here — device_map="auto" already placed
    # the model (see module-level from_pretrained call).
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    # Sampled decoding; transformers' generate() runs under no_grad internally.
    # NOTE(review): no attention_mask is passed — fine for a single unpadded
    # prompt, but confirm if batching is ever added.
    output = model.generate(input_ids, max_new_tokens=512, top_k=50,
                            top_p=0.95, temperature=0.7, do_sample=True)
    # Fix: skip_special_tokens drops BOS/EOS/pad markers from the
    # user-facing story text instead of leaking them into the UI.
    return tokenizer.decode(output[0], skip_special_tokens=True)
34 |
|
35 |
# Build the Gradio UI: one text box in, the generated story text out.
interface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    title="TeLLMyStory",
    description="Enter your story idea and the model will generate the story based on it.",
)
interface.launch()
|
requirements.txt
CHANGED
@@ -1,6 +1,8 @@
|
|
|
|
1 |
transformers
|
2 |
-
torch
|
3 |
gradio
|
|
|
4 |
huggingface_hub
|
5 |
optimum
|
6 |
#for running auto-gptq
|
|
|
1 |
+
--extra-index-url https://download.pytorch.org/whl/cu118
|
2 |
transformers
|
3 |
+
torch==2.0.1+cu118
|
4 |
gradio
|
5 |
+
#spaces
|
6 |
huggingface_hub
|
7 |
optimum
|
8 |
#for running auto-gptq
|