harsh-manvar commited on
Commit
0efa81e
·
verified ·
1 Parent(s): 42d0d17

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +12 -55
  2. app.py +77 -0
  3. requirements.txt +3 -0
Dockerfile CHANGED
@@ -1,60 +1,17 @@
1
- FROM ghcr.io/huggingface/chat-ui:latest AS base
2
 
3
- FROM ghcr.io/huggingface/text-generation-inference:latest AS final
 
4
 
5
- ARG MODEL_NAME
6
- ENV MODEL_NAME=${MODEL_NAME}
 
7
 
8
- ENV TZ=Europe/Paris \
9
- PORT=3000
10
 
11
- # mongo installation
12
- RUN curl -fsSL https://www.mongodb.org/static/pgp/server-7.0.asc | \
13
- gpg -o /usr/share/keyrings/mongodb-server-7.0.gpg \
14
- --dearmor
15
 
16
- RUN echo "deb [ arch=amd64,arm64 signed-by=/usr/share/keyrings/mongodb-server-7.0.gpg ] https://repo.mongodb.org/apt/ubuntu jammy/mongodb-org/7.0 multiverse" | tee /etc/apt/sources.list.d/mongodb-org-7.0.list
17
-
18
- RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
19
- mongodb-org && \
20
- rm -rf /var/lib/apt/lists/*
21
-
22
- # node installation
23
- RUN curl -fsSL https://deb.nodesource.com/setup_20.x | /bin/bash -
24
-
25
- RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
26
- nodejs && \
27
- rm -rf /var/lib/apt/lists/*
28
-
29
- # image setup
30
- RUN useradd -m -u 1000 user
31
-
32
- RUN mkdir /app
33
- RUN chown -R 1000:1000 /app
34
- RUN mkdir /data
35
- RUN chown -R 1000:1000 /data
36
-
37
- # Switch to the "user" user
38
- USER user
39
-
40
- ENV HOME=/home/user \
41
- PATH=/home/user/.local/bin:$PATH
42
-
43
- RUN npm config set prefix /home/user/.local
44
- RUN npm install -g dotenv-cli
45
-
46
-
47
- # copy chat-ui from base image
48
- COPY --from=base --chown=1000 /app/node_modules /app/node_modules
49
- COPY --from=base --chown=1000 /app/package.json /app/package.json
50
- COPY --from=base --chown=1000 /app/build /app/build
51
-
52
- COPY --from=base --chown=1000 /app/.env /app/.env
53
- COPY --chown=1000 .env.local /app/.env.local
54
-
55
- COPY --chown=1000 entrypoint.sh /app/entrypoint.sh
56
-
57
- RUN chmod +x /app/entrypoint.sh
58
-
59
- # entrypoint
60
- ENTRYPOINT [ "/app/entrypoint.sh" ]
 
1
FROM python:3.9-slim

# Set working directory
WORKDIR /app

# Install dependencies BEFORE copying the application code so that
# code-only changes do not invalidate the (expensive) pip install layer
# — Docker caches layers top-down.
COPY requirements.txt /app
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code (changes here reuse the cached pip layer)
COPY app.py /app

# Gradio binds to 127.0.0.1 by default, which is unreachable from outside
# the container; force it to listen on all interfaces so the EXPOSEd port
# actually serves traffic.
ENV GRADIO_SERVER_NAME=0.0.0.0

# Expose the default Gradio port
EXPOSE 7860

# Run the application
CMD ["python", "app.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Gradio front-end that serves a Hugging Face model through vLLM.

Fixes vs. the original upload:
- vLLM exposes ``LLM`` as its high-level entry point; there is no
  ``LLMEngine(model=...)`` keyword constructor.
- ``LLM.generate`` takes prompt *strings* and tokenizes internally, so the
  manual ``tokenizer(prompt, return_tensors="pt")`` step is unnecessary.
- Results come back as ``RequestOutput`` objects: the text lives at
  ``outputs[0].outputs[0].text``, not ``output[0]["token_ids"]``.
"""
import gradio as gr
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Model to serve. The tokenizer is kept available for any future prompt
# pre/post-processing, but vLLM handles tokenization itself.
model_name = "Qwen/Qwen2-7B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
engine = LLM(model=model_name)


def generate_response(prompt, max_tokens, temperature, top_p):
    """Generate a completion for ``prompt`` with the given sampling settings.

    Args:
        prompt: The user prompt (plain string; vLLM tokenizes internally).
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature (UI range 0.1-1.0).
        top_p: Nucleus-sampling probability mass (UI range 0.1-1.0).

    Returns:
        The generated text for the first (and only) completion.
    """
    sampling_params = SamplingParams(
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    # LLM.generate accepts a list of prompts and returns one RequestOutput
    # per prompt; each holds its completions in `.outputs`.
    outputs = engine.generate([prompt], sampling_params)
    return outputs[0].outputs[0].text


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Hugging Face Integration with vLLM")
    gr.Markdown("Generate text using the vLLM integration with Hugging Face models.")

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Enter your prompt here...",
                lines=3,
            )
            max_tokens = gr.Slider(
                label="Max Tokens",
                minimum=10,
                maximum=500,
                value=100,
                step=10,
            )
            temperature = gr.Slider(
                label="Temperature",
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
            )
            top_p = gr.Slider(
                label="Top P",
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.1,
            )
            submit_button = gr.Button("Generate")

        with gr.Column():
            output_text = gr.Textbox(
                label="Generated Text",
                lines=10,
                interactive=False,
            )

    submit_button.click(
        generate_response,
        inputs=[prompt_input, max_tokens, temperature, top_p],
        outputs=output_text,
    )

# Launch the app (guarded so importing this module doesn't start a server).
if __name__ == "__main__":
    # 0.0.0.0 makes the server reachable from outside the Docker container;
    # 7860 matches the port EXPOSEd by the Dockerfile.
    demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# NOTE(review): gradio is pinned to 3.33.1 (an older 3.x release) — confirm
# the app's Blocks/Slider usage is intended for this version before upgrading.
gradio==3.33.1
# NOTE(review): vllm and transformers are unpinned, so builds are not
# reproducible and a breaking upstream release can silently break the image;
# consider pinning exact versions once a known-good pair is validated.
vllm
transformers