Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
Commit · 17aa59f
1 Parent(s): 03d9166
								
migrated to groq -- suuuuuper fast!
Browse files
 - .gitignore +2 -1
 - Dockerfile +23 -6
 - main.py +25 -10
 - requirements.txt +4 -0
 
    	
        .gitignore
    CHANGED
    
    | 
         @@ -1,5 +1,6 @@ 
     | 
|
| 1 | 
         
             
            __pycache__
         
     | 
| 2 | 
         
             
            venv
         
     | 
| 3 | 
         
             
            .vscode
         
     | 
| 
         | 
|
| 4 | 
         
             
            # script for some housekeeping
         
     | 
| 5 | 
         
            -
            f.py 
     | 
| 
         | 
|
| 1 | 
         
             
            __pycache__
         
     | 
| 2 | 
         
             
            venv
         
     | 
| 3 | 
         
             
            .vscode
         
     | 
| 4 | 
         
            +
            .env
         
     | 
| 5 | 
         
             
            # script for some housekeeping
         
     | 
| 6 | 
         
            +
            f.py
         
     | 
    	
        Dockerfile
    CHANGED
    
    | 
         @@ -1,9 +1,26 @@ 
     | 
|
| 1 | 
         
            -
            FROM ollama/ollama
         
     | 
| 2 | 
         | 
| 3 | 
         
            -
            RUN mkdir -p /.ollama && chmod 777 /.ollama
         
     | 
| 4 | 
         | 
| 5 | 
         
            -
            ENV OLLAMA_MAX_LOADED_MODELS=20 --OLLAMA_NUM_PARALLEL=20
         
     | 
| 6 | 
         
            -
            ENV OLLAMA_HOST "0.0.0.0:7860"
         
     | 
| 7 | 
         | 
| 8 | 
         
            -
            CMD ["serve"]
         
     | 
| 9 | 
         
            -
            EXPOSE 7860
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            # FROM ollama/ollama
         
     | 
| 2 | 
         | 
| 3 | 
         
            +
            # RUN mkdir -p /.ollama && chmod 777 /.ollama
         
     | 
| 4 | 
         | 
| 5 | 
         
            +
            # ENV OLLAMA_MAX_LOADED_MODELS=20 --OLLAMA_NUM_PARALLEL=20
         
     | 
| 6 | 
         
            +
            # ENV OLLAMA_HOST "0.0.0.0:7860"
         
     | 
| 7 | 
         | 
| 8 | 
         
            +
            # CMD ["serve"]
         
     | 
| 9 | 
         
            +
            # EXPOSE 7860
         
     | 
| 10 | 
         
            +
             
     | 
| 11 | 
         
            +
            FROM python:3
         
     | 
| 12 | 
         
            +
             
     | 
| 13 | 
         
            +
            RUN useradd -m -u 1000 user
         
     | 
| 14 | 
         
            +
            USER user
         
     | 
| 15 | 
         
            +
            ENV HOME=/home/user \
         
     | 
| 16 | 
         
            +
                PATH=/home/user/.local/bin:$PATH
         
     | 
| 17 | 
         
            +
             
     | 
| 18 | 
         
            +
            COPY --chown=user . $HOME/LLM_API
         
     | 
| 19 | 
         
            +
             
     | 
| 20 | 
         
            +
            WORKDIR $HOME/LLM_API
         
     | 
| 21 | 
         
            +
             
     | 
| 22 | 
         
            +
            RUN mkdir $HOME/.cache
         
     | 
| 23 | 
         
            +
             
     | 
| 24 | 
         
            +
            RUN pip install --no-cache-dir --upgrade -r requirements.txt
         
     | 
| 25 | 
         
            +
             
     | 
| 26 | 
         
            +
            CMD ["gunicorn", "-w", "5", "-b", "0.0.0.0:7860","main:app"]
         
     | 
    	
        main.py
    CHANGED
    
    | 
         @@ -1,24 +1,39 @@ 
     | 
|
| 1 | 
         
             
            from flask import Flask
         
     | 
| 2 | 
         
             
            from flask import request
         
     | 
| 3 | 
         
            -
            from  
     | 
| 
         | 
|
| 
         | 
|
| 4 | 
         | 
| 5 | 
         
             
            app = Flask(__name__)
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 6 | 
         | 
| 7 | 
         
            -
            llm = Ollama(model="phi3")
         
     | 
| 8 | 
         | 
| 9 | 
         
            -
            @app.route( 
     | 
| 10 | 
         
             
            def completion():
         
     | 
| 11 | 
         
             
                """
         
     | 
| 12 | 
         
             
                {
         
     | 
| 13 | 
         
            -
                     
     | 
| 14 | 
         
            -
                     
     | 
| 15 | 
         
             
                }
         
     | 
| 16 | 
         
             
                """
         
     | 
| 17 | 
         
            -
             
     | 
| 18 | 
         
             
                message = request.get_json()
         
     | 
| 19 | 
         
            -
                llm_output = llm.invoke(message['text'])
         
     | 
| 20 | 
         | 
| 21 | 
         
            -
                 
     | 
| 22 | 
         
            -
                 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 23 | 
         | 
| 24 | 
         
            -
            # curl -v -X POST 'http://127.0.0.1:8000/ 
     | 
| 
         | 
|
| 1 | 
         
             
            from flask import Flask
         
     | 
| 2 | 
         
             
            from flask import request
         
     | 
| 3 | 
         
            +
            from groq import Groq
         
     | 
| 4 | 
         
            +
             
     | 
| 5 | 
         
            +
            import os
         
     | 
| 6 | 
         | 
| 7 | 
         
             
            app = Flask(__name__)
         
     | 
| 8 | 
         
            +
            client = Groq(
         
     | 
| 9 | 
         
            +
                api_key=os.environ.get("GROQ_API_KEY")
         
     | 
| 10 | 
         
            +
            )
         
     | 
| 11 | 
         | 
| 
         | 
|
| 12 | 
         | 
| 13 | 
         
            +
            @app.route("/api/generate", methods=['POST'])
         
     | 
| 14 | 
         
             
            def completion():
         
     | 
| 15 | 
         
             
                """
         
     | 
| 16 | 
         
             
                {
         
     | 
| 17 | 
         
            +
                    "model": "llama3-70b-8192",
         
     | 
| 18 | 
         
            +
                    "prompt": "why is the sky blue?"
         
     | 
| 19 | 
         
             
                }
         
     | 
| 20 | 
         
             
                """
         
     | 
| 21 | 
         
            +
             
     | 
| 22 | 
         
             
                message = request.get_json()
         
     | 
| 
         | 
|
| 23 | 
         | 
| 24 | 
         
            +
                model = message['model']
         
     | 
| 25 | 
         
            +
                prompt = message['prompt']
         
     | 
| 26 | 
         
            +
             
     | 
| 27 | 
         
            +
                chat_completion = client.chat.completions.create(
         
     | 
| 28 | 
         
            +
                    messages=[
         
     | 
| 29 | 
         
            +
                        {
         
     | 
| 30 | 
         
            +
                            "role": "user",
         
     | 
| 31 | 
         
            +
                            "content": prompt,
         
     | 
| 32 | 
         
            +
                        }
         
     | 
| 33 | 
         
            +
                    ],
         
     | 
| 34 | 
         
            +
                    model=model,
         
     | 
| 35 | 
         
            +
                )
         
     | 
| 36 | 
         
            +
             
     | 
| 37 | 
         
            +
                return chat_completion.choices[0].message.content 
         
     | 
| 38 | 
         | 
| 39 | 
         
            +
            # curl -v -X POST 'http://127.0.0.1:8000/api/generate' --header 'Content-Type: application/json' --data '{"model": "llama3-70b-8192", "prompt": "why is sky blue?"}'
         
     | 
    	
        requirements.txt
    CHANGED
    
    | 
         @@ -9,6 +9,7 @@ charset-normalizer==3.3.2 
     | 
|
| 9 | 
         
             
            click==8.1.7
         
     | 
| 10 | 
         
             
            dataclasses-json==0.6.6
         
     | 
| 11 | 
         
             
            diskcache==5.6.3
         
     | 
| 
         | 
|
| 12 | 
         
             
            dnspython==2.6.1
         
     | 
| 13 | 
         
             
            email_validator==2.1.1
         
     | 
| 14 | 
         
             
            fastapi==0.111.0
         
     | 
| 
         @@ -18,6 +19,7 @@ Flask==3.0.3 
     | 
|
| 18 | 
         
             
            frozenlist==1.4.1
         
     | 
| 19 | 
         
             
            fsspec==2024.5.0
         
     | 
| 20 | 
         
             
            greenlet==3.0.3
         
     | 
| 
         | 
|
| 21 | 
         
             
            gunicorn==22.0.0
         
     | 
| 22 | 
         
             
            h11==0.14.0
         
     | 
| 23 | 
         
             
            httpcore==1.0.5
         
     | 
| 
         @@ -58,6 +60,7 @@ referencing==0.35.1 
     | 
|
| 58 | 
         
             
            requests==2.31.0
         
     | 
| 59 | 
         
             
            rich==13.7.1
         
     | 
| 60 | 
         
             
            rpds-py==0.18.1
         
     | 
| 
         | 
|
| 61 | 
         
             
            shellingham==1.5.4
         
     | 
| 62 | 
         
             
            sniffio==1.3.1
         
     | 
| 63 | 
         
             
            SQLAlchemy==2.0.30
         
     | 
| 
         @@ -76,4 +79,5 @@ uvloop==0.19.0 
     | 
|
| 76 | 
         
             
            watchfiles==0.21.0
         
     | 
| 77 | 
         
             
            websockets==12.0
         
     | 
| 78 | 
         
             
            Werkzeug==3.0.3
         
     | 
| 
         | 
|
| 79 | 
         
             
            yarl==1.9.4
         
     | 
| 
         | 
|
| 9 | 
         
             
            click==8.1.7
         
     | 
| 10 | 
         
             
            dataclasses-json==0.6.6
         
     | 
| 11 | 
         
             
            diskcache==5.6.3
         
     | 
| 12 | 
         
            +
            distro==1.9.0
         
     | 
| 13 | 
         
             
            dnspython==2.6.1
         
     | 
| 14 | 
         
             
            email_validator==2.1.1
         
     | 
| 15 | 
         
             
            fastapi==0.111.0
         
     | 
| 
         | 
|
| 19 | 
         
             
            frozenlist==1.4.1
         
     | 
| 20 | 
         
             
            fsspec==2024.5.0
         
     | 
| 21 | 
         
             
            greenlet==3.0.3
         
     | 
| 22 | 
         
            +
            groq==0.8.0
         
     | 
| 23 | 
         
             
            gunicorn==22.0.0
         
     | 
| 24 | 
         
             
            h11==0.14.0
         
     | 
| 25 | 
         
             
            httpcore==1.0.5
         
     | 
| 
         | 
|
| 60 | 
         
             
            requests==2.31.0
         
     | 
| 61 | 
         
             
            rich==13.7.1
         
     | 
| 62 | 
         
             
            rpds-py==0.18.1
         
     | 
| 63 | 
         
            +
            setuptools==70.0.0
         
     | 
| 64 | 
         
             
            shellingham==1.5.4
         
     | 
| 65 | 
         
             
            sniffio==1.3.1
         
     | 
| 66 | 
         
             
            SQLAlchemy==2.0.30
         
     | 
| 
         | 
|
| 79 | 
         
             
            watchfiles==0.21.0
         
     | 
| 80 | 
         
             
            websockets==12.0
         
     | 
| 81 | 
         
             
            Werkzeug==3.0.3
         
     | 
| 82 | 
         
            +
            wheel==0.43.0
         
     | 
| 83 | 
         
             
            yarl==1.9.4
         
     |