.PHONY: show-params
###############################################################################
# GLOBALS                                                                     #
###############################################################################
PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
PROJECT_NAME := $(shell basename $(subst -,_,$(PROJECT_DIR)))
PROJECT_NAME_LOWER := $(shell echo $(PROJECT_NAME) | tr '[:upper:]' '[:lower:]')
ENVIRONMENT_NAME = $(PROJECT_NAME)
PYTHON_INTERPRETER = python3
PIP_INTERPRETER = pip
PYTHON_VERSION = 3.9
PIP_VERSION = 22.3
# --- REQUIREMENTS-RELATED
REQUIREMENTS_FILE = $(PROJECT_DIR)/requirements.txt
REQUIREMENTS_FILE_TEMP = $(PROJECT_DIR)/requirements.tmp
REQUIREMENTS_DEV_FILE = $(PROJECT_DIR)/requirements-dev.txt
REQUIREMENTS_DEV_FILE_TEMP = $(PROJECT_DIR)/requirements-dev.tmp
REQUIREMENTS_DEPLOYMENT_FILE = $(PROJECT_DIR)/requirements-deploy.txt
REQUIREMENTS_DEPLOYMENT_FILE_TEMP = $(PROJECT_DIR)/requirements-deploy.tmp
# --- PATHS TO PROJECT DIRECTORIES
DATA_DIRECTORY = $(PROJECT_DIR)/data
SRC_DIRECTORY = $(PROJECT_DIR)/src
API_DIRECTORY = $(SRC_DIRECTORY)/api
DATA_PROCESSING_DIRECTORY = $(SRC_DIRECTORY)/data_processing
TRAINING_DIRECTORY = $(SRC_DIRECTORY)/training
# -- Docker-related
# Variable used for turning on/off Docker Buildkit
DOCKER_BUILDKIT_VALUE=1
LOCAL_DEVELOPMENT_DIR_PATH="$(PROJECT_DIR)/docker"
# -- API-related
INPUT_APP_PORT=8501
OUTPUT_APP_PORT=8501
API_WEBSERVER_URL="http://localhost:$(INPUT_APP_PORT)"
# -- App-related
APP_SERVER_PORT=7860
APP_WEBSERVER_URL="http://localhost:$(APP_SERVER_PORT)"
# ----------------------------- Python-specific -------------------------------
# - Check which type of Python tooling is available (conda or pyenv)
# - Anaconda
ifeq (,$(shell which conda))
HAS_CONDA=False
else
HAS_CONDA=True
# We need to specify the following commands in order to properly activate the
# Anaconda environment.
SHELL=/bin/bash
# Note: the extra 'conda activate' is needed to ensure that the activated
# environment is floated to the front of PATH.
CONDA_ACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda activate ; conda activate
CONDA_DEACTIVATE=source $$(conda info --base)/etc/profile.d/conda.sh ; conda deactivate ; conda deactivate
endif
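# Illustrative use of the helpers above inside a recipe (this mirrors how the
# 'init' target below calls them):
#   @ ($(CONDA_ACTIVATE) $(ENVIRONMENT_NAME) ; $(MAKE) requirements)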
# - Pyenv
ifeq (,$(shell which pyenv))
HAS_PYENV=False
else
HAS_PYENV=True
endif
###############################################################################
# VARIABLES FOR COMMANDS                                                      #
###############################################################################
## Show the set of input parameters
show-params:
	@ printf "\n-------- GENERAL ---------------\n"
	@ echo "PROJECT_DIR: $(PROJECT_DIR)"
	@ echo "PROJECT_NAME: $(PROJECT_NAME)"
	@ echo "LOCAL_DEVELOPMENT_DIR_PATH: $(LOCAL_DEVELOPMENT_DIR_PATH)"
	@ echo "ENVIRONMENT_NAME: $(ENVIRONMENT_NAME)"
	@ echo "PYTHON_INTERPRETER: $(PYTHON_INTERPRETER)"
	@ echo "PYTHON_VERSION: $(PYTHON_VERSION)"
	@ echo "PIP_VERSION: $(PIP_VERSION)"
	@ echo "REQUIREMENTS_FILE: $(REQUIREMENTS_FILE)"
	@ echo "REQUIREMENTS_FILE_TEMP: $(REQUIREMENTS_FILE_TEMP)"
	@ echo "REQUIREMENTS_DEV_FILE: $(REQUIREMENTS_DEV_FILE)"
	@ echo "REQUIREMENTS_DEV_FILE_TEMP: $(REQUIREMENTS_DEV_FILE_TEMP)"
	@ echo "REQUIREMENTS_DEPLOYMENT_FILE: $(REQUIREMENTS_DEPLOYMENT_FILE)"
	@ echo "REQUIREMENTS_DEPLOYMENT_FILE_TEMP: $(REQUIREMENTS_DEPLOYMENT_FILE_TEMP)"
	@ printf "\n-------- DOCKER ---------------\n"
	@ echo "DOCKER_BUILDKIT_VALUE: $(DOCKER_BUILDKIT_VALUE)"
	@ printf "\n-------- PYTHON ---------------\n"
	@ echo "HAS_CONDA: $(HAS_CONDA)"
	@ echo "HAS_PYENV: $(HAS_PYENV)"
	@ printf "\n-------- LOCAL DEVELOPMENT ---------------\n"
	@ echo "LOCAL_DEV_DOCKER_PROJECT_NAME: $(LOCAL_DEV_DOCKER_PROJECT_NAME)"
	@ echo "LOCAL_DEV_SERVICE_NAME: $(LOCAL_DEV_SERVICE_NAME)"
	@ printf "\n-------- API ---------------\n"
| @ echo "APP_PORT: $(APP_PORT)" | |
| @ echo "APP_WEBSERVER_URL: $(APP_WEBSERVER_URL)" | |
| @ echo "API_SERVICE_NAME: $(API_SERVICE_NAME)" | |
| @ echo "API_DOCKER_PROJECT_NAME: $(API_DOCKER_PROJECT_NAME)" | |
| @ printf "\n-----------------------\n" | |
| ## Initialize the repository for code development | |
| init: clean create-envrc delete-environment create-environment | |
| ifeq (True,$(HAS_CONDA)) | |
| @ ($(CONDA_ACTIVATE) $(ENVIRONMENT_NAME) ; $(MAKE) requirements) | |
| @ printf "\n\n>>> New Conda environment created. Activate with: \n\t: conda activate $(ENVIRONMENT_NAME)" | |
| @ $(MAKE) show-params | |
| @ printf "\n\n>>> Project initialized!" | |
| @ ($(CONDA_ACTIVATE) $(ENVIRONMENT_NAME) ; $(MAKE) pre-commit-install ) | |
| @ ($(CONDA_ACTIVATE) $(ENVIRONMENT_NAME) ; $(MAKE) lint ) | |
| else | |
| @ direnv allow || echo "" | |
| @ echo ">>> Continuing installation ..." | |
| @ $(MAKE) requirements | |
| @ $(MAKE) show-params | |
| @ printf "\n\n>>> Project initialized!\n" | |
| @ $(MAKE) pre-commit-install | |
| @ $(MAKE) lint | |
| endif | |
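# Illustrative first-time workflow from a fresh clone (assumes conda, or
# pyenv + direnv, is already installed, as detected in the GLOBALS section):
#   make init          # create the environment, install requirements, set up pre-commit
#   make show-params   # inspect the resolved configuration
#   make destroy       # tear the environment and artifacts down again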
## Remove ALL of the artifacts + Python environments
destroy: clean pre-commit-uninstall delete-environment
	@ echo ">>> Deleted all artifacts and environments!"
###############################################################################
# MISCELLANEOUS COMMANDS                                                      #
###############################################################################
# -------------------- Functions for cleaning repository ----------------------
## Removes artifacts from the build stage, and other common Python artifacts.
clean: clean-build clean-pyc clean-test clean-secrets clean-model-files clean-images
## Removes Python file artifacts
clean-pyc:
	find . -name '*.pyc' -exec rm -f {} +
	find . -name '*.pyo' -exec rm -f {} +
	find . -name '*~' -exec rm -f {} +
	find . -name '__pycache__' -exec rm -fr {} +
## Remove build artifacts
clean-build:
	rm -fr build/
	rm -fr dist/
	rm -fr .eggs/
	find . -name '*.egg-info' -exec rm -fr {} +
	find . -name '*.egg' -exec rm -f {} +
## Remove test and coverage artifacts
clean-test:
	rm -fr .tox/
	rm -f .coverage
	rm -fr htmlcov/
	rm -fr .pytest_cache
## Remove files related to pre-trained models
clean-model-files:
	find . -name '*.pt' -exec rm -fr {} +
	find . -name "runs" -type d -exec rm -rf {} + || echo ""
## Clean left-over images
clean-images:
	find . -name '*.png' -exec rm -fr {} +
	find . -name '*.jpg' -exec rm -fr {} +
## Removes secret artifacts - Serverless
clean-secrets:
	find . -name "node_modules" -type d -exec rm -rf {} + || echo ""
	find . -name ".serverless" -type d -exec rm -rf {} + || echo ""
# ---------------------- Functions for local environment ----------------------
## Set up the envrc file for the project.
create-envrc:
	@ echo "cat $(PROJECT_DIR)/template.envrc > $(PROJECT_DIR)/.envrc"
	@ cat $(PROJECT_DIR)/template.envrc > $(PROJECT_DIR)/.envrc
## Delete the local envrc file of the project
delete-envrc:
	@ rm -rf $(PROJECT_DIR)/.envrc || echo ""
## Install git-flow
git-flow-install:
	@ ( ( if [[ ! -f "`which git-flow`" ]]; then \
		echo "No git-flow installed!" ; \
		if [[ -f "`which brew`" ]]; then \
			echo "Homebrew installed"; \
			HOMEBREW_NO_AUTO_UPDATE=1 brew install git-flow; \
		elif [[ -f "`which apt-get`" ]]; then \
			echo "apt-get installed"; \
			apt-get install git-flow; \
		else \
			echo "Could not locate a package manager! (brew or apt-get)"; \
		fi; \
	fi ) && git flow init -f -d) || echo "Git-Flow setup could not be completed"
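# Note: on Debian/Ubuntu hosts the apt-get branch above may need elevated
# privileges and a non-interactive flag, e.g. 'sudo apt-get install -y git-flow'.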
# ---------------------- Functions for Python environment ---------------------
## Creates the Python environment
create-environment:
ifeq (True,$(HAS_CONDA))
	@ echo ">>> Detected CONDA ... Creating new conda environment!"
	@ echo ">>> \tCreating environment: \t $(ENVIRONMENT_NAME)"
	@ conda create --name $(ENVIRONMENT_NAME) python=$(PYTHON_VERSION) -y || echo ""
	@ echo ">>> New conda environment created. Activate with: \n conda activate $(ENVIRONMENT_NAME)"
else ifeq (True,$(HAS_PYENV))
	@ echo ">>> Detected PYENV ... Creating new Pyenv environment!"
	@ echo ">>> \tCreating environment: \t $(ENVIRONMENT_NAME)"
	@ pyenv virtualenv $(PYTHON_VERSION) $(ENVIRONMENT_NAME) || echo ""
	@ pyenv local $(ENVIRONMENT_NAME)
	@ echo ">>> New Pyenv environment created: '$(ENVIRONMENT_NAME)'"
	@ pyenv virtualenvs
	@ echo
endif
## Deletes the Python environment
delete-environment:
ifeq (True,$(HAS_CONDA))
	@ echo ">>> Detected CONDA ... Deleting Conda environment, if applicable!"
	@ echo ">>> Deleting environment: '$(ENVIRONMENT_NAME)'"
	@ ($(CONDA_DEACTIVATE) ; conda env remove --name $(ENVIRONMENT_NAME) -y) || echo ""
	@ echo ">>> Conda environment deleted: '$(ENVIRONMENT_NAME)'"
else ifeq (True,$(HAS_PYENV))
	@ echo ">>> Detected PYENV ... Deleting Pyenv environment!"
	@ echo ">>> Deleting environment: '$(ENVIRONMENT_NAME)'"
	@ pyenv uninstall -f $(ENVIRONMENT_NAME) || echo ""
	@ rm $(PROJECT_DIR)/.python-version || echo ""
	@ echo ">>> Pyenv environment deleted: '$(ENVIRONMENT_NAME)'"
	@ pyenv virtualenvs
	@ echo
endif
## Upgrade the version of the 'pip' package
pip-upgrade:
	@ $(PYTHON_INTERPRETER) -m pip install --no-cache-dir -q --upgrade pip==$(PIP_VERSION)
## Sort the project requirements files
sort-requirements:
	@ sort $(REQUIREMENTS_FILE) | grep "\S" > $(REQUIREMENTS_FILE_TEMP) && \
		mv $(REQUIREMENTS_FILE_TEMP) $(REQUIREMENTS_FILE)
	@ sort $(REQUIREMENTS_DEV_FILE) | grep "\S" > $(REQUIREMENTS_DEV_FILE_TEMP) && \
		mv $(REQUIREMENTS_DEV_FILE_TEMP) $(REQUIREMENTS_DEV_FILE)
	@ sort $(REQUIREMENTS_DEPLOYMENT_FILE) | grep "\S" > $(REQUIREMENTS_DEPLOYMENT_FILE_TEMP) && \
		mv $(REQUIREMENTS_DEPLOYMENT_FILE_TEMP) $(REQUIREMENTS_DEPLOYMENT_FILE)
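# Illustrative effect of 'make sort-requirements' (package names are examples
# only): a requirements.txt containing
#   pandas==1.5.3
#
#   numpy==1.24.0
# is rewritten in place as
#   numpy==1.24.0
#   pandas==1.5.3
# i.e. sorted alphabetically with blank lines dropped.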
## Install Python dependencies into the Python environment
requirements: pip-upgrade sort-requirements
	@ $(PYTHON_INTERPRETER) -m pip install --no-cache-dir -q -r $(REQUIREMENTS_DEV_FILE)
# -------------------------- Functions for Code Linting -----------------------
## Install the pre-commit Git hook
pre-commit-install:
	@ pre-commit install
## Uninstall the pre-commit Git hook
pre-commit-uninstall:
	@ pre-commit uninstall
## Run the 'pre-commit' linting step manually
lint:
	@ pre-commit run -a --hook-stage manual
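# Example: run the whole pre-commit hook suite by hand across the repository:
#   make lint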
###############################################################################
# Docker Commands - Local development                                         #
###############################################################################
LOCAL_DEV_DOCKER_PROJECT_NAME="$(PROJECT_NAME_LOWER)_localdev_dind"
LOCAL_DEV_SERVICE_NAME="local-dev"
## Prune unused Docker objects (stopped containers, dangling images, networks)
docker-prune:
	@ docker system prune -f
## Stop the API, App, and local-development services
all-stop: api-stop docker-local-dev-stop app-stop
	@ echo "All services are down"
## Start the API, App, and local-development services
all-start: api-start docker-local-dev-start app-start
	@ echo "All services are up!"
## Build local development Docker image
docker-local-dev-build: docker-prune
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(LOCAL_DEV_DOCKER_PROJECT_NAME) \
		build $(LOCAL_DEV_SERVICE_NAME)
## Start service for local development
docker-local-dev-start: docker-local-dev-build docker-local-dev-stop
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(LOCAL_DEV_DOCKER_PROJECT_NAME) \
		up -d $(LOCAL_DEV_SERVICE_NAME)
## Stop service for local development
docker-local-dev-stop:
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(LOCAL_DEV_DOCKER_PROJECT_NAME) \
		stop $(LOCAL_DEV_SERVICE_NAME)
	@ $(MAKE) docker-prune
## Open a shell session inside the local-development container
docker-local-dev-login:
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(LOCAL_DEV_DOCKER_PROJECT_NAME) \
		exec \
		$(LOCAL_DEV_SERVICE_NAME) /bin/zsh
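# Illustrative local-development loop using the targets above:
#   make docker-local-dev-start   # build the image and start the container
#   make docker-local-dev-login   # open a zsh shell inside the running container
#   make docker-local-dev-stop    # stop the container and prune leftover objects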
###############################################################################
# Docker Commands - API-related                                               #
###############################################################################
API_DOCKER_PROJECT_NAME="$(PROJECT_NAME_LOWER)_api"
API_SERVICE_NAME="api"
## Build API Docker image
api-build: docker-prune
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(API_DOCKER_PROJECT_NAME) \
		build $(API_SERVICE_NAME)
## Start API Docker image container
api-start: api-stop api-build
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(API_DOCKER_PROJECT_NAME) \
		up -d $(API_SERVICE_NAME)
## Stop API Docker image container
api-stop:
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(API_DOCKER_PROJECT_NAME) \
		stop $(API_SERVICE_NAME)
	@ $(MAKE) docker-prune
## Open API in web browser
api-web:
	@ $(PYTHON_INTERPRETER) -m webbrowser "$(API_WEBSERVER_URL)/docs"
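# Illustrative API workflow using the targets above:
#   make api-start   # build and start the API container
#   make api-web     # open $(API_WEBSERVER_URL)/docs in the browser
#   make api-stop    # stop the container and prune leftover objects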
###############################################################################
# Docker Commands - App-related                                               #
###############################################################################
APP_DOCKER_PROJECT_NAME="$(PROJECT_NAME_LOWER)_app"
APP_SERVICE_NAME="app"
## Build App Docker image
app-build: docker-prune
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(APP_DOCKER_PROJECT_NAME) \
		build $(APP_SERVICE_NAME)
## Start App Docker image container
app-start: app-stop app-build
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(APP_DOCKER_PROJECT_NAME) \
		up -d $(APP_SERVICE_NAME)
## Stop App Docker image container
app-stop:
	@ cd $(LOCAL_DEVELOPMENT_DIR_PATH) && \
		docker compose \
		--project-name $(APP_DOCKER_PROJECT_NAME) \
		stop $(APP_SERVICE_NAME)
	@ $(MAKE) docker-prune
## Open App in web browser
app-web:
	@ $(PYTHON_INTERPRETER) -m webbrowser "$(APP_WEBSERVER_URL)"
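# Illustrative App workflow (mirrors the API targets above):
#   make app-start   # build and start the App container
#   make app-web     # open $(APP_WEBSERVER_URL) in the browser
#   make app-stop    # stop the container and prune leftover objects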
###############################################################################
# Unit Tests and Code checking                                                #
###############################################################################
# See: https://github.com/google/addlicense for more information
## Add licenses to Python files
add-licenses:
	@ docker run -it \
		-v ${PWD}:/src \
		ghcr.io/google/addlicense \
		-f ./LICENSE.rst \
		./src/**/*.py
## Open up all web endpoints
all-web: api-web app-web
| @ echo "All web endpoints opened!" | |
| ############################################################################### | |
| # PROJECT AND DATA FUNCTIONS # | |
| ############################################################################### | |
| DATASET_PATH="https://raw.githubusercontent.com/hamzafarooq/maven-mlsystem-design-cohort-1/main/data/df_embed.csv" | |
| DATASET_WITH_SUMMARIES_NAME="cicero_dataset_with_summaries" | |
| DATASET_WITH_FAISS_AND_EMBEDDINGS_NAME="cicero_dataset_with_embeddings_and_faiss_index" | |
| HUGGING_FACE_REPOSITORY_NAME="cicero_synthesizer" | |
| FAISS_OUTPUT_FILENAME="cicero_faiss_index" | |
| ## Run the data preparation on the input dataset | |
| prepare_data: | |
| @ $(PYTHON_INTERPRETER) \ | |
| $(DATA_PROCESSING_DIRECTORY)/prepare_dataset.py \ | |
| --dataset-path $(DATASET_PATH) | |
| ## Run the script for creating a FAISS index and text embeddings of the dataset | |
| run_faiss_and_embeddings: | |
| @ $(PYTHON_INTERPRETER) \ | |
| $(TRAINING_DIRECTORY)/create_faiss_corpus_index.py \ | |
| --dataset-name $(DATASET_WITH_SUMMARIES_NAME) \ | |
| --output-dataset-name $(DATASET_WITH_FAISS_AND_EMBEDDINGS_NAME) \ | |
| --repository-name $(HUGGING_FACE_REPOSITORY_NAME) \ | |
| --faiss-index-name $(FAISS_OUTPUT_FILENAME) | |
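# Illustrative end-to-end data pipeline using the two targets above:
#   make prepare_data               # process the raw CSV at DATASET_PATH
#   make run_faiss_and_embeddings   # compute embeddings and build the FAISS index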
###############################################################################
# Self Documenting Commands                                                   #
###############################################################################
.DEFAULT_GOAL := help
# Inspired by <http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html>
# sed script explained:
# /^##/:
# 	* save line in hold space
# 	* purge line
# 	* Loop:
# 		* append newline + line to hold space
# 		* go to next line
# 		* if line starts with doc comment, strip comment character off and loop
# 	* remove target prerequisites
# 	* append hold space (+ newline) to line
# 	* replace newline plus comments by `---`
# 	* print line
# Separate expressions are necessary because labels cannot be delimited by
# semicolon; see <http://stackoverflow.com/a/11799865/1968>
help:
	@echo "$$(tput bold)Available rules:$$(tput sgr0)"
	@echo
	@sed -n -e "/^## / { \
		h; \
		s/.*//; \
		:doc" \
		-e "H; \
		n; \
		s/^## //; \
		t doc" \
		-e "s/:.*//; \
		G; \
		s/\\n## /---/; \
		s/\\n/ /g; \
		p; \
	}" ${MAKEFILE_LIST} \
	| LC_ALL='C' sort --ignore-case \
	| awk -F '---' \
		-v ncol=$$(tput cols) \
		-v indent=25 \
		-v col_on="$$(tput setaf 6)" \
		-v col_off="$$(tput sgr0)" \
	'{ \
		printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
		n = split($$2, words, " "); \
		line_length = ncol - indent; \
		for (i = 1; i <= n; i++) { \
			line_length -= length(words[i]) + 1; \
			if (line_length <= 0) { \
				line_length = ncol - indent - length(words[i]) - 1; \
				printf "\n%*s ", -indent, " "; \
			} \
			printf "%s ", words[i]; \
		} \
		printf "\n"; \
	}' \
	| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
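# Example: running 'make' with no arguments (or 'make help') prints each '## '
# doc comment next to its target name, roughly like (illustrative output):
#   clean            Removes artifacts from the build stage, and other common Python artifacts.
#   show-params      Show the set of input parameters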