# Custom gradio builds (commented out below): no websockets, more cloud friendly,
# and able to make gradio clean up states
# gradio @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio-4.25.0-py3-none-any.whl
# gradio_client @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio_client-0.15.0-py3-none-any.whl
#gradio @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio-4.20.1-py3-none-any.whl
#gradio_client==0.11.0
# gradio @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio-4.26.0-py3-none-any.whl
# gradio_client @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio_client-0.15.1-py3-none-any.whl
gradio==4.44.0
gradio_client==1.3.0
uvicorn[standard]
gunicorn
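# Illustrative only (the `main:app` module path and port are assumptions, not taken from this repo):
#   gunicorn -k uvicorn.workers.UvicornWorker -w 4 -b 0.0.0.0:7860 main:app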
fastapi-utils
sse_starlette>=1.8.2
# constrained by tokenizers etc.:
huggingface_hub==0.25.2
appdirs>=1.4.4
fire>=0.5.0
docutils>=0.20.1
torch==2.2.1; sys_platform != "darwin" and platform_machine != "arm64"
torch==2.3.1; sys_platform == "darwin" and platform_machine == "arm64"
evaluate>=0.4.0
rouge_score>=0.1.2
sacrebleu>=2.3.1
scikit-learn>=1.2.2
# optional (need to uncomment code in gradio_runner.py for import of better_profanity)
# alt-profanity-check==1.2.2
# better-profanity==0.7.0
numpy>=1.23.4,<2.0
pandas>=2.0.2
matplotlib>=3.7.1
# transformers
loralib>=0.1.2
bitsandbytes>=0.43.1; sys_platform != "darwin" and platform_machine != "arm64"
#bitsandbytes downgraded because of Mac M1/M2 support issue. See https://github.com/axolotl-ai-cloud/axolotl/issues/1436
bitsandbytes==0.42.0; sys_platform == "darwin" and platform_machine == "arm64"
accelerate>=0.30.1
peft>=0.7.0
transformers>=4.45.1
jinja2>=3.1.0
tokenizers>=0.19.0
hf_transfer>=0.1.6
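# Note: hf_transfer only speeds up downloads when huggingface_hub is told to use it, e.g.:
#   export HF_HUB_ENABLE_HF_TRANSFER=1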
#optimum>=1.17.1
datasets>=2.18.0
sentencepiece>=0.2.0
APScheduler>=3.10.1
# optional for generate
pynvml>=11.5.0
psutil>=5.9.5
boto3>=1.26.101
botocore>=1.29.101
beautifulsoup4>=4.12.2
markdown>=3.4.3
# data and testing
pytest>=7.2.2
pytest-xdist>=3.2.1
nltk>=3.8.1
textstat>=0.7.3
# pandoc==2.3
pypandoc>=1.11; sys_platform == "darwin" and platform_machine == "arm64"
pypandoc_binary>=1.11; platform_machine == "x86_64"
pypandoc_binary>=1.11; platform_system == "Windows"
python-magic-bin>=0.4.14; platform_system == "Windows"
openpyxl>=3.1.2
lm_dataformat>=0.0.20
bioc>=2.0
# for HF embeddings
sentence_transformers>=3.0.1
# https://github.com/h2oai/instructor-embedding/tree/h2ogpt
# pip wheel .
InstructorEmbedding @ https://h2o-release.s3.amazonaws.com/h2ogpt/InstructorEmbedding-1.0.1-py3-none-any.whl
# https://github.com/h2oai/sentence-transformers/tree/h2ogpt
# pip wheel .
sentence_transformers_old @ https://h2o-release.s3.amazonaws.com/h2ogpt/sentence_transformers_old-2.2.2-py3-none-any.whl
# falcon
einops>=0.6.1
# for the gpt4all .env file, without having to worry about imports
python-dotenv>=1.0.0
json_repair>=0.21.0
text-generation>=0.7.0
# for tokenization when no HF tokenizer is available
tiktoken>=0.5.2
# optional: for OpenAI endpoint
openai>=1.40.1
slowapi>=0.1.9
# for image metadata
pyexiv2
requests>=2.31.0
httpx>=0.24.1
urllib3>=1.26.16
filelock>=3.12.2
joblib>=1.3.1
tqdm>=4.65.0
tabulate>=0.9.0
packaging>=23.1
jsonschema>=4.23.0
spacy==3.7.5
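# spacy models are not installed via PyPI pins; if one is needed it is fetched separately,
# e.g. (model name is an assumption, not from this repo):
#   python -m spacy download en_core_web_sm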
### PASTING IN CONTENTS OF OPTIONAL_LANGCHAIN
# optional for chat with PDF
langchain==0.2.6
langchain_experimental==0.0.62
langchain-community==0.2.6
langsmith==0.1.82
langchain-core==0.2.23
langchain-text-splitters==0.2.2
#langchain_huggingface==0.0.3
pypdf>=3.17.1
# avoid textract, requires old six
#textract==1.6.5
pypdfium2>=4.24.0
# optional: for hosted endpoints (OpenAI and others) or embeddings (requires API keys)
replicate>=0.26.0
anthropic>=0.34.2
langchain-anthropic>=0.1.20
together>=1.1.5
langchain_together==0.1.3
langchain-openai>=0.1.8
langchain-google-genai>=1.0.8
google-generativeai>=0.7.2
google-ai-generativelanguage>=0.6.6
# pydantic version conflict
#mistral_common==1.3.3
llava @ https://h2o-release.s3.amazonaws.com/h2ogpt/llava-1.7.0.dev0-py3-none-any.whl
#langchain_mistralai==0.1.2 # tokenizers<0.16.0, but transformers requires >=0.19
httpx>=0.25.2
httpx-sse>=0.3.1
mistralai>=0.4.0
# pydantic issue, don't need yet
#mistral-common==1.0.2
groq>=0.5.0
langchain-groq>=0.1.5
# local vector db
chromadb==0.4.23
pydantic-settings>=2.1.0
# server vector db
#pymilvus==2.2.8
# weak URL support; use if opencv etc. cannot be installed. If you comment this one in, comment out unstructured[local-inference] below
# unstructured==0.8.1
# strong support for images
# Requires on Ubuntu: sudo apt-get install libmagic-dev poppler-utils tesseract-ocr libtesseract-dev libreoffice
unstructured[local-inference]==0.12.5
unstructured[all-docs]==0.12.5
docx2txt==0.8
python-docx==1.1.0
#pdf2image==1.16.3
#pytesseract==0.3.10
pillow>=10.2.0
posthog
pdfminer.six==20231228
requests_file
# FYI: pandoc is already part of requirements.txt (see pypandoc above)
# jq for langchain's JSONLoader, but it causes trouble for some users
# TRY: apt-get install autoconf libtool
# unclear what happens on Windows/Mac for now
jq>=1.4.1; platform_machine == "x86_64"
# to check licenses
# Run: pip-licenses|grep -v 'BSD\|Apache\|MIT'
pip-licenses>=4.3.0
# weaviate vector db
# pinned because of the httpx version required by mistralai
weaviate-client==3.26.2
# vllm==0.2.2
# only for gradio>=4
gradio_pdf>=0.0.7
gradio_tools>=0.0.9
# Qdrant - https://qdrant.tech vector database
qdrant-client>=1.8.0
# MIT:
arxiv>=2.1.3
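# Example install (assumes Linux x86_64; see the unstructured note above for required system packages):
#   pip install -r requirements.txt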