# no websockets, more cloud friendly
# able to make gradio clean-up states
# gradio @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio-4.25.0-py3-none-any.whl
# gradio_client @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio_client-0.15.0-py3-none-any.whl
#gradio @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio-4.20.1-py3-none-any.whl
#gradio_client==0.11.0
# gradio @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio-4.26.0-py3-none-any.whl
# gradio_client @ https://h2o-release.s3.amazonaws.com/h2ogpt/gradio_client-0.15.1-py3-none-any.whl
gradio==4.44.0
gradio_client==1.3.0

uvicorn[standard]
gunicorn
fastapi-utils
sse_starlette>=1.8.2

# constrained by tokenizers etc.:
huggingface_hub==0.25.2
appdirs>=1.4.4
fire>=0.5.0
docutils>=0.20.1
torch==2.2.1; sys_platform != "darwin" and platform_machine != "arm64"
torch==2.3.1; sys_platform == "darwin" and platform_machine == "arm64"
evaluate>=0.4.0
rouge_score>=0.1.2
sacrebleu>=2.3.1
scikit-learn>=1.2.2

# optional (need to uncomment code in gradio_runner.py for import of better_profanity)
# alt-profanity-check==1.2.2
# better-profanity==0.7.0
numpy>=1.23.4,<2.0
pandas>=2.0.2
matplotlib>=3.7.1

# transformers
loralib>=0.1.2
bitsandbytes>=0.43.1; sys_platform != "darwin" and platform_machine != "arm64"
# bitsandbytes downgraded because of Mac M1/M2 support issue. See https://github.com/axolotl-ai-cloud/axolotl/issues/1436
bitsandbytes==0.42.0; sys_platform == "darwin" and platform_machine == "arm64"
accelerate>=0.30.1
peft>=0.7.0
transformers>=4.45.1
jinja2>=3.1.0
tokenizers>=0.19.0
hf_transfer>=0.1.6
#optimum>=1.17.1
datasets>=2.18.0
sentencepiece>=0.2.0

APScheduler>=3.10.1

# optional for generate
pynvml>=11.5.0
psutil>=5.9.5
boto3>=1.26.101
botocore>=1.29.101

beautifulsoup4>=4.12.2
markdown>=3.4.3

# data and testing
pytest>=7.2.2
pytest-xdist>=3.2.1
nltk>=3.8.1
textstat>=0.7.3
# pandoc==2.3
pypandoc>=1.11; sys_platform == "darwin" and platform_machine == "arm64"
pypandoc_binary>=1.11; platform_machine == "x86_64"
pypandoc_binary>=1.11; platform_system == "Windows"
python-magic-bin>=0.4.14; platform_system == "Windows"
openpyxl>=3.1.2
lm_dataformat>=0.0.20
bioc>=2.0

# for HF embeddings
sentence_transformers>=3.0.1
InstructorEmbedding @ https://h2o-release.s3.amazonaws.com/h2ogpt/InstructorEmbedding-1.0.1-py3-none-any.whl
sentence_transformers_old @ https://h2o-release.s3.amazonaws.com/h2ogpt/sentence_transformers_old-2.2.2-py3-none-any.whl

# falcon
einops>=0.6.1

# for gpt4all .env file, but avoid worrying about imports
python-dotenv>=1.0.0

json_repair>=0.21.0

text-generation>=0.7.0

# for tokenization when we don't have an HF tokenizer
tiktoken>=0.5.2

# optional: for OpenAI endpoint
openai>=1.40.1
slowapi>=0.1.9

# for image metadata
pyexiv2

requests>=2.31.0
httpx>=0.24.1
urllib3>=1.26.16
filelock>=3.12.2
joblib>=1.3.1
tqdm>=4.65.0
tabulate>=0.9.0
packaging>=23.1
jsonschema>=4.23.0

spacy==3.7.5

### PASTING IN CONTENTS OF OPTIONAL_LANGCHAIN

# optional for chat with PDF
langchain==0.2.6
langchain_experimental==0.0.62
langchain-community==0.2.6
langsmith==0.1.82
langchain-core==0.2.23
langchain-text-splitters==0.2.2
#langchain_huggingface==0.0.3
pypdf>=3.17.1
# avoid textract, requires old six
#textract==1.6.5
pypdfium2>=4.24.0

# for HF embeddings
sentence_transformers>=3.0.1
# https://github.com/h2oai/instructor-embedding/tree/h2ogpt
# pip wheel .
InstructorEmbedding @ https://h2o-release.s3.amazonaws.com/h2ogpt/InstructorEmbedding-1.0.1-py3-none-any.whl
# https://github.com/h2oai/sentence-transformers/tree/h2ogpt
# pip wheel .
sentence_transformers_old @ https://h2o-release.s3.amazonaws.com/h2ogpt/sentence_transformers_old-2.2.2-py3-none-any.whl

# optional: for OpenAI endpoint or embeddings (requires key)
replicate>=0.26.0
anthropic>=0.34.2
langchain-anthropic>=0.1.20
together>=1.1.5
langchain_together==0.1.3
langchain-openai>=0.1.8
langchain-google-genai>=1.0.8
google-generativeai>=0.7.2
google-ai-generativelanguage>=0.6.6
# pydantic version conflict
#mistral_common==1.3.3
llava @ https://h2o-release.s3.amazonaws.com/h2ogpt/llava-1.7.0.dev0-py3-none-any.whl
#langchain_mistralai==0.1.2
# tokenizers<0.16.0, but transformers requires >=0.19
httpx>=0.25.2
httpx-sse>=0.3.1
mistralai>=0.4.0
# pydantic issue, don't need yet
#mistral-common==1.0.2
groq>=0.5.0
langchain-groq>=0.1.5

# local vector db
chromadb==0.4.23
pydantic-settings>=2.1.0

# server vector db
#pymilvus==2.2.8

# weak url support; use if opencv etc. can't be installed. If commenting this in, comment out the unstructured[local-inference] line below
# unstructured==0.8.1
# strong support for images
# Requires on Ubuntu: sudo apt-get install libmagic-dev poppler-utils tesseract-ocr libtesseract-dev libreoffice
unstructured[local-inference]==0.12.5
unstructured[all-docs]==0.12.5

docx2txt==0.8
python-docx==1.1.0
#pdf2image==1.16.3
#pytesseract==0.3.10
pillow>=10.2.0
posthog

pdfminer.six==20231228
urllib3
requests_file
#pdf2image==1.16.3
#pytesseract==0.3.10
tabulate>=0.9.0
# FYI pandoc already part of requirements.txt

# JSONLoader, but makes some trouble for some users
# TRY: apt-get install autoconf libtool
# unclear what happens on windows/mac for now
jq>=1.4.1; platform_machine == "x86_64"

# to check licenses
# Run: pip-licenses|grep -v 'BSD\|Apache\|MIT'
pip-licenses>=4.3.0

# weaviate vector db
# required for httpx for mistralai
weaviate-client==3.26.2

# vllm==0.2.2

# only gradio>=4
gradio_pdf>=0.0.7
gradio_tools>=0.0.9

# Qdrant - https://qdrant.tech vector database
qdrant-client>=1.8.0

# MIT:
arxiv>=2.1.3
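
# Quick-start sketch (not authoritative; assumes this file is saved as requirements.txt
# on an Ubuntu host): install the system packages that unstructured/tesseract need
# (same list as noted above), then install the Python dependencies with pip.
#   sudo apt-get install libmagic-dev poppler-utils tesseract-ocr libtesseract-dev libreoffice
#   pip install -r requirements.txt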