diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index c429ea3819dc0559f16686160eb7ba15b0ba4e9b..0000000000000000000000000000000000000000 --- a/.dockerignore +++ /dev/null @@ -1,39 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Please keep below sorted alphabetically - -__pycache__ -.cache -.coverage -.coverage.* -.DS_Store -.env -.git -.gitignore -.pytest_cache -.Python -.tox -.venv -*.cover -*.log -*.pyc -*.pyd -*.pyo -coverage.xml -env -nosetests.xml -pip-delete-this-directory.txt -pip-log.txt diff --git a/.flake8 b/.flake8 deleted file mode 100644 index e3e9c9ed39e1cb510ccf469c8f779bb96fb222a6..0000000000000000000000000000000000000000 --- a/.flake8 +++ /dev/null @@ -1,25 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -[flake8] -enable-extensions = G -select = B,C,E,F,G,P,SIM1,T4,W,B9 -max-line-length = 120 -# C408 ignored because we like the dict keyword argument syntax -# E501 is not flexible enough, we're using B950 instead -ignore = - E203,E305,E402,E501,E721,E741,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,E226,E265 -exclude = - third_party diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index a6344aac8c09253b3b630fb776ae94478aa0275b..0000000000000000000000000000000000000000 --- a/.gitattributes +++ /dev/null @@ -1,35 +0,0 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ckpt filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -*.safetensors filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tar filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml deleted file mode 100644 index 37964127c2e7da1843acca819173fabe9879adcb..0000000000000000000000000000000000000000 --- a/.github/workflows/lint.yml +++ /dev/null @@ -1,35 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# lint.yml : A workflow to trigger lint tests on GitHub -name: 'Lint' -on: - pull_request: - workflow_dispatch: -jobs: - lint: - name: 'Linting' - runs-on: ubuntu-latest - steps: - - name: 'Checkout' - uses: actions/checkout@v4 - - name: 'Setup Python' - uses: actions/setup-python@v5 - with: - python-version: 'pypy3.10' - - name: 'Lint' - run: | - sudo apt-get update - bash ./cosmos1/scripts/format.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index cbf83e67bb79c379c26beaf93c9b617cf9ee2dcc..0000000000000000000000000000000000000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -default_language_version: - python: python3.10 -repos: - - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 - hooks: - - id: flake8 - args: [--max-line-length=120] - exclude: ^dist/|^third_party/ - - - repo: https://github.com/psf/black - rev: 23.12.1 - hooks: - - id: black - args: [--line-length=120] - exclude: ^dist/|^third_party/ - - - repo: https://github.com/timothycrosley/isort - rev: 5.12.0 - hooks: - - id: isort - args: [--line-length=120] - - - repo: https://github.com/MarcoGorelli/absolufy-imports - rev: v0.3.1 - hooks: - - id: absolufy-imports - - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 - hooks: - - id: trailing-whitespace - exclude: ^tests/.*/fixtures/.* - args: [--markdown-linebreak-ext=md] - - id: end-of-file-fixer - exclude: ^tests/.*/fixtures/.* - - id: check-added-large-files - args: ['--maxkb=2000'] diff --git a/ATTRIBUTIONS.md b/ATTRIBUTIONS.md deleted file mode 100644 index b0ef6fa06b6810a56a8120fa53bf1bfc814fc33c..0000000000000000000000000000000000000000 --- a/ATTRIBUTIONS.md +++ /dev/null @@ -1,1437 +0,0 @@ -# Open Source License Attribution - - Cosmos uses Open Source components. You can find the details of these open-source projects along with license information below, sorted alphabetically. - We are grateful to the developers for their contributions to open source and acknowledge these below. - -## Better-Profanity - [MIT License](https://github.com/snguyenthanh/better_profanity/blob/master/LICENSE) - - ``` - - Copyright (c) 2018 The Python Packaging Authority - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - - ``` - -## FFmpeg - [FFMPEG License](https://github.com/FFmpeg/FFmpeg/blob/master/LICENSE.md) - - ``` - # License - - Most files in FFmpeg are under the GNU Lesser General Public License version 2.1 - or later (LGPL v2.1+). Read the file `COPYING.LGPLv2.1` for details. Some other - files have MIT/X11/BSD-style licenses. In combination the LGPL v2.1+ applies to - FFmpeg. - - Some optional parts of FFmpeg are licensed under the GNU General Public License - version 2 or later (GPL v2+). See the file `COPYING.GPLv2` for details. None of - these parts are used by default, you have to explicitly pass `--enable-gpl` to - configure to activate them. In this case, FFmpeg's license changes to GPL v2+. - - Specifically, the GPL parts of FFmpeg are: - - - libpostproc - - optional x86 optimization in the files - - `libavcodec/x86/flac_dsp_gpl.asm` - - `libavcodec/x86/idct_mmx.c` - - `libavfilter/x86/vf_removegrain.asm` - - the following building and testing tools - - `compat/solaris/make_sunver.pl` - - `doc/t2h.pm` - - `doc/texi2pod.pl` - - `libswresample/tests/swresample.c` - - `tests/checkasm/*` - - `tests/tiny_ssim.c` - - the following filters in libavfilter: - - `signature_lookup.c` - - `vf_blackframe.c` - - `vf_boxblur.c` - - `vf_colormatrix.c` - - `vf_cover_rect.c` - - `vf_cropdetect.c` - - `vf_delogo.c` - - `vf_eq.c` - - `vf_find_rect.c` - - `vf_fspp.c` - - `vf_histeq.c` - - `vf_hqdn3d.c` - - `vf_kerndeint.c` - - `vf_lensfun.c` (GPL version 3 or later) - - `vf_mcdeint.c` - - `vf_mpdecimate.c` - - `vf_nnedi.c` - - `vf_owdenoise.c` - - `vf_perspective.c` - - `vf_phase.c` - - `vf_pp.c` - - `vf_pp7.c` - - `vf_pullup.c` - - `vf_repeatfields.c` - - `vf_sab.c` - - `vf_signature.c` - - `vf_smartblur.c` - - `vf_spp.c` - - `vf_stereo3d.c` - - `vf_super2xsai.c` - - `vf_tinterlace.c` - - `vf_uspp.c` - - `vf_vaguedenoiser.c` - - `vsrc_mptestsrc.c` - - Should you, for whatever reason, prefer to use version 3 of the (L)GPL, then - the configure parameter `--enable-version3` will activate this licensing option - for you. Read the file `COPYING.LGPLv3` or, if you have enabled GPL parts, - `COPYING.GPLv3` to learn the exact legal terms that apply in this case. - - There are a handful of files under other licensing terms, namely: - - * The files `libavcodec/jfdctfst.c`, `libavcodec/jfdctint_template.c` and - `libavcodec/jrevdct.c` are taken from libjpeg, see the top of the files for - licensing details. Specifically note that you must credit the IJG in the - documentation accompanying your program if you only distribute executables. - You must also indicate any changes including additions and deletions to - those three files in the documentation. - * `tests/reference.pnm` is under the expat license. - - - ## External libraries - - FFmpeg can be combined with a number of external libraries, which sometimes - affect the licensing of binaries resulting from the combination. 
- - ### Compatible libraries - - The following libraries are under GPL version 2: - - avisynth - - frei0r - - libcdio - - libdavs2 - - librubberband - - libvidstab - - libx264 - - libx265 - - libxavs - - libxavs2 - - libxvid - - When combining them with FFmpeg, FFmpeg needs to be licensed as GPL as well by - passing `--enable-gpl` to configure. - - The following libraries are under LGPL version 3: - - gmp - - libaribb24 - - liblensfun - - When combining them with FFmpeg, use the configure option `--enable-version3` to - upgrade FFmpeg to the LGPL v3. - - The VMAF, mbedTLS, RK MPI, OpenCORE and VisualOn libraries are under the Apache License - 2.0. That license is incompatible with the LGPL v2.1 and the GPL v2, but not with - version 3 of those licenses. So to combine these libraries with FFmpeg, the - license version needs to be upgraded by passing `--enable-version3` to configure. - - The smbclient library is under the GPL v3, to combine it with FFmpeg, - the options `--enable-gpl` and `--enable-version3` have to be passed to - configure to upgrade FFmpeg to the GPL v3. - - ### Incompatible libraries - - There are certain libraries you can combine with FFmpeg whose licenses are not - compatible with the GPL and/or the LGPL. If you wish to enable these - libraries, even in circumstances that their license may be incompatible, pass - `--enable-nonfree` to configure. This will cause the resulting binary to be - unredistributable. - - The Fraunhofer FDK AAC and OpenSSL libraries are under licenses which are - incompatible with the GPLv2 and v3. To the best of our knowledge, they are - compatible with the LGPL. - - ``` - -## Hydra-core [MIT License](https://github.com/facebookresearch/hydra/blob/main/LICENSE) - - ``` - - MIT License - - Copyright (c) Facebook, Inc. and its affiliates. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - - ``` - -## ImageIo - [BSD 2-Clause "Simplified" License](https://github.com/imageio/imageio/blob/master/LICENSE) - - ``` - - Copyright (c) 2014-2022, imageio developers - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ``` - -## Iopath - [MIT License](https://github.com/facebookresearch/iopath/blob/main/LICENSE) - - ``` - MIT License - - Copyright (c) Facebook, Inc. and its affiliates. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - - ``` - -## Loguru - [MIT License](https://github.com/Delgan/loguru/blob/master/LICENSE) - - ``` - - MIT License - - Copyright (c) 2017 - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
- - ``` - -## Mediapy - [Apache License 2.0](https://github.com/google/mediapy/blob/main/LICENSE) - - ``` - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ``` - -## Nltk - [Apache License 2.0](https://github.com/nltk/nltk/blob/develop/LICENSE.txt) - - ``` - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ``` - -## PEFT - [Apache License 2.0](https://github.com/huggingface/peft/blob/main/LICENSE) - - ``` - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ``` - -## Pillow - [MIT License](https://github.com/python-pillow/Pillow/blob/main/LICENSE) - - ``` - - The Python Imaging Library (PIL) is - - Copyright © 1997-2011 by Secret Labs AB - Copyright © 1995-2011 by Fredrik Lundh and contributors - - Pillow is the friendly PIL fork. It is - - Copyright © 2010 by Jeffrey A. 
Clark and contributors - - Like PIL, Pillow is licensed under the open source MIT-CMU License: - - By obtaining, using, and/or copying this software and/or its associated - documentation, you agree that you have read, understood, and will comply - with the following terms and conditions: - - Permission to use, copy, modify and distribute this software and its - documentation for any purpose and without fee is hereby granted, - provided that the above copyright notice appears in all copies, and that - both that copyright notice and this permission notice appear in supporting - documentation, and that the name of Secret Labs AB or the author not be - used in advertising or publicity pertaining to distribution of the software - without specific, written prior permission. - - SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS - SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. - IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR ANY SPECIAL, - INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM - LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE - OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - PERFORMANCE OF THIS SOFTWARE. - - ``` - -## PyAV - [BSD 3-Clause "New" or "Revised" License](https://github.com/PyAV-Org/PyAV/blob/main/LICENSE.txt) - - ``` - - Copyright retained by original committers. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the project nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, - INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - ``` - -## Pytorch_Retinaface - [MIT License](https://github.com/biubug6/Pytorch_Retinaface/blob/master/LICENSE.MIT) - - ``` - MIT License - - Copyright (c) 2019 - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - ``` - -## Sentencepiece - [Apache License 2.0](https://github.com/google/sentencepiece/blob/master/LICENSE) - - ``` - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ``` - -## Termcolor - [MIT License](https://github.com/termcolor/termcolor/blob/main/COPYING.txt) - - ``` - Copyright (c) 2008-2011 Volvox Development Team - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - ``` - -## Transformers [Apache License 2.0](https://github.com/huggingface/transformers/blob/main/LICENSE) - - ``` - - Copyright 2018- The Hugging Face team. All rights reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. 
We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ``` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 038da0753410ea465d6769f7f1b956f3b7157fce..0000000000000000000000000000000000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,59 +0,0 @@ -# How to Contribute - -We'd love to receive your patches and contributions. Please keep your PRs as draft until such time that you would like us to review them. - -## Code Reviews - -All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. Consult -[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests. - -## Pipeline - -Ensure you run the linter prior to submitting your pull request and the CI-CD pipeline is green before removing the draft designation. - -```bash -./cosmos1/scripts/format.sh -``` - -## Signing Your Work - -* We require that all contributors "sign-off" on their commits. This certifies that the contribution is your original work, or you have rights to submit it under the same license, or a compatible license. - - * Any contribution which contains commits that are not Signed-Off will not be accepted. - -* To sign off on a commit you simply use the `--signoff` (or `-s`) option when committing your changes: - ```bash - $ git commit -s -m "Add cool feature." - ``` - This will append the following to your commit message: - ``` - Signed-off-by: Your Name - ``` - -* Full text of the DCO: - - ``` - Developer Certificate of Origin - Version 1.1 - - Copyright (C) 2004, 2006 The Linux Foundation and its contributors. - 1 Letterman Drive - Suite D4700 - San Francisco, CA, 94129 - - Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. - ``` - - ``` - Developer's Certificate of Origin 1.1 - - By making a contribution to this project, I certify that: - - (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or - - (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or - - (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. 
- - (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. - ``` diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index d81193a6fd3b7e46b521ed825eaa6d171f920cc0..0000000000000000000000000000000000000000 --- a/Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Use NVIDIA PyTorch container as base image -FROM nvcr.io/nvidia/pytorch:24.10-py3 - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - ffmpeg \ - && rm -rf /var/lib/apt/lists/* - -# Set working directory -WORKDIR /workspace - -# Copy source code -COPY cosmos1 /workspace/cosmos1 - -# Copy main README -COPY README.md /workspace/ - -# Copy third-party licenses -COPY ATTRIBUTIONS.md /workspace/ - -# Copy requirements file -COPY requirements.txt /workspace/ - -# Install Python dependencies -RUN pip install --no-cache-dir -r requirements.txt - -# Default command -CMD ["/bin/bash"] diff --git a/INSTALL.md b/INSTALL.md deleted file mode 100644 index f1dc5684f50c63288ee45f4dd0cfcdd6e41d6f42..0000000000000000000000000000000000000000 --- a/INSTALL.md +++ /dev/null @@ -1,20 +0,0 @@ -# Cosmos Installation - -We have only tested the installation with Ubuntu 24.04, 22.04, and 20.04. - -1. Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html). - -2. Clone the repository. - -```bash -git clone git@github.com:NVIDIA/Cosmos.git -cd Cosmos -``` - -3. Build a Docker image using `Dockerfile` and run the Docker container. - -```bash -docker build -t cosmos . -docker run -d --name cosmos_container --gpus all --ipc=host -it -v $(pwd):/workspace cosmos -docker attach cosmos_container -``` diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 4c9ad980682246bd6ab0d2bae82232be6dbdcbd4..0000000000000000000000000000000000000000 --- a/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
diff --git a/README.md b/README.md deleted file mode 100644 index e2cdbe82bc90960e66780ecbb9d2095f8513d68c..0000000000000000000000000000000000000000 --- a/README.md +++ /dev/null @@ -1,78 +0,0 @@ - -![Cosmos Logo](assets/cosmos-logo.png) - --------------------------------------------------------------------------------- -### [Website](https://www.nvidia.com/en-us/ai/cosmos/) | [HuggingFace](https://huggingface.co/collections/nvidia/cosmos-6751e884dc10e013a0a0d8e6) | [GPU-free Preview](https://build.nvidia.com/explore/discover) | [Paper](https://arxiv.org/abs/2501.03575) | [Paper Website](https://research.nvidia.com/labs/dir/cosmos1/) - -[NVIDIA Cosmos](https://www.nvidia.com/cosmos/) is a developer-first world foundation model platform designed to help Physical AI developers build their Physical AI systems better and faster. Cosmos contains - -1. pre-trained models, available via [Hugging Face](https://huggingface.co/collections/nvidia/cosmos-6751e884dc10e013a0a0d8e6) under the [NVIDIA Open Model License](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license/) that allows commercial use of the models for free -2. training scripts under the [Apache 2 License](https://www.apache.org/licenses/LICENSE-2.0), offered through the [NVIDIA NeMo Framework](https://github.com/NVIDIA/NeMo) for post-training the models for various downstream Physical AI applications - -Details of the platform are described in the [Cosmos paper](https://research.nvidia.com/publication/2025-01_cosmos-world-foundation-model-platform-physical-ai). Preview access is available at [build.nvidia.com](https://build.nvidia.com). - -## Key Features - -- [Pre-trained Diffusion-based world foundation models](cosmos1/models/diffusion/README.md) for Text2World and Video2World generation where a user can generate visual simulation based on text prompts and video prompts. - -[Pre-trained Autoregressive-based world foundation models](cosmos1/models/autoregressive/README.md) for Video2World generation where a user can generate visual simulation based on video prompts and optional text prompts. -- [Video tokenizers](https://github.com/NVIDIA/Cosmos-Tokenizer) for tokenizing videos into continuous tokens (latent vectors) and discrete tokens (integers) efficiently and effectively. -- Video curation pipeline for building your own video dataset. [Coming soon] - -[Post-training scripts](cosmos1/models/POST_TRAINING.md) via NeMo Framework to post-train the pre-trained world foundation models for various Physical AI setups. -- Pre-training scripts via NeMo Framework for building your own world foundation model. [[Diffusion](https://github.com/NVIDIA/NeMo/tree/main/nemo/collections/diffusion)] [[Autoregressive](https://github.com/NVIDIA/NeMo/tree/main/nemo/collections/multimodal_autoregressive)] [[Tokenizer](https://github.com/NVIDIA/NeMo/tree/main/nemo/collections/diffusion/vae)].
- -## Model Family - -| Model name | Description | Try it out | -|------------|----------|----------| -| [Cosmos-1.0-Diffusion-7B-Text2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-7B-Text2World) | Text to visual world generation | [Inference](cosmos1/models/diffusion/README.md) | -| [Cosmos-1.0-Diffusion-14B-Text2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-14B-Text2World) | Text to visual world generation | [Inference](cosmos1/models/diffusion/README.md) | -| [Cosmos-1.0-Diffusion-7B-Video2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-7B-Video2World) | Video + Text based future visual world generation | [Inference](cosmos1/models/diffusion/README.md) | -| [Cosmos-1.0-Diffusion-14B-Video2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-14B-Video2World) | Video + Text based future visual world generation | [Inference](cosmos1/models/diffusion/README.md) | -| [Cosmos-1.0-Autoregressive-4B](https://huggingface.co/nvidia/Cosmos-1.0-Autoregressive-4B) | Future visual world generation | [Inference](cosmos1/models/autoregressive/README.md) | -| [Cosmos-1.0-Autoregressive-12B](https://huggingface.co/nvidia/Cosmos-1.0-Autoregressive-12B) | Future visual world generation | [Inference](cosmos1/models/autoregressive/README.md) | -| [Cosmos-1.0-Autoregressive-5B-Video2World](https://huggingface.co/nvidia/Cosmos-1.0-Autoregressive-5B-Video2World) | Video + Text based future visual world generation | [Inference](cosmos1/models/autoregressive/README.md) | -| [Cosmos-1.0-Autoregressive-13B-Video2World](https://huggingface.co/nvidia/Cosmos-1.0-Autoregressive-13B-Video2World) | Video + Text based future visual world generation | [Inference](cosmos1/models/autoregressive/README.md) | -| [Cosmos-1.0-Guardrail](https://huggingface.co/nvidia/Cosmos-1.0-Guardrail) | Guardrail contains pre-Guard and post-Guard for safe use | Embedded in model inference scripts | - -## Example Usage - -### Inference - -Follow the [Cosmos Installation Guide](INSTALL.md) to set up the Docker container. For inference with the pretrained models, please refer to [Cosmos Diffusion Inference](cosmos1/models/diffusion/README.md) and [Cosmos Autoregressive Inference](cosmos1/models/autoregressive/README.md). - -The code snippet below illustrates basic inference usage. - -```bash -PROMPT="A sleek, humanoid robot stands in a vast warehouse filled with neatly stacked cardboard boxes on industrial shelves. \ -The robot's metallic body gleams under the bright, even lighting, highlighting its futuristic design and intricate joints. \ -A glowing blue light emanates from its chest, adding a touch of advanced technology. The background is dominated by rows of boxes, \ -suggesting a highly organized storage system. The floor is lined with wooden pallets, enhancing the industrial setting. \ -The camera remains static, capturing the robot's poised stance amidst the orderly environment, with a shallow depth of \ -field that keeps the focus on the robot while subtly blurring the background for a cinematic effect." - -# Example using 7B model -PYTHONPATH=$(pwd) python cosmos1/models/diffusion/inference/text2world.py \ - --checkpoint_dir checkpoints \ - --diffusion_transformer_dir Cosmos-1.0-Diffusion-7B-Text2World \ - --prompt "$PROMPT" \ - --offload_prompt_upsampler \ - --video_save_name Cosmos-1.0-Diffusion-7B-Text2World -``` - - - -We also offer [multi-GPU inference](cosmos1/models/diffusion/nemo/inference/README.md) support for Diffusion Text2World WFM models through NeMo Framework.
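As an illustration only (not part of the original README): the 14B Text2World checkpoint listed in the Model Family table can presumably be driven with the same flags as the documented 7B invocation, reusing the PROMPT variable defined above and changing only the transformer directory and output name. A minimal sketch under that assumption:

```bash
# Hypothetical variant of the 7B example above; assumes the 14B checkpoint
# accepts the same command-line flags and that $PROMPT is already defined.
PYTHONPATH=$(pwd) python cosmos1/models/diffusion/inference/text2world.py \
    --checkpoint_dir checkpoints \
    --diffusion_transformer_dir Cosmos-1.0-Diffusion-14B-Text2World \
    --prompt "$PROMPT" \
    --offload_prompt_upsampler \
    --video_save_name Cosmos-1.0-Diffusion-14B-Text2World
```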
- -### Post-training - -NeMo Framework provides GPU-accelerated general post-training for both [diffusion](cosmos1/models/diffusion/nemo/post_training/README.md) and [autoregressive](cosmos1/models/autoregressive/nemo/post_training/README.md) models, with other types of post-training coming soon. - -## License and Contact - -This project will download and install additional third-party open source software projects. Review the license terms of these open source projects before use. - -NVIDIA Cosmos source code is released under the [Apache 2 License](https://www.apache.org/licenses/LICENSE-2.0). - -NVIDIA Cosmos models are released under the [NVIDIA Open Model License](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-open-model-license). For a custom license, please contact [cosmos-license@nvidia.com](mailto:cosmos-license@nvidia.com). diff --git a/RELEASE.md b/RELEASE.md deleted file mode 100644 index 99fc9ffb792085eb97a1acc7730e6e6628aabe52..0000000000000000000000000000000000000000 --- a/RELEASE.md +++ /dev/null @@ -1,7 +0,0 @@ -# Release Cadence - - -| Version | Description | Date | -|------------|----------|----------| -| [v1.0](release_notes/v0p1.md) | Initial diffusion and autoregressive WFMs release | 2025-01-06 | -| [v0.1](release_notes/v0p1.md) | Initial tokenizer release | 2024-11-06 | diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/cosmos1/models/guardrail/aegis/aegis.py b/aegis.py similarity index 96% rename from cosmos1/models/guardrail/aegis/aegis.py rename to aegis.py index 0ec0b462f9ba35a94b2158954888443b87db2096..1e0b58ca8c0949189a8c5df7a2847920becc05f4 100644 --- a/cosmos1/models/guardrail/aegis/aegis.py +++ b/aegis.py @@ -15,13 +15,14 @@ import argparse +from . import log import torch from peft import PeftModel from transformers import AutoModelForCausalLM, AutoTokenizer -from cosmos1.models.guardrail.aegis.categories import UNSAFE_CATEGORIES -from cosmos1.models.guardrail.common.core import ContentSafetyGuardrail, GuardrailRunner -from cosmos1.utils import log, misc +from .categories import UNSAFE_CATEGORIES +from .guardrail_core import ContentSafetyGuardrail, GuardrailRunner +from . 
import misc SAFE = misc.Color.green("SAFE") UNSAFE = misc.Color.red("UNSAFE") diff --git a/cosmos1/models/autoregressive/configs/base/tokenizer.py b/ar_config_tokenizer.py similarity index 94% rename from cosmos1/models/autoregressive/configs/base/tokenizer.py rename to ar_config_tokenizer.py index 3b8609614eee2921504dba117e3e89f710ba346a..fc20ac60191a1f8a3b250d7ecffa383a467cd5fc 100644 --- a/cosmos1/models/autoregressive/configs/base/tokenizer.py +++ b/ar_config_tokenizer.py @@ -17,10 +17,10 @@ from typing import Optional import attrs -from cosmos1.models.autoregressive.tokenizer.discrete_video import DiscreteVideoFSQStateDictTokenizer -from cosmos1.models.autoregressive.tokenizer.networks import CausalDiscreteVideoTokenizer -from cosmos1.utils.lazy_config import LazyCall as L -from cosmos1.utils.lazy_config import LazyDict +from .discrete_video import DiscreteVideoFSQStateDictTokenizer +from .ar_networks import CausalDiscreteVideoTokenizer +from .lazy_config_init import LazyCall as L +from .lazy_config_init import LazyDict def create_discrete_video_fsq_tokenizer_state_dict_config( diff --git a/cosmos1/models/autoregressive/configs/base/model.py b/ar_configs_base_model.py similarity index 98% rename from cosmos1/models/autoregressive/configs/base/model.py rename to ar_configs_base_model.py index e9f1c0f3fb45d85fab1c2259090bdc794aee7b44..7c0282c5544a109d4dde473bd0856ee95cf880ed 100644 --- a/cosmos1/models/autoregressive/configs/base/model.py +++ b/ar_configs_base_model.py @@ -17,7 +17,7 @@ from typing import Optional import attrs -from cosmos1.models.autoregressive.configs.base.tokenizer import TokenizerConfig +from .ar_config_tokenizer import TokenizerConfig @attrs.define diff --git a/cosmos1/models/autoregressive/model.py b/ar_model.py similarity index 97% rename from cosmos1/models/autoregressive/model.py rename to ar_model.py index 195de093d8fc4cba98b1255fce9bcbe2800c75e7..91ccfaf14ae0bc24f5e31c4ef479cfc995ad62fe 100644 --- a/cosmos1/models/autoregressive/model.py +++ b/ar_model.py @@ -19,23 +19,24 @@ import time from pathlib import Path from typing import Any, Dict, List, Optional, Set +from . import log import torch from safetensors.torch import load_file from torch.nn.modules.module import _IncompatibleKeys -from cosmos1.models.autoregressive.configs.base.model import ModelConfig -from cosmos1.models.autoregressive.configs.base.tokenizer import TokenizerConfig -from cosmos1.models.autoregressive.modules.mm_projector import MultimodalProjector -from cosmos1.models.autoregressive.networks.transformer import Transformer -from cosmos1.models.autoregressive.networks.vit import VisionTransformer, get_vit_config -from cosmos1.models.autoregressive.tokenizer.tokenizer import DiscreteMultimodalTokenizer, update_vocab_size -from cosmos1.models.autoregressive.utils.checkpoint import ( +from .ar_configs_base_model import ModelConfig +from .ar_config_tokenizer import TokenizerConfig +from .mm_projector import MultimodalProjector +from .ar_transformer import Transformer +from .vit import VisionTransformer, get_vit_config +from .ar_tokenizer import DiscreteMultimodalTokenizer, update_vocab_size +from .checkpoint import ( get_partial_state_dict, process_state_dict, substrings_to_ignore, ) -from cosmos1.models.autoregressive.utils.sampling import decode_n_tokens, decode_one_token, prefill -from cosmos1.utils import log, misc +from .sampling import decode_n_tokens, decode_one_token, prefill +from . 
import misc class AutoRegressiveModel(torch.nn.Module): diff --git a/cosmos1/models/autoregressive/modules/attention.py b/ar_modules_attention.py similarity index 98% rename from cosmos1/models/autoregressive/modules/attention.py rename to ar_modules_attention.py index 78d15826c96a449308747af75ace0b3e82043f76..865317a80aa2af41055cedac2e1bda5f0af88f14 100644 --- a/cosmos1/models/autoregressive/modules/attention.py +++ b/ar_modules_attention.py @@ -19,8 +19,8 @@ from typing import Optional, Union import torch from torch import nn -from cosmos1.models.autoregressive.modules.embedding import RotaryPositionEmbedding -from cosmos1.models.autoregressive.modules.normalization import create_norm +from .ar_modules_embedding import RotaryPositionEmbedding +from .ar_modules_normalization import create_norm class Attention(nn.Module): diff --git a/cosmos1/models/autoregressive/modules/embedding.py b/ar_modules_embedding.py similarity index 100% rename from cosmos1/models/autoregressive/modules/embedding.py rename to ar_modules_embedding.py diff --git a/cosmos1/models/autoregressive/modules/mlp.py b/ar_modules_mlp.py similarity index 100% rename from cosmos1/models/autoregressive/modules/mlp.py rename to ar_modules_mlp.py diff --git a/cosmos1/models/autoregressive/modules/normalization.py b/ar_modules_normalization.py similarity index 100% rename from cosmos1/models/autoregressive/modules/normalization.py rename to ar_modules_normalization.py diff --git a/cosmos1/models/autoregressive/tokenizer/networks.py b/ar_networks.py similarity index 92% rename from cosmos1/models/autoregressive/tokenizer/networks.py rename to ar_networks.py index f25ac36f54179dd77e2d2177895c64dab258ff71..b9e6a7dc9de961ea9d1a478805e5ca01658cdc29 100644 --- a/cosmos1/models/autoregressive/tokenizer/networks.py +++ b/ar_networks.py @@ -18,9 +18,9 @@ from collections import namedtuple import torch from torch import nn -from cosmos1.models.autoregressive.tokenizer.modules import CausalConv3d, DecoderFactorized, EncoderFactorized -from cosmos1.models.autoregressive.tokenizer.quantizers import FSQuantizer -from cosmos1.utils import log +from .ar_tokenizer_modules import CausalConv3d, DecoderFactorized, EncoderFactorized +from .ar_tokenizer_quantizers import FSQuantizer +from . 
import log NetworkEval = namedtuple("NetworkEval", ["reconstructions", "quant_loss", "quant_info"]) diff --git a/cosmos1/models/autoregressive/tokenizer/tokenizer.py b/ar_tokenizer.py similarity index 98% rename from cosmos1/models/autoregressive/tokenizer/tokenizer.py rename to ar_tokenizer.py index 6bda2565b9418eb0a63dc891dd6f8412434359c6..ab8c3c11cf728448ba0a4149373dc877cd1d77a0 100644 --- a/cosmos1/models/autoregressive/tokenizer/tokenizer.py +++ b/ar_tokenizer.py @@ -19,8 +19,8 @@ from typing import Optional import torch from einops import rearrange -from cosmos1.models.autoregressive.configs.base.tokenizer import TokenizerConfig -from cosmos1.utils.lazy_config import instantiate as lazy_instantiate +from .ar_config_tokenizer import TokenizerConfig +from .lazy_config_init import instantiate as lazy_instantiate def update_vocab_size( diff --git a/cosmos1/models/autoregressive/tokenizer/image_text_tokenizer.py b/ar_tokenizer_image_text_tokenizer.py similarity index 99% rename from cosmos1/models/autoregressive/tokenizer/image_text_tokenizer.py rename to ar_tokenizer_image_text_tokenizer.py index 3bbc2c82d7ac2ed01b45c27d35d7c9071c696e1f..2837d7979ac847f53ee0edfef7e125322488b393 100644 --- a/cosmos1/models/autoregressive/tokenizer/image_text_tokenizer.py +++ b/ar_tokenizer_image_text_tokenizer.py @@ -21,8 +21,8 @@ import transformers from transformers import AutoImageProcessor from transformers.image_utils import ImageInput, is_valid_image, load_image -from cosmos1.models.autoregressive.tokenizer.text_tokenizer import TextTokenizer -from cosmos1.utils import log +from .ar_tokenizer_text_tokenizer import TextTokenizer +from . import log # Configuration for different vision-language models IMAGE_CONFIGS = { diff --git a/cosmos1/models/autoregressive/tokenizer/modules.py b/ar_tokenizer_modules.py similarity index 99% rename from cosmos1/models/autoregressive/tokenizer/modules.py rename to ar_tokenizer_modules.py index 290c145380129040f471899b924a9a93d389c73b..ac3a16c8bc9a5b30c917feec99f0afbc88a1d9f2 100644 --- a/cosmos1/models/autoregressive/tokenizer/modules.py +++ b/ar_tokenizer_modules.py @@ -29,8 +29,8 @@ import torch import torch.nn as nn import torch.nn.functional as F -from cosmos1.models.autoregressive.tokenizer.patching import Patcher3D, UnPatcher3D -from cosmos1.models.autoregressive.tokenizer.utils import ( +from .ar_tokenizer_patching import Patcher3D, UnPatcher3D +from .ar_tokenizer_utils import ( CausalNormalize, batch2space, batch2time, @@ -41,7 +41,7 @@ from cosmos1.models.autoregressive.tokenizer.utils import ( space2batch, time2batch, ) -from cosmos1.utils import log +from . 
import log class CausalConv3d(nn.Module): diff --git a/cosmos1/models/autoregressive/tokenizer/patching.py b/ar_tokenizer_patching.py similarity index 100% rename from cosmos1/models/autoregressive/tokenizer/patching.py rename to ar_tokenizer_patching.py diff --git a/cosmos1/models/autoregressive/tokenizer/quantizers.py b/ar_tokenizer_quantizers.py similarity index 98% rename from cosmos1/models/autoregressive/tokenizer/quantizers.py rename to ar_tokenizer_quantizers.py index 589204cd9e69b734313a918df7e99c748ae7f6e5..e07b51aef6f32fb39266c2f12de27c9ff87eb4d7 100644 --- a/cosmos1/models/autoregressive/tokenizer/quantizers.py +++ b/ar_tokenizer_quantizers.py @@ -21,7 +21,7 @@ import torch import torch.nn as nn from einops import rearrange -from cosmos1.models.autoregressive.tokenizer.utils import default, pack_one, round_ste, unpack_one +from .ar_tokenizer_utils import default, pack_one, round_ste, unpack_one class FSQuantizer(nn.Module): diff --git a/cosmos1/models/autoregressive/tokenizer/text_tokenizer.py b/ar_tokenizer_text_tokenizer.py similarity index 99% rename from cosmos1/models/autoregressive/tokenizer/text_tokenizer.py rename to ar_tokenizer_text_tokenizer.py index 797457192fda248a1dfcfbe5a08298e9a48036df..4f077e827d7cbccfa13bd4942661e071313a53a2 100644 --- a/cosmos1/models/autoregressive/tokenizer/text_tokenizer.py +++ b/ar_tokenizer_text_tokenizer.py @@ -19,7 +19,7 @@ import numpy as np import torch from transformers import AutoTokenizer -from cosmos1.utils import log +from . import log def get_tokenizer_path(model_family: str, is_instruct_model: bool = False): diff --git a/cosmos1/models/autoregressive/tokenizer/utils.py b/ar_tokenizer_utils.py similarity index 100% rename from cosmos1/models/autoregressive/tokenizer/utils.py rename to ar_tokenizer_utils.py diff --git a/cosmos1/models/autoregressive/networks/transformer.py b/ar_transformer.py similarity index 97% rename from cosmos1/models/autoregressive/networks/transformer.py rename to ar_transformer.py index 66cb8ff6ab49eb0e3b12fc50757e107e27c3e599..0f6883dcf87f7732a5f4ac85c4f315923ceca31b 100644 --- a/cosmos1/models/autoregressive/networks/transformer.py +++ b/ar_transformer.py @@ -19,17 +19,17 @@ import torch import torch.nn as nn from torch.nn.modules.module import _IncompatibleKeys -from cosmos1.models.autoregressive.modules.attention import Attention -from cosmos1.models.autoregressive.modules.embedding import ( +from .ar_modules_attention import Attention +from .ar_modules_embedding import ( RotaryPositionEmbeddingPytorchV1, RotaryPositionEmbeddingPytorchV2, SinCosPosEmbAxisTE, ) -from cosmos1.models.autoregressive.modules.mlp import MLP -from cosmos1.models.autoregressive.modules.normalization import create_norm -from cosmos1.models.autoregressive.utils.checkpoint import process_state_dict, substrings_to_ignore -from cosmos1.models.autoregressive.utils.misc import maybe_convert_to_namespace -from cosmos1.utils import log +from .ar_modules_mlp import MLP +from .ar_modules_normalization import create_norm +from .checkpoint import process_state_dict, substrings_to_ignore +from .ar_utils_misc import maybe_convert_to_namespace +from . 
import log class TransformerBlock(nn.Module): diff --git a/cosmos1/models/autoregressive/utils/misc.py b/ar_utils_misc.py similarity index 100% rename from cosmos1/models/autoregressive/utils/misc.py rename to ar_utils_misc.py diff --git a/assets/cosmos-logo.png b/assets/cosmos-logo.png deleted file mode 100644 index 532bcc4369614e873a70ea070d9030a202c85b82..0000000000000000000000000000000000000000 Binary files a/assets/cosmos-logo.png and /dev/null differ diff --git a/cosmos1/models/diffusion/module/attention.py b/attention.py similarity index 100% rename from cosmos1/models/diffusion/module/attention.py rename to attention.py diff --git a/cosmos1/models/common/base_world_generation_pipeline.py b/base_world_generation_pipeline.py similarity index 98% rename from cosmos1/models/common/base_world_generation_pipeline.py rename to base_world_generation_pipeline.py index 742a158a78bfdac62ace0719ff9009c5459e5c57..7e9ffb4438253aa0456cbf33997a2937d9a877ab 100644 --- a/cosmos1/models/common/base_world_generation_pipeline.py +++ b/base_world_generation_pipeline.py @@ -21,8 +21,8 @@ from typing import Any import numpy as np import torch -from cosmos1.models.common.t5_text_encoder import CosmosT5TextEncoder -from cosmos1.models.guardrail.common import presets as guardrail_presets +from .t5_text_encoder import CosmosT5TextEncoder +from . import presets as guardrail_presets class BaseWorldGenerationPipeline(ABC): diff --git a/cosmos1/models/diffusion/diffusion/functional/batch_ops.py b/batch_ops.py similarity index 100% rename from cosmos1/models/diffusion/diffusion/functional/batch_ops.py rename to batch_ops.py diff --git a/cosmos1/models/guardrail/blocklist/blocklist.py b/blocklist.py similarity index 97% rename from cosmos1/models/guardrail/blocklist/blocklist.py rename to blocklist.py index fa3d30e0e74f162d8ea15e23aaded470f3c4ee90..cb9cec8aa7f61225481972e4ff26c50398706654 100644 --- a/cosmos1/models/guardrail/blocklist/blocklist.py +++ b/blocklist.py @@ -19,12 +19,13 @@ import re import string from difflib import SequenceMatcher +from . import log import nltk from better_profanity import profanity -from cosmos1.models.guardrail.blocklist.utils import read_keyword_list_from_dir, to_ascii -from cosmos1.models.guardrail.common.core import ContentSafetyGuardrail, GuardrailRunner -from cosmos1.utils import log, misc +from .guardrail_blocklist_utils import read_keyword_list_from_dir, to_ascii +from .guardrail_core import ContentSafetyGuardrail, GuardrailRunner +from . import misc DEFAULT_CHECKPOINT_DIR = "checkpoints/Cosmos-1.0-Guardrail/blocklist" CENSOR = misc.Color.red("*") diff --git a/cosmos1/models/diffusion/module/blocks.py b/blocks.py similarity index 99% rename from cosmos1/models/diffusion/module/blocks.py rename to blocks.py index aee7d6fe0f8cc91391e5f8fd71470288408d7003..bb8effcfa55fa4edfdc7939c776e452b9f7cb1d0 100644 --- a/cosmos1/models/diffusion/module/blocks.py +++ b/blocks.py @@ -22,8 +22,8 @@ from einops import rearrange, repeat from einops.layers.torch import Rearrange from torch import nn -from cosmos1.models.diffusion.module.attention import Attention, GPT2FeedForward -from cosmos1.utils import log +from .attention import Attention, GPT2FeedForward +from . 
import log def modulate(x, shift, scale): diff --git a/cosmos1/models/guardrail/face_blur_filter/blur_utils.py b/blur_utils.py similarity index 100% rename from cosmos1/models/guardrail/face_blur_filter/blur_utils.py rename to blur_utils.py diff --git a/cosmos1/models/guardrail/aegis/categories.py b/categories.py similarity index 100% rename from cosmos1/models/guardrail/aegis/categories.py rename to categories.py diff --git a/cosmos1/models/autoregressive/utils/checkpoint.py b/checkpoint.py similarity index 100% rename from cosmos1/models/autoregressive/utils/checkpoint.py rename to checkpoint.py diff --git a/checkpoints/README.md b/checkpoints/README.md deleted file mode 100644 index 2645e684e3011e8efe65f46985bbf38ae0493000..0000000000000000000000000000000000000000 --- a/checkpoints/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Checkpoint directory - -Follow our instructions for downloading checkpoints in [Cosmos Diffusion Inference](../cosmos1/models/diffusion/README.md#download-checkpoints) and [Cosmos Autoregressive Inference](../cosmos1/models/autoregressive/README.md). Cosmos checkpoints will be downloaded to this directory. diff --git a/cosmos1/models/diffusion/conditioner.py b/conditioner.py similarity index 98% rename from cosmos1/models/diffusion/conditioner.py rename to conditioner.py index 15c0d9b636f6fb4b540ee1d23846d26716433c98..12d4117e859a9b24ae7a5c2b0388ab59e59aaac7 100644 --- a/cosmos1/models/diffusion/conditioner.py +++ b/conditioner.py @@ -23,9 +23,9 @@ from typing import Any, Dict, List, Optional, Tuple, Union import torch import torch.nn as nn -from cosmos1.models.diffusion.diffusion.functional.batch_ops import batch_mul -from cosmos1.utils import log -from cosmos1.utils.lazy_config import instantiate +from .batch_ops import batch_mul +from . 
import log +from .lazy_config_init import instantiate class BaseConditionEntry(nn.Module): diff --git a/cosmos1/utils/config.py b/config.py similarity index 98% rename from cosmos1/utils/config.py rename to config.py index 7b4cb6bb03330c3701a850bbd50523331980d0f5..7f1ecf8c749574a0661b3046ad5cd0db0d540099 100644 --- a/cosmos1/utils/config.py +++ b/config.py @@ -19,8 +19,9 @@ from typing import Any, TypeVar import attrs -from cosmos1.utils.lazy_config import LazyDict -from cosmos1.utils.misc import Color +from omegaconf import DictConfig as LazyDict + +from .misc import Color T = TypeVar("T") diff --git a/cosmos1/models/diffusion/config/base/conditioner.py b/config_base_conditioner.py similarity index 95% rename from cosmos1/models/diffusion/config/base/conditioner.py rename to config_base_conditioner.py index 3333af87b216199c38dc0c45aedec389beec536e..a414a3cc2d26b8b54428e3cbdc9f4ceaf2ae00b6 100644 --- a/cosmos1/models/diffusion/config/base/conditioner.py +++ b/config_base_conditioner.py @@ -18,9 +18,9 @@ from typing import Dict, List, Optional import attrs import torch -from cosmos1.models.diffusion.conditioner import BaseConditionEntry, TextAttr, VideoConditioner, VideoExtendConditioner -from cosmos1.utils.lazy_config import LazyCall as L -from cosmos1.utils.lazy_config import LazyDict +from .conditioner import BaseConditionEntry, TextAttr, VideoConditioner, VideoExtendConditioner +from .lazy_config_init import LazyCall as L +from .lazy_config_init import LazyDict @attrs.define(slots=False) diff --git a/cosmos1/utils/config_helper.py b/config_helper.py similarity index 99% rename from cosmos1/utils/config_helper.py rename to config_helper.py index d6f4e169663c2bfe4eb09ce2c8571c88f2127450..e1f358e711926f6550d1764f35516320ba644a09 100644 --- a/cosmos1/utils/config_helper.py +++ b/config_helper.py @@ -27,8 +27,8 @@ from hydra import compose, initialize from hydra.core.config_store import ConfigStore from omegaconf import DictConfig, OmegaConf -from cosmos1.utils import log -from cosmos1.utils.config import Config +from . 
import log +from .config import Config def is_attrs_or_dataclass(obj) -> bool: diff --git a/cosmos1/models/autoregressive/diffusion_decoder/config/base/conditioner.py b/cosmos1/models/autoregressive/diffusion_decoder/config/base/conditioner.py index 8124ee46e4be5608c83df6459f665dad6e9642a7..93503037bf8f3decde47e8d0250574f68a3b2e91 100644 --- a/cosmos1/models/autoregressive/diffusion_decoder/config/base/conditioner.py +++ b/cosmos1/models/autoregressive/diffusion_decoder/config/base/conditioner.py @@ -18,8 +18,8 @@ from typing import Dict, Optional import torch -from cosmos1.models.diffusion.conditioner import BaseVideoCondition, GeneralConditioner -from cosmos1.models.diffusion.config.base.conditioner import ( +from conditioner import BaseVideoCondition, GeneralConditioner +from config_base_conditioner import ( FPSConfig, ImageSizeConfig, LatentConditionConfig, @@ -28,8 +28,8 @@ from cosmos1.models.diffusion.config.base.conditioner import ( PaddingMaskConfig, TextConfig, ) -from cosmos1.utils.lazy_config import LazyCall as L -from cosmos1.utils.lazy_config import LazyDict +from lazy_config_init import LazyCall as L +from lazy_config_init import LazyDict @dataclass diff --git a/cosmos1/models/autoregressive/diffusion_decoder/config/config_latent_diffusion_decoder.py b/cosmos1/models/autoregressive/diffusion_decoder/config/config_latent_diffusion_decoder.py index f820b76d12107d700d33f7e19d4aae62049e7e31..764d78eeb3c6076eb3dae152de3c5a21585638ee 100644 --- a/cosmos1/models/autoregressive/diffusion_decoder/config/config_latent_diffusion_decoder.py +++ b/cosmos1/models/autoregressive/diffusion_decoder/config/config_latent_diffusion_decoder.py @@ -20,8 +20,8 @@ import attrs from cosmos1.models.autoregressive.diffusion_decoder.config.registry import register_configs as register_dd_configs from cosmos1.models.diffusion.config.base.model import LatentDiffusionDecoderModelConfig from cosmos1.models.diffusion.config.registry import register_configs -from cosmos1.utils import config -from cosmos1.utils.config_helper import import_all_modules_from_package +from . 
import config +from config_helper import import_all_modules_from_package @attrs.define(slots=False) diff --git a/cosmos1/models/autoregressive/diffusion_decoder/config/inference/cosmos_diffusiondecoder_7b.py b/cosmos1/models/autoregressive/diffusion_decoder/config/inference/cosmos_diffusiondecoder_7b.py index ad296f42b5317ca2a4a26e21ca32bf1d952566d1..004be139e14faa505edc788ae5184e4d952588fd 100644 --- a/cosmos1/models/autoregressive/diffusion_decoder/config/inference/cosmos_diffusiondecoder_7b.py +++ b/cosmos1/models/autoregressive/diffusion_decoder/config/inference/cosmos_diffusiondecoder_7b.py @@ -16,8 +16,8 @@ from hydra.core.config_store import ConfigStore from cosmos1.models.autoregressive.diffusion_decoder.network import DiffusionDecoderGeneralDIT -from cosmos1.utils.lazy_config import LazyCall as L -from cosmos1.utils.lazy_config import LazyDict +from lazy_config_init import LazyCall as L +from lazy_config_init import LazyDict num_frames = 57 Cosmos_DiffusionDecoder_7B_INFERENCE_ONLY: LazyDict = LazyDict( diff --git a/cosmos1/models/autoregressive/diffusion_decoder/config/registry.py b/cosmos1/models/autoregressive/diffusion_decoder/config/registry.py index b835fc06da23f9aef4bab0ad5aa9dc9b9a2b43d4..b0a2dd89ea79b439cb4b4ef0fcdcffd3829aa7d2 100644 --- a/cosmos1/models/autoregressive/diffusion_decoder/config/registry.py +++ b/cosmos1/models/autoregressive/diffusion_decoder/config/registry.py @@ -18,9 +18,9 @@ from hydra.core.config_store import ConfigStore from cosmos1.models.autoregressive.diffusion_decoder.config.base.conditioner import ( VideoLatentDiffusionDecoderConditionerConfig, ) -from cosmos1.models.autoregressive.tokenizer.discrete_video import DiscreteVideoFSQJITTokenizer -from cosmos1.models.diffusion.module.pretrained_vae import JITVAE, JointImageVideoSharedJITTokenizer, VideoJITTokenizer -from cosmos1.utils.lazy_config import LazyCall as L +from discrete_video import DiscreteVideoFSQJITTokenizer +from pretrained_vae import JITVAE, JointImageVideoSharedJITTokenizer, VideoJITTokenizer +from lazy_config_init import LazyCall as L def get_cosmos_video_discrete_tokenizer_comp8x16x16( diff --git a/cosmos1/models/autoregressive/diffusion_decoder/inference.py b/cosmos1/models/autoregressive/diffusion_decoder/inference.py index b923840956955a1913263d4a8151d14993a75b71..68d7c30a792c40b09bf6babda7beff591b0c0126 100644 --- a/cosmos1/models/autoregressive/diffusion_decoder/inference.py +++ b/cosmos1/models/autoregressive/diffusion_decoder/inference.py @@ -19,10 +19,10 @@ from typing import List import torch -from cosmos1.models.autoregressive.configs.inference.inference_config import DiffusionDecoderSamplingConfig +from inference_config import DiffusionDecoderSamplingConfig from cosmos1.models.autoregressive.diffusion_decoder.model import LatentDiffusionDecoderModel from cosmos1.models.autoregressive.diffusion_decoder.utils import linear_blend_video_list, split_with_overlap -from cosmos1.utils import log +from . 
import log def diffusion_decoder_process_tokens( diff --git a/cosmos1/models/autoregressive/diffusion_decoder/model.py b/cosmos1/models/autoregressive/diffusion_decoder/model.py index 50f4ea81a5ade75e18cccb240f599aa4a0c789cd..4693a2868109e8a7b5e5598fbe76708fa25cd19e 100644 --- a/cosmos1/models/autoregressive/diffusion_decoder/model.py +++ b/cosmos1/models/autoregressive/diffusion_decoder/model.py @@ -19,11 +19,11 @@ from typing import Callable, Dict, Optional, Tuple import torch from torch import Tensor -from cosmos1.models.diffusion.conditioner import BaseVideoCondition -from cosmos1.models.diffusion.diffusion.functional.batch_ops import batch_mul -from cosmos1.models.diffusion.diffusion.modules.res_sampler import COMMON_SOLVER_OPTIONS -from cosmos1.models.diffusion.model.model_t2w import DiffusionT2WModel as VideoDiffusionModel -from cosmos1.utils.lazy_config import instantiate as lazy_instantiate +from conditioner import BaseVideoCondition +from batch_ops import batch_mul +from res_sampler import COMMON_SOLVER_OPTIONS +from model_t2w import DiffusionT2WModel as VideoDiffusionModel +from lazy_config_init import instantiate as lazy_instantiate @dataclass diff --git a/cosmos1/models/autoregressive/diffusion_decoder/network.py b/cosmos1/models/autoregressive/diffusion_decoder/network.py index e3c114d520677a67e15fb126aad82250c7bbec44..bbd2cc1935ea11c560fb02da823e8136fbd7c8a5 100644 --- a/cosmos1/models/autoregressive/diffusion_decoder/network.py +++ b/cosmos1/models/autoregressive/diffusion_decoder/network.py @@ -20,7 +20,7 @@ from einops import rearrange from torch import nn from torchvision import transforms -from cosmos1.models.diffusion.module.blocks import PatchEmbed +from blocks import PatchEmbed from cosmos1.models.diffusion.networks.general_dit import GeneralDIT diff --git a/cosmos1/models/autoregressive/inference/base.py b/cosmos1/models/autoregressive/inference/base.py index 5d756f8c06a0d6c7355774a41dd250079fbe085a..cd2b3faa7d3656cbe7527771b5241d48695d6cec 100644 --- a/cosmos1/models/autoregressive/inference/base.py +++ b/cosmos1/models/autoregressive/inference/base.py @@ -21,7 +21,7 @@ import torch from cosmos1.models.autoregressive.inference.world_generation_pipeline import ARBaseGenerationPipeline from cosmos1.models.autoregressive.utils.inference import add_common_arguments, load_vision_input, validate_args -from cosmos1.utils import log +from . import log def parse_args(): diff --git a/cosmos1/models/autoregressive/inference/video2world.py b/cosmos1/models/autoregressive/inference/video2world.py index 4f9bd2cf9f40eefa3cbd473f10c8b7d399b4f637..dd16ee7922bf229699309e87085de355bee18f6d 100644 --- a/cosmos1/models/autoregressive/inference/video2world.py +++ b/cosmos1/models/autoregressive/inference/video2world.py @@ -21,8 +21,8 @@ import torch from cosmos1.models.autoregressive.inference.world_generation_pipeline import ARVideo2WorldGenerationPipeline from cosmos1.models.autoregressive.utils.inference import add_common_arguments, load_vision_input, validate_args -from cosmos1.utils import log -from cosmos1.utils.io import read_prompts_from_file +from . 
import log +from .utils_io import read_prompts_from_file def parse_args(): diff --git a/cosmos1/models/autoregressive/inference/world_generation_pipeline.py b/cosmos1/models/autoregressive/inference/world_generation_pipeline.py index bf0f36d91491e07563192637a7be6df42fa58be5..d09680363cdd2349cbcb52e240aadc072ef48acb 100644 --- a/cosmos1/models/autoregressive/inference/world_generation_pipeline.py +++ b/cosmos1/models/autoregressive/inference/world_generation_pipeline.py @@ -17,13 +17,14 @@ import gc import os from typing import List, Optional, Tuple +from . import log import numpy as np import torch from einops import rearrange -from cosmos1.models.autoregressive.configs.base.model_config import create_video2world_model_config -from cosmos1.models.autoregressive.configs.base.tokenizer import TokenizerConfig -from cosmos1.models.autoregressive.configs.inference.inference_config import ( +from model_config import create_video2world_model_config +from ar_config_tokenizer import TokenizerConfig +from inference_config import ( DataShapeConfig, DiffusionDecoderSamplingConfig, InferenceConfig, @@ -31,15 +32,15 @@ from cosmos1.models.autoregressive.configs.inference.inference_config import ( ) from cosmos1.models.autoregressive.diffusion_decoder.inference import diffusion_decoder_process_tokens from cosmos1.models.autoregressive.diffusion_decoder.model import LatentDiffusionDecoderModel -from cosmos1.models.autoregressive.model import AutoRegressiveModel +from ar_model import AutoRegressiveModel from cosmos1.models.autoregressive.utils.inference import _SUPPORTED_CONTEXT_LEN, prepare_video_batch_for_saving -from cosmos1.models.common.base_world_generation_pipeline import BaseWorldGenerationPipeline -from cosmos1.models.diffusion.inference.inference_utils import ( +from base_world_generation_pipeline import BaseWorldGenerationPipeline +from inference_utils import ( load_model_by_config, load_network_model, load_tokenizer_model, ) -from cosmos1.utils import log, misc +from . import misc def detect_model_size_from_ckpt_path(ckpt_path: str) -> str: diff --git a/cosmos1/models/autoregressive/nemo/cosmos.py b/cosmos1/models/autoregressive/nemo/cosmos.py index b4f34ce8929d2aedc3f345420d91d22081a8dddd..0001790603934f3580d4e424ee76aaac1578afac 100644 --- a/cosmos1/models/autoregressive/nemo/cosmos.py +++ b/cosmos1/models/autoregressive/nemo/cosmos.py @@ -29,7 +29,7 @@ from nemo.lightning import OptimizerModule, io from nemo.lightning.base import teardown from torch import Tensor, nn -from cosmos1.utils import log +from . import log class RotaryEmbedding3D(RotaryEmbedding): diff --git a/cosmos1/models/autoregressive/nemo/inference/general.py b/cosmos1/models/autoregressive/nemo/inference/general.py index dbc34e431da44065ecc4a484a0ed3e1acfbb464c..c1bcfe5c5b30bfbbc787fcc0213228faab65776e 100644 --- a/cosmos1/models/autoregressive/nemo/inference/general.py +++ b/cosmos1/models/autoregressive/nemo/inference/general.py @@ -34,10 +34,10 @@ from nemo.lightning import io from nemo.lightning.ckpt_utils import ckpt_to_context_subdir from cosmos1.models.autoregressive.nemo.utils import run_diffusion_decoder_model -from cosmos1.models.autoregressive.tokenizer.discrete_video import DiscreteVideoFSQJITTokenizer +from discrete_video import DiscreteVideoFSQJITTokenizer from cosmos1.models.autoregressive.utils.inference import load_vision_input -from cosmos1.models.guardrail.common import presets as guardrail_presets -from cosmos1.utils import log +from . import presets as guardrail_presets +from . 
import log torch._C._jit_set_texpr_fuser_enabled(False) diff --git a/cosmos1/models/autoregressive/nemo/post_training/prepare_dataset.py b/cosmos1/models/autoregressive/nemo/post_training/prepare_dataset.py index 28d531f0dc2a0e03abc13c82e54037fd9fc36a51..55848c654b7e695ebc75afdf185eab64cbf7b25c 100644 --- a/cosmos1/models/autoregressive/nemo/post_training/prepare_dataset.py +++ b/cosmos1/models/autoregressive/nemo/post_training/prepare_dataset.py @@ -23,8 +23,8 @@ from huggingface_hub import snapshot_download from nemo.collections.nlp.data.language_modeling.megatron import indexed_dataset from cosmos1.models.autoregressive.nemo.utils import read_input_videos -from cosmos1.models.autoregressive.tokenizer.discrete_video import DiscreteVideoFSQJITTokenizer -from cosmos1.utils import log +from discrete_video import DiscreteVideoFSQJITTokenizer +from . import log TOKENIZER_COMPRESSION_FACTOR = [8, 16, 16] DATA_RESOLUTION_SUPPORTED = [640, 1024] diff --git a/cosmos1/models/autoregressive/nemo/utils.py b/cosmos1/models/autoregressive/nemo/utils.py index 755986e9dff6d568f4197610d963d07b704ee7e1..71d6db832c35c25394146f747a88a6cd656cb1ac 100644 --- a/cosmos1/models/autoregressive/nemo/utils.py +++ b/cosmos1/models/autoregressive/nemo/utils.py @@ -23,16 +23,16 @@ import torch import torchvision from huggingface_hub import snapshot_download -from cosmos1.models.autoregressive.configs.inference.inference_config import DiffusionDecoderSamplingConfig +from inference_config import DiffusionDecoderSamplingConfig from cosmos1.models.autoregressive.diffusion_decoder.inference import diffusion_decoder_process_tokens from cosmos1.models.autoregressive.diffusion_decoder.model import LatentDiffusionDecoderModel -from cosmos1.models.diffusion.inference.inference_utils import ( +from inference_utils import ( load_network_model, load_tokenizer_model, skip_init_linear, ) -from cosmos1.utils import log -from cosmos1.utils.config_helper import get_config_module, override +from . import log +from config_helper import get_config_module, override TOKENIZER_COMPRESSION_FACTOR = [8, 16, 16] DATA_RESOLUTION_SUPPORTED = [640, 1024] diff --git a/cosmos1/models/autoregressive/utils/inference.py b/cosmos1/models/autoregressive/utils/inference.py index 150d41efce6a6668d3cf9f1a60d5bcf886d12931..670e3bc7b10bfbae87e7be7cac1f39844f4807d3 100644 --- a/cosmos1/models/autoregressive/utils/inference.py +++ b/cosmos1/models/autoregressive/utils/inference.py @@ -25,8 +25,8 @@ import torch import torchvision from PIL import Image -from cosmos1.models.autoregressive.configs.inference.inference_config import SamplingConfig -from cosmos1.utils import log +from inference_config import SamplingConfig +from . 
import log _IMAGE_EXTENSIONS = [".png", ".jpg", ".jpeg", "webp"] _VIDEO_EXTENSIONS = [".mp4"] diff --git a/cosmos1/models/diffusion/config/base/model.py b/cosmos1/models/diffusion/config/base/model.py index 97b94e554f612f2f45a34443944d3cdefc1b7c82..3850f5410c53bfe8aea6f4eb1f7faf523d063a7b 100644 --- a/cosmos1/models/diffusion/config/base/model.py +++ b/cosmos1/models/diffusion/config/base/model.py @@ -17,7 +17,7 @@ from typing import List import attrs -from cosmos1.utils.lazy_config import LazyDict +from lazy_config_init import LazyDict @attrs.define(slots=False) diff --git a/cosmos1/models/diffusion/config/base/net.py b/cosmos1/models/diffusion/config/base/net.py index 931a8ef2204209d5812b0b16a156f1a3decdb94e..29710a625969dd1e4c43aa2daf8ff6e62fae308b 100644 --- a/cosmos1/models/diffusion/config/base/net.py +++ b/cosmos1/models/diffusion/config/base/net.py @@ -16,8 +16,8 @@ import copy from cosmos1.models.diffusion.networks.general_dit import GeneralDIT -from cosmos1.utils.lazy_config import LazyCall as L -from cosmos1.utils.lazy_config import LazyDict +from lazy_config_init import LazyCall as L +from lazy_config_init import LazyDict FADITV2Config: LazyDict = L(GeneralDIT)( max_img_h=240, diff --git a/cosmos1/models/diffusion/config/base/tokenizer.py b/cosmos1/models/diffusion/config/base/tokenizer.py index f03a96abd0cf23e6285f5ffbfc1e93e65718700f..458f9a10a59ef24688f01ebc12020d8d7e5a4874 100644 --- a/cosmos1/models/diffusion/config/base/tokenizer.py +++ b/cosmos1/models/diffusion/config/base/tokenizer.py @@ -15,8 +15,8 @@ import omegaconf -from cosmos1.models.diffusion.module.pretrained_vae import JITVAE, JointImageVideoSharedJITTokenizer, VideoJITTokenizer -from cosmos1.utils.lazy_config import LazyCall as L +from pretrained_vae import JITVAE, JointImageVideoSharedJITTokenizer, VideoJITTokenizer +from lazy_config_init import LazyCall as L TOKENIZER_OPTIONS = {} diff --git a/cosmos1/models/diffusion/config/config.py b/cosmos1/models/diffusion/config/config.py index 514e47022e728e885000505f728dd5a0056d3e0d..73e8c9aaa694a19b2ff5faba76a2c7198dadb328 100644 --- a/cosmos1/models/diffusion/config/config.py +++ b/cosmos1/models/diffusion/config/config.py @@ -19,8 +19,8 @@ import attrs from cosmos1.models.diffusion.config.base.model import DefaultModelConfig from cosmos1.models.diffusion.config.registry import register_configs -from cosmos1.utils import config -from cosmos1.utils.config_helper import import_all_modules_from_package +from . 
import config +from config_helper import import_all_modules_from_package @attrs.define(slots=False) diff --git a/cosmos1/models/diffusion/config/inference/cosmos-1-diffusion-text2world.py b/cosmos1/models/diffusion/config/inference/cosmos-1-diffusion-text2world.py index 5b6fe2a7ffd63536a6cce293cc38b471ae21eb18..b11530091f40a7ca4b27df55ed7060f2992d7c50 100644 --- a/cosmos1/models/diffusion/config/inference/cosmos-1-diffusion-text2world.py +++ b/cosmos1/models/diffusion/config/inference/cosmos-1-diffusion-text2world.py @@ -15,7 +15,7 @@ from hydra.core.config_store import ConfigStore -from cosmos1.utils.lazy_config import LazyDict +from lazy_config_init import LazyDict Cosmos_1_0_Diffusion_Text2World_7B: LazyDict = LazyDict( dict( diff --git a/cosmos1/models/diffusion/config/inference/cosmos-1-diffusion-video2world.py b/cosmos1/models/diffusion/config/inference/cosmos-1-diffusion-video2world.py index 39f90475c23dd6983f3456bb614f779a63b1626b..d707e0e68ddad6723e845993e9fff37cd5843b26 100644 --- a/cosmos1/models/diffusion/config/inference/cosmos-1-diffusion-video2world.py +++ b/cosmos1/models/diffusion/config/inference/cosmos-1-diffusion-video2world.py @@ -16,8 +16,8 @@ from hydra.core.config_store import ConfigStore from cosmos1.models.diffusion.networks.general_dit_video_conditioned import VideoExtendGeneralDIT -from cosmos1.utils.lazy_config import LazyCall as L -from cosmos1.utils.lazy_config import LazyDict +from lazy_config_init import LazyCall as L +from lazy_config_init import LazyDict Cosmos_1_0_Diffusion_Video2World_7B: LazyDict = LazyDict( dict( diff --git a/cosmos1/models/diffusion/config/registry.py b/cosmos1/models/diffusion/config/registry.py index 5d92360f8370b4e590e4acc02ac35ac5be35a693..fad3454388285b2c2b53efcd7bb4f766e5762f26 100644 --- a/cosmos1/models/diffusion/config/registry.py +++ b/cosmos1/models/diffusion/config/registry.py @@ -15,7 +15,7 @@ from hydra.core.config_store import ConfigStore -from cosmos1.models.diffusion.config.base.conditioner import ( +from config_base_conditioner import ( BaseVideoConditionerConfig, VideoConditionerFpsSizePaddingConfig, VideoExtendConditionerConfig, diff --git a/cosmos1/models/diffusion/nemo/inference/general.py b/cosmos1/models/diffusion/nemo/inference/general.py index 397c60febed717ded44f54497edd1ebecbb7382c..a45f77b860b2647bf6315795585cb444574c59fc 100644 --- a/cosmos1/models/diffusion/nemo/inference/general.py +++ b/cosmos1/models/diffusion/nemo/inference/general.py @@ -37,7 +37,7 @@ from nemo.collections.diffusion.sampler.cosmos.cosmos_diffusion_pipeline import from transformers import T5EncoderModel, T5TokenizerFast from cosmos1.models.diffusion.nemo.inference.inference_utils import process_prompt, save_video -from cosmos1.utils import log +from . import log EXAMPLE_PROMPT = ( "The teal robot is cooking food in a kitchen. 
Steam rises from a simmering pot " diff --git a/cosmos1/models/diffusion/nemo/inference/inference_utils.py b/cosmos1/models/diffusion/nemo/inference/inference_utils.py index de95a04d418aace14944ddc636247ab9cbb47848..c6816b27842a11476e3eecd9cce74cff2be99d01 100644 --- a/cosmos1/models/diffusion/nemo/inference/inference_utils.py +++ b/cosmos1/models/diffusion/nemo/inference/inference_utils.py @@ -19,18 +19,18 @@ import imageio import numpy as np import torch -from cosmos1.models.autoregressive.model import AutoRegressiveModel -from cosmos1.models.diffusion.prompt_upsampler.text2world_prompt_upsampler_inference import ( +from ar_model import AutoRegressiveModel +from text2world_prompt_upsampler_inference import ( create_prompt_upsampler, run_chat_completion, ) -from cosmos1.models.guardrail.common.presets import ( +from presets import ( create_text_guardrail_runner, create_video_guardrail_runner, run_text_guardrail, run_video_guardrail, ) -from cosmos1.utils import log +from . import log def get_upsampled_prompt( diff --git a/cosmos1/models/diffusion/nemo/post_training/prepare_dataset.py b/cosmos1/models/diffusion/nemo/post_training/prepare_dataset.py index 7b4e85a48b1cbbeb0835bfebaa5acc4fc7579841..dab3ab5765311f548b5c2a7d2f7f2f50d731e39e 100644 --- a/cosmos1/models/diffusion/nemo/post_training/prepare_dataset.py +++ b/cosmos1/models/diffusion/nemo/post_training/prepare_dataset.py @@ -27,7 +27,7 @@ from nemo.collections.diffusion.models.model import DiT7BConfig from tqdm import tqdm from transformers import T5EncoderModel, T5TokenizerFast -from cosmos1.utils import log +from . import log def get_parser(): diff --git a/cosmos1/models/diffusion/networks/general_dit.py b/cosmos1/models/diffusion/networks/general_dit.py index 5b0bb143ac6c222a9a8b9c6690da7785e66a51f3..8ad88cf108ad337af11fc77dfe73a35e7d0c30d0 100644 --- a/cosmos1/models/diffusion/networks/general_dit.py +++ b/cosmos1/models/diffusion/networks/general_dit.py @@ -24,17 +24,17 @@ from einops import rearrange from torch import nn from torchvision import transforms -from cosmos1.models.diffusion.conditioner import DataType -from cosmos1.models.diffusion.module.attention import get_normalization -from cosmos1.models.diffusion.module.blocks import ( +from conditioner import DataType +from attention import get_normalization +from blocks import ( FinalLayer, GeneralDITTransformerBlock, PatchEmbed, TimestepEmbedding, Timesteps, ) -from cosmos1.models.diffusion.module.position_embedding import LearnablePosEmbAxis, VideoRopePosition3DEmb -from cosmos1.utils import log +from position_embedding import LearnablePosEmbAxis, VideoRopePosition3DEmb +from . import log class GeneralDIT(nn.Module): diff --git a/cosmos1/models/diffusion/networks/general_dit_video_conditioned.py b/cosmos1/models/diffusion/networks/general_dit_video_conditioned.py index f27bcabe29cd87fc17b0f57f9e56b2a0f1bb3959..de941593cc660ea66beb675a0feb8a1690bd0ce6 100644 --- a/cosmos1/models/diffusion/networks/general_dit_video_conditioned.py +++ b/cosmos1/models/diffusion/networks/general_dit_video_conditioned.py @@ -19,10 +19,10 @@ import torch from einops import rearrange from torch import nn -from cosmos1.models.diffusion.conditioner import DataType -from cosmos1.models.diffusion.module.blocks import TimestepEmbedding, Timesteps +from conditioner import DataType +from blocks import TimestepEmbedding, Timesteps from cosmos1.models.diffusion.networks.general_dit import GeneralDIT -from cosmos1.utils import log +from . 
import log class VideoExtendGeneralDIT(GeneralDIT): diff --git a/cosmos1/models/diffusion/diffusion/modules/denoiser_scaling.py b/denoiser_scaling.py similarity index 100% rename from cosmos1/models/diffusion/diffusion/modules/denoiser_scaling.py rename to denoiser_scaling.py diff --git a/cosmos1/utils/device.py b/device.py similarity index 100% rename from cosmos1/utils/device.py rename to device.py diff --git a/cosmos1/models/diffusion/diffusion/types.py b/diffusion_types.py similarity index 100% rename from cosmos1/models/diffusion/diffusion/types.py rename to diffusion_types.py diff --git a/cosmos1/models/autoregressive/tokenizer/discrete_video.py b/discrete_video.py similarity index 99% rename from cosmos1/models/autoregressive/tokenizer/discrete_video.py rename to discrete_video.py index 477c9ac9f80832448f73379bbd6c67e29a2f40da..5e5a5244c87516121f3e7686c924f8b1c66cd772 100644 --- a/cosmos1/models/autoregressive/tokenizer/discrete_video.py +++ b/discrete_video.py @@ -18,7 +18,7 @@ from typing import Optional import torch from einops import rearrange -from cosmos1.models.autoregressive.tokenizer.quantizers import FSQuantizer +from .ar_tokenizer_quantizers import FSQuantizer # Make sure jit model output consistenly during consecutive calls # Check here: https://github.com/pytorch/pytorch/issues/74534 diff --git a/cosmos1/utils/distributed.py b/distributed.py similarity index 99% rename from cosmos1/utils/distributed.py rename to distributed.py index d0a88c0519245236af13c104754bcd83b517cae3..c4f9985ebbe6973233860f3aeba7bbb57b39da64 100644 --- a/cosmos1/utils/distributed.py +++ b/distributed.py @@ -27,8 +27,8 @@ import pynvml import torch import torch.distributed as dist -from cosmos1.utils import log -from cosmos1.utils.device import Device +from . import log +from .device import Device def init() -> int | None: diff --git a/cosmos1/models/guardrail/face_blur_filter/face_blur_filter.py b/face_blur_filter.py similarity index 95% rename from cosmos1/models/guardrail/face_blur_filter/face_blur_filter.py rename to face_blur_filter.py index a4163942f69f14f10cf4fdde54eb5c1303c16782..19b7c7de3fab8ebd0bc8e070d48dc1298e427ba4 100644 --- a/cosmos1/models/guardrail/face_blur_filter/face_blur_filter.py +++ b/face_blur_filter.py @@ -16,6 +16,7 @@ import argparse import os +from . import log import numpy as np import torch from pytorch_retinaface.data import cfg_re50 @@ -24,11 +25,11 @@ from pytorch_retinaface.models.retinaface import RetinaFace from torch.utils.data import DataLoader, TensorDataset from tqdm import tqdm -from cosmos1.models.guardrail.common.core import GuardrailRunner, PostprocessingGuardrail -from cosmos1.models.guardrail.common.io_utils import get_video_filepaths, read_video, save_video -from cosmos1.models.guardrail.face_blur_filter.blur_utils import pixelate_face -from cosmos1.models.guardrail.face_blur_filter.retinaface_utils import decode_batch, filter_detected_boxes, load_model -from cosmos1.utils import log, misc +from .guardrail_core import GuardrailRunner, PostprocessingGuardrail +from .guardrail_io_utils import get_video_filepaths, read_video, save_video +from .blur_utils import pixelate_face +from .retinaface_utils import decode_batch, filter_detected_boxes, load_model +from . 
import misc DEFAULT_RETINAFACE_CHECKPOINT = "checkpoints/Cosmos-1.0-Guardrail/face_blur_filter/Resnet50_Final.pth" diff --git a/cosmos1/utils/lazy_config/file_io.py b/file_io.py similarity index 100% rename from cosmos1/utils/lazy_config/file_io.py rename to file_io.py diff --git a/cosmos1/models/guardrail/blocklist/utils.py b/guardrail_blocklist_utils.py similarity index 98% rename from cosmos1/models/guardrail/blocklist/utils.py rename to guardrail_blocklist_utils.py index 0c721914e1372f39ab81ad213a3b65fe30adee5b..966d46001c769adb16dd7ca011814c1d2f8b6a1a 100644 --- a/cosmos1/models/guardrail/blocklist/utils.py +++ b/guardrail_blocklist_utils.py @@ -16,7 +16,7 @@ import os import re -from cosmos1.utils import log +from . import log def read_keyword_list_from_dir(folder_path: str) -> list[str]: diff --git a/cosmos1/models/guardrail/common/core.py b/guardrail_core.py similarity index 98% rename from cosmos1/models/guardrail/common/core.py rename to guardrail_core.py index 15c7a36f3130c33d064f206b0656cf86cd91f403..d6f8dbfff88e1ba138a227a995cd9f329338898d 100644 --- a/cosmos1/models/guardrail/common/core.py +++ b/guardrail_core.py @@ -17,7 +17,7 @@ from typing import Any, Tuple import numpy as np -from cosmos1.utils import log +from . import log class ContentSafetyGuardrail: diff --git a/cosmos1/models/guardrail/common/io_utils.py b/guardrail_io_utils.py similarity index 98% rename from cosmos1/models/guardrail/common/io_utils.py rename to guardrail_io_utils.py index b027850d84e8d618e0a5bb3dc6d7cc4bb5acef66..e3d562bd840ab184fc316830fba8acdfa5c9a251 100644 --- a/cosmos1/models/guardrail/common/io_utils.py +++ b/guardrail_io_utils.py @@ -19,7 +19,7 @@ from dataclasses import dataclass import imageio import numpy as np -from cosmos1.utils import log +from . 
import log @dataclass diff --git a/cosmos1/models/diffusion/prompt_upsampler/inference.py b/inference.py similarity index 96% rename from cosmos1/models/diffusion/prompt_upsampler/inference.py rename to inference.py index b022dd14933e4e0d5abe5f47f61cd29675101d49..4252b3758a2d49c5809dd963f7ae403209cbff7b 100644 --- a/cosmos1/models/diffusion/prompt_upsampler/inference.py +++ b/inference.py @@ -17,9 +17,9 @@ from typing import List, Optional, TypedDict import torch -from cosmos1.models.autoregressive.model import AutoRegressiveModel -from cosmos1.models.autoregressive.tokenizer.image_text_tokenizer import ImageTextTokenizer -from cosmos1.models.autoregressive.tokenizer.text_tokenizer import TextTokenizer +from .ar_model import AutoRegressiveModel +from .ar_tokenizer_image_text_tokenizer import ImageTextTokenizer +from .ar_tokenizer_text_tokenizer import TextTokenizer class ChatPrediction(TypedDict, total=False): diff --git a/cosmos1/models/autoregressive/configs/inference/inference_config.py b/inference_config.py similarity index 97% rename from cosmos1/models/autoregressive/configs/inference/inference_config.py rename to inference_config.py index 6ff2ee93cbeb016f5c472952cb0dbcd1bab4e3fc..5ea7782d3a7217d3c5eaba9d696b0a6dc3f836ed 100644 --- a/cosmos1/models/autoregressive/configs/inference/inference_config.py +++ b/inference_config.py @@ -17,7 +17,7 @@ from typing import Any, List, Union import attrs -from cosmos1.models.autoregressive.configs.base.model import ModelConfig, TokenizerConfig +from .ar_configs_base_model import ModelConfig, TokenizerConfig @attrs.define(slots=False) diff --git a/cosmos1/models/diffusion/inference/inference_utils.py b/inference_utils.py similarity index 98% rename from cosmos1/models/diffusion/inference/inference_utils.py rename to inference_utils.py index 43e6c8cdade5c288ea551df827153b3eb33b614d..eff538580e24c856fcc2c7e659a5c3557b970160 100644 --- a/cosmos1/models/diffusion/inference/inference_utils.py +++ b/inference_utils.py @@ -24,11 +24,11 @@ import numpy as np import torch import torchvision.transforms.functional as transforms_F -from cosmos1.models.diffusion.model.model_t2w import DiffusionT2WModel -from cosmos1.models.diffusion.model.model_v2w import DiffusionV2WModel -from cosmos1.utils import log, misc -from cosmos1.utils.config_helper import get_config_module, override -from cosmos1.utils.io import load_from_fileobj +from .model_t2w import DiffusionT2WModel +from .model_v2w import DiffusionV2WModel +from .config_helper import get_config_module, override +from .utils_io import load_from_fileobj +from .misc import arch_invariant_rand TORCH_VERSION: Tuple[int, ...] = tuple(int(x) for x in torch.__version__.split(".")[:2]) if TORCH_VERSION >= (1, 11): @@ -418,7 +418,7 @@ def generate_world_from_text( 3. 
Decodes latents to pixel space """ x_sigma_max = ( - misc.arch_invariant_rand( + arch_invariant_rand( (1,) + tuple(state_shape), torch.float32, model.tensor_kwargs["device"], @@ -484,7 +484,7 @@ def generate_world_from_video( num_of_latent_condition = compute_num_latent_frames(model, num_input_frames) x_sigma_max = ( - misc.arch_invariant_rand( + arch_invariant_rand( (1,) + tuple(state_shape), torch.float32, model.tensor_kwargs["device"], diff --git a/cosmos1/utils/lazy_config/instantiate.py b/instantiate.py similarity index 98% rename from cosmos1/utils/lazy_config/instantiate.py rename to instantiate.py index 742ed3816bdbf910e92e80812f947c70236dd9d5..80112fd66106c65ee2e4cfdc375a53a131b7b57c 100644 --- a/cosmos1/utils/lazy_config/instantiate.py +++ b/instantiate.py @@ -20,7 +20,7 @@ from typing import Any import attrs -from cosmos1.utils.lazy_config.registry import _convert_target_to_string, locate +from .registry import _convert_target_to_string, locate __all__ = ["dump_dataclass", "instantiate"] diff --git a/cosmos1/utils/lazy_config/lazy.py b/lazy.py similarity index 98% rename from cosmos1/utils/lazy_config/lazy.py rename to lazy.py index 68f761d7b3762cf387623e609954de90eac4619a..18d48b9a2865916eabb924f6d6b42b1beffc0ab1 100644 --- a/cosmos1/utils/lazy_config/lazy.py +++ b/lazy.py @@ -29,8 +29,8 @@ import attrs import yaml from omegaconf import DictConfig, ListConfig, OmegaConf -from cosmos1.utils.lazy_config.file_io import PathManager -from cosmos1.utils.lazy_config.registry import _convert_target_to_string +from .file_io import PathManager +from .registry import _convert_target_to_string __all__ = ["LazyCall", "LazyConfig"] diff --git a/cosmos1/utils/lazy_config/__init__.py b/lazy_config_init.py similarity index 91% rename from cosmos1/utils/lazy_config/__init__.py rename to lazy_config_init.py index cb5b0ec33f05de2f3761e4f724200c4383f481d1..fa48b6ddc9171218d21c6e4e1d20e267e8b51015 100644 --- a/cosmos1/utils/lazy_config/__init__.py +++ b/lazy_config_init.py @@ -3,9 +3,9 @@ import os from omegaconf import DictConfig, OmegaConf -from cosmos1.utils.lazy_config.instantiate import instantiate -from cosmos1.utils.lazy_config.lazy import LazyCall, LazyConfig -from cosmos1.utils.lazy_config.omegaconf_patch import to_object +from .instantiate import instantiate +from .lazy import LazyCall, LazyConfig +from .omegaconf_patch import to_object OmegaConf.to_object = to_object diff --git a/cosmos1/utils/log.py b/log.py similarity index 100% rename from cosmos1/utils/log.py rename to log.py diff --git a/cosmos1/utils/misc.py b/misc.py similarity index 99% rename from cosmos1/utils/misc.py rename to misc.py index 8b0c6d66669dc220db0c24f3b14d33acc0a0c512..ceb7f22c3340b8106101e5b7bcdd34c1132ac2ad 100644 --- a/cosmos1/utils/misc.py +++ b/misc.py @@ -24,11 +24,12 @@ import time from contextlib import ContextDecorator from typing import Any, Callable, TypeVar +from . import log import numpy as np import termcolor import torch -from cosmos1.utils import distributed, log +from . 
import distributed def to( diff --git a/cosmos1/models/autoregressive/modules/mm_projector.py b/mm_projector.py similarity index 100% rename from cosmos1/models/autoregressive/modules/mm_projector.py rename to mm_projector.py diff --git a/cosmos1/models/autoregressive/configs/base/model_config.py b/model_config.py similarity index 97% rename from cosmos1/models/autoregressive/configs/base/model_config.py rename to model_config.py index 7c16be1b6d30426cb9af3498aa2d51fa7c451696..91a7c75fbf473f8e2a5346e855ee20dc14d808b6 100644 --- a/cosmos1/models/autoregressive/configs/base/model_config.py +++ b/model_config.py @@ -16,17 +16,17 @@ import copy from typing import Callable, List, Optional -from cosmos1.models.autoregressive.configs.base.model import ModelConfig -from cosmos1.models.autoregressive.configs.base.tokenizer import ( +from .ar_configs_base_model import ModelConfig +from .ar_config_tokenizer import ( TextTokenizerConfig, TokenizerConfig, VideoTokenizerConfig, create_discrete_video_fsq_tokenizer_state_dict_config, ) -from cosmos1.models.autoregressive.tokenizer.image_text_tokenizer import ImageTextTokenizer -from cosmos1.models.autoregressive.tokenizer.text_tokenizer import TextTokenizer -from cosmos1.utils import log -from cosmos1.utils.lazy_config import LazyCall as L +from .ar_tokenizer_image_text_tokenizer import ImageTextTokenizer +from .ar_tokenizer_text_tokenizer import TextTokenizer +from . import log +from .lazy_config_init import LazyCall as L # Common architecture specifications BASE_CONFIG = {"n_kv_heads": 8, "norm_type": "rmsnorm", "norm_eps": 1e-5, "ffn_hidden_size": 14336} diff --git a/cosmos1/models/diffusion/model/model_t2w.py b/model_t2w.py similarity index 94% rename from cosmos1/models/diffusion/model/model_t2w.py rename to model_t2w.py index c21c1489eb738471cbb17876ba535206e9afc2aa..cd5fd0da0b6fc7aadb440fc874c2e10c804c4ba3 100644 --- a/cosmos1/models/diffusion/model/model_t2w.py +++ b/model_t2w.py @@ -18,15 +18,16 @@ from typing import Callable, Dict, Optional, Tuple import torch from torch import Tensor -from cosmos1.models.diffusion.conditioner import CosmosCondition -from cosmos1.models.diffusion.diffusion.functional.batch_ops import batch_mul -from cosmos1.models.diffusion.diffusion.modules.denoiser_scaling import EDMScaling -from cosmos1.models.diffusion.diffusion.modules.res_sampler import COMMON_SOLVER_OPTIONS, Sampler -from cosmos1.models.diffusion.diffusion.types import DenoisePrediction -from cosmos1.models.diffusion.module.blocks import FourierFeatures -from cosmos1.models.diffusion.module.pretrained_vae import BaseVAE -from cosmos1.utils import log, misc -from cosmos1.utils.lazy_config import instantiate as lazy_instantiate +from .conditioner import CosmosCondition +from .batch_ops import batch_mul +from .denoiser_scaling import EDMScaling +from .res_sampler import COMMON_SOLVER_OPTIONS, Sampler +from .diffusion_types import DenoisePrediction +from .blocks import FourierFeatures +from .pretrained_vae import BaseVAE +from . import misc +from .lazy_config_init import instantiate as lazy_instantiate +from . 
import log class EDMSDE: diff --git a/cosmos1/models/diffusion/model/model_v2w.py b/model_v2w.py similarity index 97% rename from cosmos1/models/diffusion/model/model_v2w.py rename to model_v2w.py index c8998c0b941cf146bc9ac4bac512a66371974c30..d62bc16de1236ee21803fe1403dc85320052bb17 100644 --- a/cosmos1/models/diffusion/model/model_v2w.py +++ b/model_v2w.py @@ -16,14 +16,15 @@ from dataclasses import dataclass from typing import Callable, Dict, Optional, Tuple, Union +from . import log import torch from torch import Tensor -from cosmos1.models.diffusion.conditioner import VideoExtendCondition -from cosmos1.models.diffusion.config.base.conditioner import VideoCondBoolConfig -from cosmos1.models.diffusion.diffusion.functional.batch_ops import batch_mul -from cosmos1.models.diffusion.model.model_t2w import DiffusionT2WModel -from cosmos1.utils import log, misc +from .conditioner import VideoExtendCondition +from .config_base_conditioner import VideoCondBoolConfig +from .batch_ops import batch_mul +from .model_t2w import DiffusionT2WModel +from . import misc @dataclass diff --git a/cosmos1/models/diffusion/diffusion/functional/multi_step.py b/multi_step.py similarity index 94% rename from cosmos1/models/diffusion/diffusion/functional/multi_step.py rename to multi_step.py index b651c600b6ebc1afed97bd92d84e9619b393e3f0..9e0887e98f9b1b1bb03534ceeeeeb763ac3ba4b2 100644 --- a/cosmos1/models/diffusion/diffusion/functional/multi_step.py +++ b/multi_step.py @@ -21,7 +21,7 @@ from typing import Callable, List, Tuple import torch -from cosmos1.models.diffusion.diffusion.functional.runge_kutta import reg_x0_euler_step, res_x0_rk2_step +from .runge_kutta import reg_x0_euler_step, res_x0_rk2_step def order2_fn( diff --git a/cosmos1/utils/lazy_config/omegaconf_patch.py b/omegaconf_patch.py similarity index 100% rename from cosmos1/utils/lazy_config/omegaconf_patch.py rename to omegaconf_patch.py diff --git a/cosmos1/models/diffusion/module/position_embedding.py b/position_embedding.py similarity index 98% rename from cosmos1/models/diffusion/module/position_embedding.py rename to position_embedding.py index 9cb16bf49d9c62d10ff54770c291da87b5a9bd5c..4a6c5f27652254282772c5b11f1a2ef61aadfe30 100644 --- a/cosmos1/models/diffusion/module/position_embedding.py +++ b/position_embedding.py @@ -19,8 +19,8 @@ import torch from einops import rearrange, repeat from torch import nn -from cosmos1.models.diffusion.module.attention import normalize -from cosmos1.models.diffusion.module.timm import trunc_normal_ +from .attention import normalize +from .timm import trunc_normal_ class VideoPositionEmb(nn.Module): diff --git a/cosmos1/models/guardrail/common/presets.py b/presets.py similarity index 86% rename from cosmos1/models/guardrail/common/presets.py rename to presets.py index 8b28c554d23066971c3ae07fc5d756e7018602c2..ea7e973a8f2fa6e4ed4787569536c1fed782338a 100644 --- a/cosmos1/models/guardrail/common/presets.py +++ b/presets.py @@ -17,12 +17,12 @@ import os import numpy as np -from cosmos1.models.guardrail.aegis.aegis import Aegis -from cosmos1.models.guardrail.blocklist.blocklist import Blocklist -from cosmos1.models.guardrail.common.core import GuardrailRunner -from cosmos1.models.guardrail.face_blur_filter.face_blur_filter import RetinaFaceFilter -from cosmos1.models.guardrail.video_content_safety_filter.video_content_safety_filter import VideoContentSafetyFilter -from cosmos1.utils import log +from .aegis import Aegis +from .blocklist import Blocklist +from .guardrail_core import GuardrailRunner +from 
.face_blur_filter import RetinaFaceFilter +from .video_content_safety_filter import VideoContentSafetyFilter +from . import log def create_text_guardrail_runner(checkpoint_dir: str) -> GuardrailRunner: diff --git a/cosmos1/models/diffusion/module/pretrained_vae.py b/pretrained_vae.py similarity index 100% rename from cosmos1/models/diffusion/module/pretrained_vae.py rename to pretrained_vae.py diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index b23eed7cfd8d4548ef18bb3c324473212973e45c..0000000000000000000000000000000000000000 --- a/pyproject.toml +++ /dev/null @@ -1,37 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -[tool.isort] -profile = "black" -known_third_party = ["third_party"] -skip = ["third_party"] -line_length = 120 - -[tool.black] -line-length = 120 -target_version = ['py310'] -exclude = 'third_party' - -[tool.coverage.report] -include_namespace_packages = true -skip_empty = true -omit = [ - "tests/*", - "legacy/*", - ".venv/*", - "**/test_*.py", - "config.py", - "config-3.10.py" -] diff --git a/cosmos1/utils/lazy_config/registry.py b/registry.py similarity index 100% rename from cosmos1/utils/lazy_config/registry.py rename to registry.py diff --git a/release_notes/v0p1.md b/release_notes/v0p1.md deleted file mode 100644 index a552325fb6e5e277f9e98dca928ebb60a64a9d2b..0000000000000000000000000000000000000000 --- a/release_notes/v0p1.md +++ /dev/null @@ -1,20 +0,0 @@ -# Release note - -- Cosmos 0.1 was released with the [Cosmos Tokenizer Webage](https://research.nvidia.com/labs/dir/cosmos-tokenizer/). -- 10 tokenizers were released in the [Hugging Face](https://huggingface.co/collections/nvidia/cosmos-6751e884dc10e013a0a0d8e6) as shown in the table below. -- Inference scripts for the models were released in the [Cosmos Tokenizer repository](https://github.com/NVIDIA/Cosmos-Tokenizer). 
- -## Released Models - -| Item | Model name | Description | Try it out | -|--|------------|----------|----------| -|1| [Cosmos-0.1-Tokenizer-CI8x8](https://huggingface.co/nvidia/Cosmos-Tokenizer-CI8x8) | Continuous image tokenizer with 8x8 compression ratio | [Inference]([cosmos1/models/diffusion/README.md](https://github.com/NVIDIA/Cosmos-Tokenizer)) | -|2| [Cosmos-0.1-Tokenizer-CI16x16](https://huggingface.co/nvidia/Cosmos-Tokenizer-CI16x16) | Continuous image tokenizer with 16x16 compression ratio | [Inference]([cosmos1/models/diffusion/README.md](https://github.com/NVIDIA/Cosmos-Tokenizer)) | -|3| [Cosmos-0.1-Tokenizer-DI8x8](https://huggingface.co/nvidia/Cosmos-Tokenizer-DI8x8) | Discrete image tokenizer with 8x8 compression ratio | [Inference]([cosmos1/models/diffusion/README.md](https://github.com/NVIDIA/Cosmos-Tokenizer)) | -|4| [Cosmos-0.1-Tokenizer-DI16x16](https://huggingface.co/nvidia/Cosmos-Tokenizer-DI16x16) | Discrete image tokenizer with 16x16 compression ratio | [Inference]([cosmos1/models/diffusion/README.md](https://github.com/NVIDIA/Cosmos-Tokenizer)) | -|5| [Cosmos-0.1-Tokenizer-CV4x8x8](https://huggingface.co/nvidia/Cosmos-Tokenizer-CV4x8x8) | Continuous video tokenizer with 4x8x8 compression ratio | [Inference]([cosmos1/models/diffusion/README.md](https://github.com/NVIDIA/Cosmos-Tokenizer)) | -|6| [Cosmos-0.1-Tokenizer-CV8x8x8](https://huggingface.co/nvidia/Cosmos-Tokenizer-CV8x8x8) | Continuous video tokenizer with 8x8x8 compression ratio | [Inference]([cosmos1/models/diffusion/README.md](https://github.com/NVIDIA/Cosmos-Tokenizer)) | -|7| [Cosmos-0.1-Tokenizer-CV8x16x16](https://huggingface.co/nvidia/Cosmos-Tokenizer-CV8x16x16) | Continuous video tokenizer with 8x16x16 compression ratio | [Inference]([cosmos1/models/diffusion/README.md](https://github.com/NVIDIA/Cosmos-Tokenizer)) | -|8| [Cosmos-0.1-Tokenizer-DV4x8x8](https://huggingface.co/nvidia/Cosmos-Tokenizer-DV4x8x8) | Discrete video tokenizer with 4x8x8 compression ratio | [Inference]([cosmos1/models/diffusion/README.md](https://github.com/NVIDIA/Cosmos-Tokenizer)) | -|9| [Cosmos-0.1-Tokenizer-DV8x8x8](https://huggingface.co/nvidia/Cosmos-Tokenizer-DV8x8x8) | Discrete video tokenizer with 8x8x8 compression ratio | [Inference]([cosmos1/models/diffusion/README.md](https://github.com/NVIDIA/Cosmos-Tokenizer)) | -|10| [Cosmos-0.1-Tokenizer-DV8x16x16](https://huggingface.co/nvidia/Cosmos-Tokenizer-DV8x16x16) | Discrete video tokenizer with 8x16x16 compression ratio | [Inference]([cosmos1/models/diffusion/README.md](https://github.com/NVIDIA/Cosmos-Tokenizer)) | diff --git a/release_notes/v1p0.md b/release_notes/v1p0.md deleted file mode 100644 index 5e6ed305e3f039c18b62e41e8cc351e62060df90..0000000000000000000000000000000000000000 --- a/release_notes/v1p0.md +++ /dev/null @@ -1,23 +0,0 @@ -# Release note - -- Cosmos 1.0 was released with the [Cosmos paper](https://research.nvidia.com/publication/2025-01_cosmos-world-foundation-model-platform-physical-ai). -- 13 models were released in the [Hugging Face](https://huggingface.co/collections/nvidia/cosmos-6751e884dc10e013a0a0d8e6) as shown in the table below. -- Inference scripts for the models were released in the [Cosmos repository](https://github.com/NVIDIA/Cosmos). 
- -## Released Models - -| Item | Model name | Description | Try it out | -|--|------------|----------|----------| -|1| [Cosmos-1.0-Diffusion-7B-Text2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-7B-Text2World) | Text to visual world generation | [Inference](cosmos1/models/diffusion/README.md) | -|2| [Cosmos-1.0-Diffusion-14B-Text2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-14B-Text2World) | Text to visual world generation | [Inference](cosmos1/models/diffusion/README.md) | -|3| [Cosmos-1.0-Diffusion-7B-Video2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-7B-Video2World) | Video + Text based future visual world generation | [Inference](cosmos1/models/diffusion/README.md) | -|4| [Cosmos-1.0-Diffusion-14B-Video2World](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-14B-Video2World) | Video + Text based future visual world generation | [Inference](cosmos1/models/diffusion/README.md) | -|5| [Cosmos-1.0-Autoregressive-4B](https://huggingface.co/nvidia/Cosmos-1.0-Autoregressive-4B) | Future visual world generation | [Inference](cosmos1/models/autoregressive/README.md) | -|6| [Cosmos-1.0-Autoregressive-12B](https://huggingface.co/nvidia/Cosmos-1.0-Autoregressive-12B) | Future visual world generation | [Inference](cosmos1/models/autoregressive/README.md) | -|7| [Cosmos-1.0-Autoregressive-5B-Video2World](https://huggingface.co/nvidia/Cosmos-1.0-Autoregressive-5B-Video2World) | Video + Text based future visual world generation | [Inference](cosmos1/models/autoregressive/README.md) | -|8| [Cosmos-1.0-Autoregressive-13B-Video2World](https://huggingface.co/nvidia/Cosmos-1.0-Autoregressive-13B-Video2World) | Video + Text based future visual world generation | [Inference](cosmos1/models/autoregressive/README.md) | -|9| [Cosmos-1.0-Tokenizer-CV8x8x8](https://huggingface.co/nvidia/Cosmos-1.0-Tokenizer-CV8x8x8) | Continuous video tokenizer with 8x8x8 compression ratio | [Inference](cosmos1/models/diffusion/README.md) | -|10| [Cosmos-1.0-Tokenizer-DV8x16x16](https://huggingface.co/nvidia/Cosmos-1.0-Tokenizer-DV8x16x16) | Discrete video tokenizer with 16x8x8 compression ratio | [Inference](cosmos1/models/autoregressive/README.md) | -|11| [Cosmos-1.0-PromptUpsampler-12B-Text2World](https://huggingface.co/nvidia/Cosmos-1.0-Prompt-Upsampler-12B-Text2World) | Prompt upsampler for Text2World | [Inference](cosmos1/models/diffusion/README.md) | -|12| [Cosmos-1.0-Diffusion-7B-Decoder-DV8x16x16ToCV8x8x8](https://huggingface.co/nvidia/Cosmos-1.0-Diffusion-7B-Decoder-DV8x16x16ToCV8x8x8) | Diffusion decoder for enhancing Cosmos 1.0 autoregressive WFMs' outputs | [Inference](cosmos1/models/autoregressive/README.md) | -|13| [Cosmos-1.0-Guardrail](https://huggingface.co/nvidia/Cosmos-1.0-Guardrail) | Guardrail contains pre-Guard and post-Guard for safe use | Embedded in model inference scripts | diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index a629f6c927f66937cbc7b7058142a0a494387ec8..0000000000000000000000000000000000000000 --- a/requirements.txt +++ /dev/null @@ -1,30 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Please keep requirements sorted alphabetically
-av
-better-profanity
-git+https://github.com/NVlabs/Pytorch_Retinaface.git@b843f45
-hydra-core
-imageio[ffmpeg]
-iopath
-loguru
-mediapy
-nltk
-peft
-pillow
-sentencepiece
-termcolor
-transformers==4.45.0
diff --git a/cosmos1/models/diffusion/diffusion/modules/res_sampler.py b/res_sampler.py
similarity index 97%
rename from cosmos1/models/diffusion/diffusion/modules/res_sampler.py
rename to res_sampler.py
index 77fe1c5aefcef5f7683cc8d6bf585337a0cca41a..c531bcd5f8304985c68ce09350ef48297217cad7 100644
--- a/cosmos1/models/diffusion/diffusion/modules/res_sampler.py
+++ b/res_sampler.py
@@ -28,9 +28,9 @@ from typing import Any, Callable, List, Literal, Optional, Tuple, Union
 import attrs
 import torch
-from cosmos1.models.diffusion.diffusion.functional.multi_step import get_multi_step_fn, is_multi_step_fn_supported
-from cosmos1.models.diffusion.diffusion.functional.runge_kutta import get_runge_kutta_fn, is_runge_kutta_fn_supported
-from cosmos1.utils.config import make_freezable
+from .multi_step import get_multi_step_fn, is_multi_step_fn_supported
+from .runge_kutta import get_runge_kutta_fn, is_runge_kutta_fn_supported
+from .config import make_freezable
 COMMON_SOLVER_OPTIONS = Literal["2ab", "2mid", "1euler"]
diff --git a/cosmos1/models/guardrail/face_blur_filter/retinaface_utils.py b/retinaface_utils.py
similarity index 99%
rename from cosmos1/models/guardrail/face_blur_filter/retinaface_utils.py
rename to retinaface_utils.py
index 27e69cec320c28d13ea1a0443f77a565b59f24dd..46d1988958050f1b95049ff5b394d00409357f4b 100644
--- a/cosmos1/models/guardrail/face_blur_filter/retinaface_utils.py
+++ b/retinaface_utils.py
@@ -17,7 +17,7 @@
 import numpy as np
 import torch
 from pytorch_retinaface.utils.nms.py_cpu_nms import py_cpu_nms
-from cosmos1.utils import log
+from . import log
 # Adapted from https://github.com/biubug6/Pytorch_Retinaface/blob/master/detect.py
diff --git a/cosmos1/models/diffusion/diffusion/functional/runge_kutta.py b/runge_kutta.py
similarity index 99%
rename from cosmos1/models/diffusion/diffusion/functional/runge_kutta.py
rename to runge_kutta.py
index d07aafe41fdafa9e323079ac57a96994b365fe88..ecffde890072dccffd2ffe67d1534044414266df 100644
--- a/cosmos1/models/diffusion/diffusion/functional/runge_kutta.py
+++ b/runge_kutta.py
@@ -17,7 +17,7 @@
 from typing import Callable, Tuple
 import torch
-from cosmos1.models.diffusion.diffusion.functional.batch_ops import batch_mul
+from .batch_ops import batch_mul
 def phi1(t: torch.Tensor) -> torch.Tensor:
diff --git a/cosmos1/models/autoregressive/utils/sampling.py b/sampling.py
similarity index 98%
rename from cosmos1/models/autoregressive/utils/sampling.py
rename to sampling.py
index 91ba0e7abef2ffee4e57f7cca2b8ddbca25c27e3..b9719442e0fa94cea283348a7732e0413c5a7234 100644
--- a/cosmos1/models/autoregressive/utils/sampling.py
+++ b/sampling.py
@@ -17,7 +17,7 @@
 from typing import Optional, Tuple
 import torch
-from cosmos1.models.autoregressive.networks.transformer import Transformer
+from .ar_transformer import Transformer
 def sample_top_p(logits, temperature, top_p, return_probs: bool = False):
diff --git a/cosmos1/models/common/t5_text_encoder.py b/t5_text_encoder.py
similarity index 99%
rename from cosmos1/models/common/t5_text_encoder.py
rename to t5_text_encoder.py
index 456d2eb8fbf56a08d77a73dcd4d422b50d43dcef..56948b8f6ac89d5fc0dd1c85a3b287f4e8aa7a51 100644
--- a/cosmos1/models/common/t5_text_encoder.py
+++ b/t5_text_encoder.py
@@ -19,7 +19,7 @@
 import torch
 import transformers
 from transformers import T5EncoderModel, T5TokenizerFast
-from cosmos1.utils import log
+from . import log
 transformers.logging.set_verbosity_error()
diff --git a/cosmos1/models/diffusion/inference/text2world.py b/text2world.py
similarity index 94%
rename from cosmos1/models/diffusion/inference/text2world.py
rename to text2world.py
index 4faaa75e4fd15c6b913fbe37b1bd0d9e6db8fb87..b9de9d51c882b0d43cbea042a928cdcf42354f85 100644
--- a/cosmos1/models/diffusion/inference/text2world.py
+++ b/text2world.py
@@ -16,12 +16,13 @@
 import argparse
 import os
+from . import log
 import torch
-from cosmos1.models.diffusion.inference.inference_utils import add_common_arguments, validate_args
-from cosmos1.models.diffusion.inference.world_generation_pipeline import DiffusionText2WorldGenerationPipeline
-from cosmos1.utils import log, misc
-from cosmos1.utils.io import read_prompts_from_file, save_video
+from .inference_utils import add_common_arguments, validate_args
+from .world_generation_pipeline import DiffusionText2WorldGenerationPipeline
+from . import misc
+from .utils_io import read_prompts_from_file, save_video
 torch.enable_grad(False)
diff --git a/text2world_hf.py b/text2world_hf.py
index 65e25aa44fd6b0ff49113f119694f0c506a5d9c9..c01a2ab9963d161e747fcdefd40c37dded65ac3d 100644
--- a/text2world_hf.py
+++ b/text2world_hf.py
@@ -3,12 +3,11 @@
 import argparse
 import torch
 from transformers import PreTrainedModel, PretrainedConfig
-# TODO: This is a bug to fix. Huggingface cannot download .cosmos1.models.diffusion.inference.inference_utils because it's in a subfolder.
-from .cosmos1.models.diffusion.inference.inference_utils import add_common_arguments, validate_args
-from .cosmos1.models.diffusion.inference.world_generation_pipeline import DiffusionText2WorldGenerationPipeline
-import .cosmos1.utils.log as log
-import .cosmos1.utils.misc as misc
-from .cosmos1.utils.io import read_prompts_from_file, save_video
+from .inference_utils import add_common_arguments, validate_args
+from .world_generation_pipeline import DiffusionText2WorldGenerationPipeline
+from . import log
+from . import misc
+from .utils_io import read_prompts_from_file, save_video
 # custom config class
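The text2world_hf.py hunk above does more than flatten paths: the removed lines of the form "import .cosmos1.utils.log as log" are not valid Python, because the plain import statement cannot take a leading dot; relative imports are only expressible with the "from ... import ..." form. Below is a minimal sketch of the flat-layout pattern this diff standardizes on, assuming (as the rest of the diff suggests) that log.py and misc.py are flattened to the repo root the same way utils_io.py is, and that the files are loaded as members of one package:

# import .cosmos1.utils.log as log   # invalid: SyntaxError, "import" does not accept relative names
from . import log        # sibling module <package>/log.py
from . import misc       # sibling module <package>/misc.py
from .utils_io import read_prompts_from_file, save_video   # sibling module <package>/utils_io.py

Because these are relative imports, the files cannot be run as standalone scripts (executing "python text2world_hf.py" directly raises "attempted relative import with no known parent package"); they are meant to be imported as package members, for example through transformers' dynamic trust_remote_code loading, which keeps every referenced file in the same downloaded folder.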
diff --git a/cosmos1/models/diffusion/prompt_upsampler/text2world_prompt_upsampler_inference.py b/text2world_prompt_upsampler_inference.py
similarity index 94%
rename from cosmos1/models/diffusion/prompt_upsampler/text2world_prompt_upsampler_inference.py
rename to text2world_prompt_upsampler_inference.py
index 44a83e3a364b4278fabce4102b3e331048bba455..c39c589aefee598492c1d6eec822485e6e39bc24 100644
--- a/cosmos1/models/diffusion/prompt_upsampler/text2world_prompt_upsampler_inference.py
+++ b/text2world_prompt_upsampler_inference.py
@@ -23,11 +23,11 @@
 import argparse
 import os
 import re
-from cosmos1.models.autoregressive.configs.base.model_config import create_text_model_config
-from cosmos1.models.autoregressive.model import AutoRegressiveModel
-from cosmos1.models.diffusion.prompt_upsampler.inference import chat_completion
-from cosmos1.models.guardrail.common import presets as guardrail_presets
-from cosmos1.utils import log
+from .model_config import create_text_model_config
+from .ar_model import AutoRegressiveModel
+from .inference import chat_completion
+from . import presets as guardrail_presets
+from . import log
 def create_prompt_upsampler(checkpoint_dir: str) -> AutoRegressiveModel:
diff --git a/cosmos1/models/diffusion/module/timm.py b/timm.py
similarity index 100%
rename from cosmos1/models/diffusion/module/timm.py
rename to timm.py
diff --git a/cosmos1/utils/io.py b/utils_io.py
similarity index 100%
rename from cosmos1/utils/io.py
rename to utils_io.py
diff --git a/cosmos1/models/diffusion/inference/video2world.py b/video2world.py
similarity index 95%
rename from cosmos1/models/diffusion/inference/video2world.py
rename to video2world.py
index cd495f7f75a4d1f20d9550e2015cd072ae310734..db848bb1a16258c62ed88105504b751afb54181e 100644
--- a/cosmos1/models/diffusion/inference/video2world.py
+++ b/video2world.py
@@ -16,12 +16,13 @@
 import argparse
 import os
+from . import log
 import torch
-from cosmos1.models.diffusion.inference.inference_utils import add_common_arguments, check_input_frames, validate_args
-from cosmos1.models.diffusion.inference.world_generation_pipeline import DiffusionVideo2WorldGenerationPipeline
-from cosmos1.utils import log, misc
-from cosmos1.utils.io import read_prompts_from_file, save_video
+from .inference_utils import add_common_arguments, check_input_frames, validate_args
+from .world_generation_pipeline import DiffusionVideo2WorldGenerationPipeline
+from . import misc
+from .utils_io import read_prompts_from_file, save_video
 torch.enable_grad(False)
diff --git a/cosmos1/models/diffusion/prompt_upsampler/video2world_prompt_upsampler_inference.py b/video2world_prompt_upsampler_inference.py
similarity index 93%
rename from cosmos1/models/diffusion/prompt_upsampler/video2world_prompt_upsampler_inference.py
rename to video2world_prompt_upsampler_inference.py
index beff9f2ca574afbec4d5157ec22d0c62ceb4a64f..8950bb1bf468155880d11f859fc707a405e13b3f 100644
--- a/cosmos1/models/diffusion/prompt_upsampler/video2world_prompt_upsampler_inference.py
+++ b/video2world_prompt_upsampler_inference.py
@@ -26,12 +26,12 @@
 from math import ceil
 from PIL import Image
-from cosmos1.models.autoregressive.configs.base.model_config import create_vision_language_model_config
-from cosmos1.models.autoregressive.model import AutoRegressiveModel
-from cosmos1.models.diffusion.prompt_upsampler.inference import chat_completion
-from cosmos1.models.guardrail.common import presets as guardrail_presets
-from cosmos1.utils import log
-from cosmos1.utils.io import load_from_fileobj
+from .model_config import create_vision_language_model_config
+from .ar_model import AutoRegressiveModel
+from .inference import chat_completion
+from . import presets as guardrail_presets
+from . import log
+from .utils_io import load_from_fileobj
 def create_vlm_prompt_upsampler(
diff --git a/cosmos1/models/guardrail/video_content_safety_filter/video_content_safety_filter.py b/video_content_safety_filter.py
similarity index 94%
rename from cosmos1/models/guardrail/video_content_safety_filter/video_content_safety_filter.py
rename to video_content_safety_filter.py
index 78d812ae3a184de73d22cae5e3b55c2fe486b69e..3b1ca9462c4dccc57761afa127410cad613b6105 100644
--- a/cosmos1/models/guardrail/video_content_safety_filter/video_content_safety_filter.py
+++ b/video_content_safety_filter.py
@@ -18,14 +18,15 @@
 import json
 import os
 from typing import Iterable, Tuple, Union
+from . import log
 import torch
 from PIL import Image
-from cosmos1.models.guardrail.common.core import ContentSafetyGuardrail, GuardrailRunner
-from cosmos1.models.guardrail.common.io_utils import get_video_filepaths, read_video
-from cosmos1.models.guardrail.video_content_safety_filter.model import ModelConfig, VideoSafetyModel
-from cosmos1.models.guardrail.video_content_safety_filter.vision_encoder import SigLIPEncoder
-from cosmos1.utils import log, misc
+from .guardrail_core import ContentSafetyGuardrail, GuardrailRunner
+from .guardrail_io_utils import get_video_filepaths, read_video
+from .video_content_safety_filter_model import ModelConfig, VideoSafetyModel
+from .video_content_safety_filter_vision_encoder import SigLIPEncoder
+from . import misc
 DEFAULT_CHECKPOINT_DIR = "checkpoints/Cosmos-1.0-Guardrail/video_content_safety_filter"
diff --git a/cosmos1/models/guardrail/video_content_safety_filter/model.py b/video_content_safety_filter_model.py
similarity index 97%
rename from cosmos1/models/guardrail/video_content_safety_filter/model.py
rename to video_content_safety_filter_model.py
index 1f53f1cabc2ee49c1f50dc17cef237ff1b80e37d..d4ccc005d90bad4d029bf9fdc9a66450fa9f7049 100644
--- a/cosmos1/models/guardrail/video_content_safety_filter/model.py
+++ b/video_content_safety_filter_model.py
@@ -17,7 +17,7 @@
 import attrs
 import torch
 import torch.nn as nn
-from cosmos1.utils.config import make_freezable
+from .config import make_freezable
 @make_freezable
diff --git a/cosmos1/models/guardrail/video_content_safety_filter/vision_encoder.py b/video_content_safety_filter_vision_encoder.py
similarity index 100%
rename from cosmos1/models/guardrail/video_content_safety_filter/vision_encoder.py
rename to video_content_safety_filter_vision_encoder.py
diff --git a/cosmos1/models/autoregressive/networks/vit.py b/vit.py
similarity index 98%
rename from cosmos1/models/autoregressive/networks/vit.py
rename to vit.py
index 25d0c4850bafdfaba29c2abc14b2da05578ee23e..d5604d6def61ffd9b6e3a7f2f1a1531d63390a84 100644
--- a/cosmos1/models/autoregressive/networks/vit.py
+++ b/vit.py
@@ -26,9 +26,9 @@
 from typing import Any, Callable, Mapping, Optional, Tuple
 import torch
 import torch.nn as nn
-from cosmos1.models.autoregressive.modules.normalization import create_norm
-from cosmos1.models.autoregressive.networks.transformer import TransformerBlock
-from cosmos1.utils import log
+from .ar_modules_normalization import create_norm
+from .ar_transformer import TransformerBlock
+from . import log
 def get_vit_config(model_name: str) -> Mapping[str, Any]:
diff --git a/cosmos1/models/diffusion/inference/world_generation_pipeline.py b/world_generation_pipeline.py
similarity index 97%
rename from cosmos1/models/diffusion/inference/world_generation_pipeline.py
rename to world_generation_pipeline.py
index 49cabb9cc0b1ebda91abf963bdad649c3fb1d603..8582fd3affe52ec526780a3925221d3d70740d22 100644
--- a/cosmos1/models/diffusion/inference/world_generation_pipeline.py
+++ b/world_generation_pipeline.py
@@ -20,8 +20,8 @@
 from typing import Any, Optional
 import numpy as np
 import torch
-from cosmos1.models.common.base_world_generation_pipeline import BaseWorldGenerationPipeline
-from cosmos1.models.diffusion.inference.inference_utils import (
+from .base_world_generation_pipeline import BaseWorldGenerationPipeline
+from .inference_utils import (
     generate_world_from_text,
     generate_world_from_video,
     get_condition_latent,
@@ -30,20 +30,20 @@ from cosmos1.models.diffusion.inference.inference_utils import (
     load_network_model,
     load_tokenizer_model,
 )
-from cosmos1.models.diffusion.model.model_t2w import DiffusionT2WModel
-from cosmos1.models.diffusion.model.model_v2w import DiffusionV2WModel
-from cosmos1.models.diffusion.prompt_upsampler.text2world_prompt_upsampler_inference import (
+from .model_t2w import DiffusionT2WModel
+from .model_v2w import DiffusionV2WModel
+from .text2world_prompt_upsampler_inference import (
     create_prompt_upsampler,
     run_chat_completion,
 )
-from cosmos1.models.diffusion.prompt_upsampler.video2world_prompt_upsampler_inference import (
+from .video2world_prompt_upsampler_inference import (
     create_vlm_prompt_upsampler,
     prepare_dialog,
 )
-from cosmos1.models.diffusion.prompt_upsampler.video2world_prompt_upsampler_inference import (
+from .video2world_prompt_upsampler_inference import (
     run_chat_completion as run_chat_completion_vlm,
 )
-from cosmos1.utils import log
+from . import log
 MODEL_NAME_DICT = {
     "Cosmos-1.0-Diffusion-7B-Text2World": "Cosmos_1_0_Diffusion_Text2World_7B",