diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..f549193fb7e88a80c393ee6fe6d25b687802c1c5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,235 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +examples.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300005-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300006-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300007-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300008-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300009-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300010-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300011-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300012-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300013-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300014-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300015-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300016-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300017-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300018-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300019-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300020-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300021-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300022-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300023-left-right.png 
filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300024-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300025-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300026-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300027-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300028-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300029-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300030-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300031-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300033-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300034.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300035-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300036.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300037-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300039-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300040.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300041-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300043-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300045-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300053-simple.obj filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300056-simple.obj filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17278951300061-simple.obj filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390003-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390008.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390009-left-right.png filter=lfs diff=lfs merge=lfs -text 
+outputs/depthmap-17280589390010.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390011-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390012.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390013-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390015-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390017-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390023-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390025-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390027-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390029-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390031-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390033-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390035-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390037-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390039-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390041-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390043-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390045-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390047-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390049-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390051-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390052.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390053-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390054.png filter=lfs diff=lfs merge=lfs -text 
+outputs/depthmap-17280589390055-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390056.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390057-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390058.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390059-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390061-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390063-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390065-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390067-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390069-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390071-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390073-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390075-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390077-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390079-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390081-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390085-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390087-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17280589390089-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17285060200001.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17285060200002-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17285060200003-top-bottom.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17285371260002-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17285859980001.png filter=lfs diff=lfs merge=lfs -text 
+outputs/depthmap-17285859980002.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17285859980003-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17285861380002-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17285861380003-left-right_video.avi filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17285861380004-depth_video.avi filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930002-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930003.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930004-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930005.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930006-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930010-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930012-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930016-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930018-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930020-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930026-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930028-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930036-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930046-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930050-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930052-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930053.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930054-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930055.png filter=lfs diff=lfs merge=lfs -text 
+outputs/depthmap-17286927930056-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930057.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930058-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930059.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930060-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930061.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930062-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930063.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930064-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930066-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930070-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930072-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930080-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930082-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930084-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930120-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930126-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930132-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930142-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930147.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930152-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930154-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930156-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930158-left-right.png filter=lfs diff=lfs merge=lfs -text 
+outputs/depthmap-17286927930160-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930162-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930164-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930166-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930168-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930170-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930172-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930174-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930176-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930178-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930180-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930182-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930184-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930186-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930188-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930190-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930194-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930196-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930198-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930199.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930200-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930202-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930204-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930206-left-right.png filter=lfs diff=lfs merge=lfs 
-text +outputs/depthmap-17286927930208-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930210-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930212-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930213.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930214-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930216-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930218-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930220-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930222-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930224-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930226-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930228-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930230-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930232-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930234-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930236-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930238-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930240-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930242-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930244-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930246-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930248-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930250-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930252-left-right.png filter=lfs diff=lfs 
merge=lfs -text +outputs/depthmap-17286927930253.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930254-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930256-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930258-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930260-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930261.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930262-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930263.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930264-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930265.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930266-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930268-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930270-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930272-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930274-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930276-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930278-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930280-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930282-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930284-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930286-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930288-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930290-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930292-left-right.png filter=lfs diff=lfs merge=lfs -text 
+outputs/depthmap-17286927930294-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930296-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930298-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930300-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930302-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930304-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930306-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930308-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930310-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930312-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930316-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930318-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930322-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930324-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930326-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930328-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930330-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930332-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930334-left-right.png filter=lfs diff=lfs merge=lfs -text +outputs/depthmap-17286927930336-left-right.png filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..7d2979b029d72b67b768cf3e640f388d6bee96cc --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +venv/ +.idea/ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 
0000000000000000000000000000000000000000..3a12517b43875f3091f649014806fe6125bc792b --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,131 @@ +## Changelog +### 0.4.8 + * Depth Anything V2 support, thanks [@graemeniedermayer](https://github.com/graemeniedermayer)! +### 0.4.7 + * Tiling mode + * Reduced VRAM consumption for Depth Anything, as well as for ZoeDepth k and nk + * Some bugfixes +### 0.4.6 + * Support for [Depth Anything](https://github.com/LiheYoung/Depth-Anything). +### 0.4.5 + * Preliminary support for [Marigold](https://marigoldmonodepth.github.io). [PR #385](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/385). +### 0.4.4 + * Compatibility with stable-diffusion-webui 1.6.0 +### 0.4.3 video processing tab + * Added an option to process videos directly from a video file. This leads to better results than batch-processing individual frames of a video. Allows generating depthmap videos, that can be used in further generations as custom depthmap videos. + * UI improvements. + * Extra stereoimage generation modes - enable in extension settings if you want to use them. + * New stereoimage generation parameter - offset exponent. Setting it to 1 may produce more realistic outputs. +### 0.4.2 + * Added UI options for 2 additional rembg models. + * Heatmap generation UI option is hidden - if you want to use it, please activate it in the extension settings. + * Bugfixes. +### 0.4.1 standalone mode + * Added ability to run DepthMap without WebUI. (Use main.py. Make sure all the dependencies are installed. The support is not feature-complete.) + * NormalMap generation +### 0.4.0 large code refactor + * UI improvements + * Improved Batch from Directory, Clip and renormalize DepthMap + * Slightly changed the behaviour of various options + * Extension may partially work even if some of the dependencies are unmet + +### 0.3.12 + * Fixed stereo image generation + * Other bugfixes +### 0.3.11 + * 3D model viewer (Experimental!) 
+ * simple and fast (occluded) 3D mesh generation, support for equirectangular projection + (accurate results with ZoeDepth models only, no boost, no custom maps) + * default output format is now obj for inpainted mesh and simple mesh +### 0.3.10 + * ZoeDepth support (with boost), 3 new models, best results so far + * better heatmap +### 0.3.9 + * use existing/custom depthmaps in output dir for batch mode + * custom depthmap support for single file + * wavefront obj output support for inpainted mesh (enabled in settings) + * option to generate all stereo formats at once + * bugfix: convert single channel input image to rgb + * renamed midas imports to fix conflict with deforum + * ui cleanup +### 0.3.8 bugfix + * bugfix in remove background path +### 0.3.7 new features + * [rembg](https://github.com/danielgatis/rembg) Remove Background [PR](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/78) by [@graemeniedermayer](https://github.com/graemeniedermayer) merged + * setting to flip Left/Right SBS images + * added missing parameter for 3d inpainting (repeat_inpaint_edge) + * option to generate demo videos with mesh +### 0.3.6 new feature + * implemented binary ply file format for the inpainted 3D mesh, big reduction in filesize and save/load times. + * added progress indicators to the inpainting process +### 0.3.5 bugfix + * create path to 3dphoto models before download (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/76)) +### 0.3.4 new features + * depth clipping option (original idea by [@Extraltodeus](https://github.com/Extraltodeus)) + * by popular demand, 3D-Photo-Inpainting is now implemented + * generate inpainted 3D mesh (PLY) and videos of said mesh +### 0.3.3 bugfix and new midas models + * updated to midas 3.1, bringing 2 new depth models (the 512 one eats VRAM for breakfast!) 
+ * fix Next-ViT dependency issue for new installs + * extension no longer clones repositories, all dependencies are now contained in the extension +### 0.3.2 new feature and bugfixes + * several bug fixes for apple silicon and other machines without cuda + * NEW Stereo Image Generation techniques for gap filling by [@semjon00](https://github.com/semjon00) using polylines. (See [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/56)) Significant improvement in quality. +### 0.3.1 bugfix + * small speed increase for anaglyph creation + * clone midas repo before midas 3.1 to fix issue (see [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/55#issue-1510266008)) +### 0.3.0 improved stereo image generation + * New improved technique for generating stereo images and balancing distortion between eyes by [@semjon00](https://github.com/semjon00) (See [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/51)) + * Substantial speedup of stereo image generation code using numba JIT +### 0.2.9 new feature + * 3D Stereo (side-by-side) and red/cyan anaglyph image generation. + (Thanks to [@sina-masoud-ansari](https://github.com/sina-masoud-ansari) for the tip! Discussion [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/discussions/45)) +### 0.2.8 bugfix + * boost (pix2pix) now also able to compute on cpu + * res101 able to compute on cpu +### 0.2.7 separate tab + * Depth Tab now available for easier stand-alone (batch) processing +### 0.2.6 ui layout and settings + * added link to repo so more people find their way to the instructions. + * boost rmax setting +### 0.2.5 bugfix + * error checking on model download (now with progressbar) +### 0.2.4 high resolution depthmaps + * multi-resolution merging is now implemented, significantly improving results! 
+ * res101 can now also compute on CPU +### 0.2.3 bugfix + * path error on linux fixed +### 0.2.2 new features + * added (experimental) support for AdelaiDepth/LeReS (GPU Only!) + * new option to view depthmap as heatmap + * optimised ui layout +### 0.2.1 bugfix + * Correct seed is now used in filename and pnginfo when running batches. (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/35)) +### 0.2.0 upgrade + * the script is now an extension, enabling auto installation. +### 0.1.9 bugfixes + * sd model moved to system memory while computing depthmap + * memory leak/fragmentation issue fixed + * recover from out of memory error +### 0.1.8 new options + * net size can now be set as width and height, option to match input size, sliders now have the same range as generation parameters. (see usage below) + * better error handling +### 0.1.7 bugfixes + * batch img2img now works (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/21#issuecomment-1306445056)) + * generation parameters now only saved when enabled in settings + * model memory freed explicitly at end of script +### 0.1.6 new option + * option to invert depthmap (black=near, white=far), as required by some viewers. +### 0.1.5 bugfix + * saving as any format other than PNG now always produces an 8 bit, 3 channel RGB image. A single channel 16 bit image is only supported when saving as PNG. (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/15#issuecomment-1304909019)) +### 0.1.4 update + * added support for `--no-half`. Now also works with cards that don't support half precision like GTX 16xx. 
([verified](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/12#issuecomment-1304656398)) +### 0.1.3 bugfix + * bugfix where some controls were not visible (see [issue](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/11#issuecomment-1304400537)) +### 0.1.2 new option + * network size slider. higher resolution depth maps (see usage below) +### 0.1.1 bugfixes + * overflow issue (see [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/issues/10) for details and examples of artifacts) + * when not combining, depthmap is now saved as single channel 16 bit +### 0.1.0 + * initial version: script mode, supports generating depthmaps with 4 different midas models \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..5cd8c3f8a5ec57266b9424565994f53ab82d4699 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Bob Thiry + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 4165f7090a38f7cf919e3a8f6c1f0c47ef023d06..cf51300ac6a4d68f5a236fc90def33be4f65d67c 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,244 @@ ---- -title: Stable Diffusion Webui Depthmap Script -emoji: 🔥 -colorFrom: pink -colorTo: purple -sdk: gradio -sdk_version: 5.0.2 -app_file: app.py -pinned: false ---- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +--- +title: stable-diffusion-webui-depthmap-script +app_file: main.py +sdk: gradio +sdk_version: 3.50.2 +--- +# High Resolution Depth Maps for Stable Diffusion WebUI +This program is an addon for [AUTOMATIC1111's Stable Diffusion WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui) that creates depth maps. Using either generated or custom depth maps, it can also create 3D stereo image pairs (side-by-side or anaglyph), normalmaps and 3D meshes. The outputs of the script can be viewed directly or used as an asset for a 3D engine. Please see [wiki](https://github.com/thygate/stable-diffusion-webui-depthmap-script/wiki/Viewing-Results) to learn more. The program has integration with [Rembg](https://github.com/danielgatis/rembg). It also supports batch processing, processing of videos, and can also be run in standalone mode, without Stable Diffusion WebUI. + +To generate realistic depth maps from individual images, this script uses code and models from the [Marigold](https://github.com/prs-eth/Marigold/) repository, from the [MiDaS](https://github.com/isl-org/MiDaS) and [ZoeDepth](https://github.com/isl-org/ZoeDepth) repositories by Intel ISL, or LeReS from the [AdelaiDepth](https://github.com/aim-uofa/AdelaiDepth) repository by Advanced Intelligent Machines. 
Multi-resolution merging as implemented by [BoostingMonocularDepth](https://github.com/compphoto/BoostingMonocularDepth) is used to generate high resolution depth maps. + +Stereoscopic images are created using a custom-written algorithm. + +3D Photography using Context-aware Layered Depth Inpainting by Virginia Tech Vision and Learning Lab, or [3D-Photo-Inpainting](https://github.com/vt-vl-lab/3d-photo-inpainting) is used to generate a `3D inpainted mesh` and render `videos` from said mesh. + +Rembg uses [U-2-Net](https://github.com/xuebinqin/U-2-Net) and [IS-Net](https://github.com/xuebinqin/DIS). + +## Depthmap Examples +[![screenshot](examples.png)](https://raw.githubusercontent.com/thygate/stable-diffusion-webui-depthmap-script/main/examples.png) + +## 3D Photo Inpainting Examples +[![video](https://img.youtube.com/vi/jRmVkIMS-SY/0.jpg)](https://www.youtube.com/watch?v=jRmVkIMS-SY) +video by [@graemeniedermayer](https://github.com/graemeniedermayer), more examples [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/discussions/50) + +## Stereo Image SBS and Anaglyph Examples +![](https://user-images.githubusercontent.com/54073010/210012661-ef07986c-2320-4700-bc54-fad3899f0186.png) +images generated by [@semjon00](https://github.com/semjon00) from CC0 photos, more examples [here](https://github.com/thygate/stable-diffusion-webui-depthmap-script/pull/56#issuecomment-1367596463). + +## Install instructions +### As extension +The script can be installed directly from WebUI. Please navigate to `Extensions` tab, then click `Available`, `Load from` and then install the `Depth Maps` extension. Alternatively, the extension can be installed from the URL: `https://github.com/thygate/stable-diffusion-webui-depthmap-script`. + +### Updating +In the WebUI, in the `Extensions` tab, in the `Installed` subtab, click `Check for Updates` and then `Apply and restart UI`. 
+ +### Standalone +Clone the repository, install the requirements from `requirements.txt`, launch using `main.py`. + +>Model weights will be downloaded automatically on their first use and saved to /models/midas, /models/leres and /models/pix2pix. Zoedepth models are stored in the torch cache folder. + + +## Usage +Select the "DepthMap" script from the script selection box in either txt2img or img2img, or go to the Depth tab when using existing images. +![screenshot](options.png) + +The models can `Compute on` GPU and CPU, use CPU if low on VRAM. + +There are ten models available from the `Model` dropdown. For the first model, res101, see [AdelaiDepth/LeReS](https://github.com/aim-uofa/AdelaiDepth/tree/main/LeReS) for more info. The others are the midas models: dpt_beit_large_512, dpt_beit_large_384, dpt_large_384, dpt_hybrid_384, midas_v21, and midas_v21_small. See the [MiDaS](https://github.com/isl-org/MiDaS) repository for more info. The newest dpt_beit_large_512 model was trained on a 512x512 dataset but is VERY VRAM hungry. The last three models are [ZoeDepth](https://github.com/isl-org/ZoeDepth) models. + +Net size can be set with `net width` and `net height`, or will be the same as the input image when `Match input size` is enabled. There is a trade-off between structural consistency and high-frequency details with respect to net size (see [observations](https://github.com/compphoto/BoostingMonocularDepth#observations)). + +`Boost` will enable multi-resolution merging as implemented by [BoostingMonocularDepth](https://github.com/compphoto/BoostingMonocularDepth) and will significantly improve the results, mitigating the observations mentioned above, at the cost of much larger compute time. Best results with res101. + +`Clip and renormalize` allows for clipping the depthmap on the `near` and `far` side, the values in between will be renormalized to fit the available range. Set both values equal to get a b&w mask of a single depth plane at that value. 
This option works on the 16-bit depthmap and allows for 1000 steps to select the clip values. + +When enabled, `Invert DepthMap` will result in a depthmap with black near and white far. + +Regardless of global settings, `Save DepthMap` will always save the depthmap in the default txt2img or img2img directory with the filename suffix '_depth'. Generation parameters are saved with the image if enabled in settings. Files generated from the Depth tab are saved in the default extras-images directory. + +To see the generated output in the webui `Show DepthMap` should be enabled. When using Batch img2img this option should also be enabled. + +When `Combine into one image` is enabled, the depthmap will be combined with the original image, the orientation can be selected with `Combine axis`. When disabled, the depthmap will be saved as a 16 bit single channel PNG as opposed to a three channel (RGB), 8 bit per channel image when the option is enabled. + +When either `Generate Stereo` or `Generate anaglyph` is enabled, a stereo image pair will be generated. `Divergence` sets the amount of 3D effect that is desired. `Balance between eyes` determines where the (inevitable) distortion from filling up gaps will end up, -1 Left, +1 Right, and 0 balanced. +The different `Gap fill technique` options are : none (no gaps are filled), +naive (the original method), naive_interpolating (the original method with interpolation), polylines_soft and polylines_sharp are the latest technique, the last one being best quality and slowest. Note: All stereo image generation is done on CPU. + +To generate the mesh required to generate videos, enable `Generate 3D inpainted mesh`. This can be a lengthy process, from a few minutes for small images to an hour for very large images. This option is only available on the Depth tab. When enabled, the mesh in ply format and four demo videos are generated. All files are saved to the extras directory.
+ +Videos can be generated from the PLY mesh on the Depth Tab. +It requires the mesh created by this extension, files created elsewhere might not work correctly, as some extra info is stored in the file (required value for dolly). Most options are self-explanatory, like `Number of frames` and `Framerate`. Two output `formats` are supported: mp4 and webm. Supersampling Anti-Aliasing (SSAA) can be used to get rid of jagged edges and flickering. The render size is scaled by this factor and then downsampled. +There are three `trajectories` to choose from : circle, straight-line, double-straight-line, to `translate` in three dimensions. The border can be `cropped` on four sides, and the `Dolly` option adjusts the FOV so the center subject will stay approximately the same size, like the dolly-zoom. + +Settings on WebUI Settings tab : +`Maximum wholesize for boost` sets the r_max value from the BoostingMonocularDepth paper, it relates to the max size that is chosen to render at internally, and directly influences the max amount of VRAM that could be used. The default value for this from the paper is 3000, I have lowered the value to 1600 so it will work more often with 8GB VRAM GPUs. +If you often get out of memory errors when computing a depthmap on GPU while using Boost, you can try lowering this value. Note the 'wholeImage being processed in : xxxx' output when using boost, this number will never be greater than the r_max, but can be larger with a larger r_max. See the paper for more details. + +> 💡 Saving as any format other than PNG always produces an 8 bit, 3 channel RGB image. A single channel 16 bit image is only supported when saving as PNG. + +## FAQ + + * `Can I use this on existing images ?` + - Yes, you can use the Depth tab to easily process existing images. + - Another way of doing this would be to use img2img with denoising strength to 0. This will effectively skip stable diffusion and use the input image.
You will still have to set the correct size, and need to select `Crop and resize` instead of `Just resize` when the input image resolution does not match the set size perfectly. + * `Can I run this on Google Colab?` + - You can run the MiDaS network on their colab linked here https://pytorch.org/hub/intelisl_midas_v2/ + - You can run BoostingMonocularDepth on their colab linked here : https://colab.research.google.com/github/compphoto/BoostingMonocularDepth/blob/main/Boostmonoculardepth.ipynb + - Running this program on Colab is not officially supported, but it may work. Please look for more suitable ways of running this. If you still decide to try, standalone installation may be easier to manage. + * `What other depth-related projects could I check out?` + - [DepthFlow](https://github.com/BrokenSource/DepthFlow) by [@Tremeschin](https://github.com/Tremeschin) for a very fast generation of 2.5D videos from images (no need to create mesh beforehand!) + - Several [scripts](https://github.com/Extraltodeus?tab=repositories) by [@Extraltodeus](https://github.com/Extraltodeus) using depth maps. + - geo-11, [Depth3D](https://github.com/BlueSkyDefender/Depth3D) and [Geo3D](https://github.com/Flugan/Geo3D-Installer) for playing existing games in 3D. + - (Feel free to suggest more projects in the discussions!) + * `How can I know what changed in the new version of the script?` + - You can see the git history log or refer to the `CHANGELOG.md` file. + +## Help wanted! +Developers wanted! Please help us fix the bugs and add new features by creating MRs. +All help is heavily appreciated. +Feel free to comment and share in the discussions and submit issues. 
+ +## Acknowledgements + +This project relies on code and information from the following papers : + +MiDaS : + +``` +@article {Ranftl2022, + author = "Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun", + title = "Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-Shot Cross-Dataset Transfer", + journal = "IEEE Transactions on Pattern Analysis and Machine Intelligence", + year = "2022", + volume = "44", + number = "3" +} +``` + +Dense Prediction Transformers, DPT-based model : + +``` +@article{Ranftl2021, + author = {Ren\'{e} Ranftl and Alexey Bochkovskiy and Vladlen Koltun}, + title = {Vision Transformers for Dense Prediction}, + journal = {ICCV}, + year = {2021}, +} +``` + +AdelaiDepth/LeReS : + +``` +@article{yin2022towards, + title={Towards Accurate Reconstruction of 3D Scene Shape from A Single Monocular Image}, + author={Yin, Wei and Zhang, Jianming and Wang, Oliver and Niklaus, Simon and Chen, Simon and Liu, Yifan and Shen, Chunhua}, + journal={TPAMI}, + year={2022} +} +@inproceedings{Wei2021CVPR, + title = {Learning to Recover 3D Scene Shape from a Single Image}, + author = {Wei Yin and Jianming Zhang and Oliver Wang and Simon Niklaus and Long Mai and Simon Chen and Chunhua Shen}, + booktitle = {Proc. IEEE Conf. Comp. Vis. Patt. Recogn. (CVPR)}, + year = {2021} +} +``` + +Boosting Monocular Depth Estimation Models to High-Resolution via Content-Adaptive Multi-Resolution Merging : + +``` +@inproceedings{Miangoleh2021Boosting, + title={Boosting Monocular Depth Estimation Models to High-Resolution via Content-Adaptive Multi-Resolution Merging}, + author={S. Mahdi H. Miangoleh and Sebastian Dille and Long Mai and Sylvain Paris and Ya\u{g}{\i}z Aksoy}, + journal={Proc. 
CVPR}, + year={2021}, +} +``` + +3D Photography using Context-aware Layered Depth Inpainting : + +``` +@inproceedings{Shih3DP20, + author = {Shih, Meng-Li and Su, Shih-Yang and Kopf, Johannes and Huang, Jia-Bin}, + title = {3D Photography using Context-aware Layered Depth Inpainting}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2020} +} +``` + +U2-Net: + +``` +@InProceedings{Qin_2020_PR, + title = {U2-Net: Going Deeper with Nested U-Structure for Salient Object Detection}, + author = {Qin, Xuebin and Zhang, Zichen and Huang, Chenyang and Dehghan, Masood and Zaiane, Osmar and Jagersand, Martin}, + journal = {Pattern Recognition}, + volume = {106}, + pages = {107404}, + year = {2020} +} +``` + +IS-Net: + +``` +@InProceedings{qin2022, + author={Xuebin Qin and Hang Dai and Xiaobin Hu and Deng-Ping Fan and Ling Shao and Luc Van Gool}, + title={Highly Accurate Dichotomous Image Segmentation}, + booktitle={ECCV}, + year={2022} +} +``` + + +ZoeDepth : + +``` +@misc{https://doi.org/10.48550/arxiv.2302.12288, + doi = {10.48550/ARXIV.2302.12288}, + url = {https://arxiv.org/abs/2302.12288}, + author = {Bhat, Shariq Farooq and Birkl, Reiner and Wofk, Diana and Wonka, Peter and Müller, Matthias}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {ZoeDepth: Zero-shot Transfer by Combining Relative and Metric Depth}, + publisher = {arXiv}, + year = {2023}, + copyright = {arXiv.org perpetual, non-exclusive license} +} +``` + +Marigold - Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation: + +``` +@misc{ke2023repurposing, + title={Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation}, + author={Bingxin Ke and Anton Obukhov and Shengyu Huang and Nando Metzger and Rodrigo Caye Daudt and Konrad Schindler}, + year={2023}, + eprint={2312.02145}, + archivePrefix={arXiv}, + 
primaryClass={cs.CV} +} +``` + +Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data + +``` +@misc{yang2024depth, + title={Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data}, + author={Lihe Yang and Bingyi Kang and Zilong Huang and Xiaogang Xu and Jiashi Feng and Hengshuang Zhao}, + year={2024}, + eprint={2401.10891}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +Depth Anything V2 + +```bibtex +@article{depth_anything_v2, + title={Depth Anything V2}, + author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Zhao, Zhen and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang}, + journal={arXiv:2406.09414}, + year={2024} +} +``` \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bundled_sources.txt b/bundled_sources.txt new file mode 100644 index 0000000000000000000000000000000000000000..8528df73cd2ab3d7e024fb8aa2a0be343aa29ae9 --- /dev/null +++ b/bundled_sources.txt @@ -0,0 +1,25 @@ +Since commit 110549b2 this extension bundles some code from other repositories. +This was done to prevent possible upstream breakage and allow fixing breakage quicker. +This file provides information about the original location of the code. +*** Some of the bundled code was already modified. 
*** + +dmidas +https://github.com/isl-org/MiDaS/tree/master/midas/ + +dzoedepth +https://github.com/isl-org/ZoeDepth/tree/main/zoedepth/ + +inpaint +https://github.com/vt-vl-lab/3d-photo-inpainting/ + +lib +https://github.com/aim-uofa/AdelaiDepth/tree/main/LeReS/Minist_Test/lib/ + +pix2pix +https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/ + +Marigold +https://github.com/prs-eth/Marigold/tree/22437a + +depth_anything_v2 +https://github.com/DepthAnything/Depth-Anything-V2/tree/bc0283 diff --git a/ddepth_anything_v2/DA-2K.md b/ddepth_anything_v2/DA-2K.md new file mode 100644 index 0000000000000000000000000000000000000000..acd3299bc8d008d48ed0d108a857789c9b494832 --- /dev/null +++ b/ddepth_anything_v2/DA-2K.md @@ -0,0 +1,51 @@ +# DA-2K Evaluation Benchmark + +## Introduction + +![DA-2K](assets/DA-2K.png) + +DA-2K is proposed in [Depth Anything V2](https://depth-anything-v2.github.io) to evaluate the relative depth estimation capability. It encompasses eight representative scenarios of `indoor`, `outdoor`, `non_real`, `transparent_reflective`, `adverse_style`, `aerial`, `underwater`, and `object`. It consists of 1K diverse high-quality images and 2K precise pair-wise relative depth annotations. + +Please refer to our [paper](https://arxiv.org/abs/2406.09414) for details in constructing this benchmark. + + +## Usage + +Please first [download the benchmark](https://huggingface.co/datasets/depth-anything/DA-2K/tree/main). + +All annotations are stored in `annotations.json`. The annotation file is a JSON object where each key is the path to an image file, and the value is a list of annotations associated with that image. Each annotation describes two points and identifies which point is closer to the camera. 
The structure is detailed below: + +``` +{ + "image_path": [ + { + "point1": [h1, w1], # (vertical position, horizontal position) + "point2": [h2, w2], # (vertical position, horizontal position) + "closer_point": "point1" # we always set "point1" as the closer one + }, + ... + ], + ... +} +``` + +To visualize the annotations: +```bash +python visualize.py [--scene-type <type>] +``` + +**Options** +- `--scene-type <type>` (optional): Specify the scene type (`indoor`, `outdoor`, `non_real`, `transparent_reflective`, `adverse_style`, `aerial`, `underwater`, and `object`). Skip this argument or set as `""` to include all scene types. + +## Citation + +If you find this benchmark useful, please consider citing: + +```bibtex +@article{depth_anything_v2, + title={Depth Anything V2}, + author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Zhao, Zhen and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang}, + journal={arXiv:2406.09414}, + year={2024} +} +``` \ No newline at end of file diff --git a/ddepth_anything_v2/LICENSE b/ddepth_anything_v2/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..29f81d812f3e768fa89638d1f72920dbfd1413a8 --- /dev/null +++ b/ddepth_anything_v2/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity.
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ddepth_anything_v2/README.md b/ddepth_anything_v2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..869e8d8bbee9b8b763528f06d11ee5a683fb9809 --- /dev/null +++ b/ddepth_anything_v2/README.md @@ -0,0 +1,201 @@ +
+

Depth Anything V2

+ +[**Lihe Yang**](https://liheyoung.github.io/)1 · [**Bingyi Kang**](https://bingykang.github.io/)2† · [**Zilong Huang**](http://speedinghzl.github.io/)2 +
+[**Zhen Zhao**](http://zhaozhen.me/) · [**Xiaogang Xu**](https://xiaogang00.github.io/) · [**Jiashi Feng**](https://sites.google.com/site/jshfeng/)2 · [**Hengshuang Zhao**](https://hszhao.github.io/)1* + +1HKU   2TikTok +
+†project lead *corresponding author +†[Bingyi Kang](https://bingykang.github.io/) proposed this project and advised in every aspect. + +Paper PDF +Project Page + +Benchmark +
+ +This work presents Depth Anything V2. It significantly outperforms [V1](https://github.com/LiheYoung/Depth-Anything) in fine-grained details and robustness. Compared with SD-based models, it enjoys faster inference speed, fewer parameters, and higher depth accuracy. + +![teaser](assets/teaser.png) + + +## News + +- **2024-07-06:** Depth Anything V2 is supported in [Transformers](https://github.com/huggingface/transformers/). See the [instructions](https://huggingface.co/docs/transformers/main/en/model_doc/depth_anything_v2) for convenient usage. +- **2024-06-25:** Depth Anything is integrated into [Apple Core ML Models](https://developer.apple.com/machine-learning/models/). See the instructions ([V1](https://huggingface.co/apple/coreml-depth-anything-small), [V2](https://huggingface.co/apple/coreml-depth-anything-v2-small)) for usage. +- **2024-06-22:** We release [smaller metric depth models](https://github.com/DepthAnything/Depth-Anything-V2/tree/main/metric_depth#pre-trained-models) based on Depth-Anything-V2-Small and Base. +- **2024-06-20:** Our repository and project page are flagged by GitHub and removed from the public for 6 days. Sorry for the inconvenience. +- **2024-06-14:** Paper, project page, code, models, demo, and benchmark are all released. 
+ + +## Pre-trained Models + +We provide **four models** of varying scales for robust relative depth estimation: + +| Model | Params | Checkpoint | +|:-|-:|:-:| +| Depth-Anything-V2-Small | 24.8M | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Small/resolve/main/depth_anything_v2_vits.pth?download=true) | +| Depth-Anything-V2-Base | 97.5M | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Base/resolve/main/depth_anything_v2_vitb.pth?download=true) | +| Depth-Anything-V2-Large | 335.3M | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Large/resolve/main/depth_anything_v2_vitl.pth?download=true) | +| Depth-Anything-V2-Giant | 1.3B | Coming soon | + + +## Usage + +### Preparation + +```bash +git clone https://github.com/DepthAnything/Depth-Anything-V2 +cd Depth-Anything-V2 +pip install -r requirements.txt +``` + +Download the checkpoints listed [here](#pre-trained-models) and put them under the `checkpoints` directory. + +### Use our models +```python +import cv2 +import torch + +from depth_anything_v2.dpt import DepthAnythingV2 + +DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' + +model_configs = { + 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, + 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, + 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}, + 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]} +} + +encoder = 'vitl' # or 'vits', 'vitb', 'vitg' + +model = DepthAnythingV2(**model_configs[encoder]) +model.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{encoder}.pth', map_location='cpu')) +model = model.to(DEVICE).eval() + +raw_img = cv2.imread('your/image/path') +depth = model.infer_image(raw_img) # HxW raw depth map in numpy +``` + +If you do not want to clone this repository, you can also load our
models through [Transformers](https://github.com/huggingface/transformers/). Below is a simple code snippet. Please refer to the [official page](https://huggingface.co/docs/transformers/main/en/model_doc/depth_anything_v2) for more details. + +- Note 1: Make sure you can connect to Hugging Face and have installed the latest Transformers. +- Note 2: Due to the [upsampling difference](https://github.com/huggingface/transformers/pull/31522#issuecomment-2184123463) between OpenCV (we used) and Pillow (HF used), predictions may differ slightly. We therefore recommend using our models in the way introduced above. +```python +from transformers import pipeline +from PIL import Image + +pipe = pipeline(task="depth-estimation", model="depth-anything/Depth-Anything-V2-Small-hf") +image = Image.open('your/image/path') +depth = pipe(image)["depth"] +``` + +### Running script on *images* + +```bash +python run.py \ + --encoder <vits | vitb | vitl | vitg> \ + --img-path <path> --outdir <outdir> \ + [--input-size <size>] [--pred-only] [--grayscale] +``` +Options: +- `--img-path`: You can either 1) point it to an image directory storing all images of interest, 2) point it to a single image, or 3) point it to a text file storing all image paths. +- `--input-size` (optional): By default, we use input size `518` for model inference. ***You can increase the size for even more fine-grained results.*** +- `--pred-only` (optional): Only save the predicted depth map, without raw image. +- `--grayscale` (optional): Save the grayscale depth map, without applying color palette.
+ +For example: +```bash +python run.py --encoder vitl --img-path assets/examples --outdir depth_vis +``` + +### Running script on *videos* + +```bash +python run_video.py \ + --encoder <vits | vitb | vitl | vitg> \ + --video-path assets/examples_video --outdir video_depth_vis \ + [--input-size <size>] [--pred-only] [--grayscale] +``` + +***Our larger model has better temporal consistency on videos.*** + +### Gradio demo + +To use our gradio demo locally: + +```bash +python app.py +``` + +You can also try our [online demo](https://huggingface.co/spaces/Depth-Anything/Depth-Anything-V2). + +***Note: Compared to V1, we have made a minor modification to the DINOv2-DPT architecture (originating from this [issue](https://github.com/LiheYoung/Depth-Anything/issues/81)).*** In V1, we *unintentionally* used features from the last four layers of DINOv2 for decoding. In V2, we use [intermediate features](https://github.com/DepthAnything/Depth-Anything-V2/blob/2cbc36a8ce2cec41d38ee51153f112e87c8e42d8/depth_anything_v2/dpt.py#L164-L169) instead. Although this modification did not improve details or accuracy, we decided to follow this common practice. + + +## Fine-tuned to Metric Depth Estimation + +Please refer to [metric depth estimation](./metric_depth). + + +## DA-2K Evaluation Benchmark + +Please refer to [DA-2K benchmark](./DA-2K.md). + + +## Community Support + +**We sincerely appreciate all the community support for our Depth Anything series.
Thank you a lot!** + +- Apple Core ML: + - https://developer.apple.com/machine-learning/models + - https://huggingface.co/apple/coreml-depth-anything-v2-small + - https://huggingface.co/apple/coreml-depth-anything-small +- Transformers: + - https://huggingface.co/docs/transformers/main/en/model_doc/depth_anything_v2 + - https://huggingface.co/docs/transformers/main/en/model_doc/depth_anything +- TensorRT: + - https://github.com/spacewalk01/depth-anything-tensorrt + - https://github.com/zhujiajian98/Depth-Anythingv2-TensorRT-python +- ONNX: https://github.com/fabio-sim/Depth-Anything-ONNX +- ComfyUI: https://github.com/kijai/ComfyUI-DepthAnythingV2 +- Transformers.js (real-time depth in web): https://huggingface.co/spaces/Xenova/webgpu-realtime-depth-estimation +- Android: + - https://github.com/shubham0204/Depth-Anything-Android + - https://github.com/FeiGeChuanShu/ncnn-android-depth_anything + + +## Acknowledgement + +We are sincerely grateful to the awesome Hugging Face team ([@Pedro Cuenca](https://huggingface.co/pcuenq), [@Niels Rogge](https://huggingface.co/nielsr), [@Merve Noyan](https://huggingface.co/merve), [@Amy Roberts](https://huggingface.co/amyeroberts), et al.) for their huge efforts in supporting our models in Transformers and Apple Core ML. + +We also thank the [DINOv2](https://github.com/facebookresearch/dinov2) team for contributing such impressive models to our community. + + +## LICENSE + +Depth-Anything-V2-Small model is under the Apache-2.0 license. Depth-Anything-V2-Base/Large/Giant models are under the CC-BY-NC-4.0 license. 
+ + +## Citation + +If you find this project useful, please consider citing: + +```bibtex +@article{depth_anything_v2, + title={Depth Anything V2}, + author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Zhao, Zhen and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang}, + journal={arXiv:2406.09414}, + year={2024} +} + +@inproceedings{depth_anything_v1, + title={Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data}, + author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang}, + booktitle={CVPR}, + year={2024} +} +``` diff --git a/ddepth_anything_v2/__init__.py b/ddepth_anything_v2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5313fcb8adfac3c8c9f807330070c83fa5e5aba8 --- /dev/null +++ b/ddepth_anything_v2/__init__.py @@ -0,0 +1 @@ +from .depth_anything_v2.dpt import DepthAnythingV2 \ No newline at end of file diff --git a/ddepth_anything_v2/__pycache__/__init__.cpython-310.pyc b/ddepth_anything_v2/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ddb91545c8bf6d0c2fb69e60327851f3650f16f Binary files /dev/null and b/ddepth_anything_v2/__pycache__/__init__.cpython-310.pyc differ diff --git a/ddepth_anything_v2/__pycache__/__init__.cpython-311.pyc b/ddepth_anything_v2/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b7563419faf2b798abc85bef56492503b65e6138 Binary files /dev/null and b/ddepth_anything_v2/__pycache__/__init__.cpython-311.pyc differ diff --git a/ddepth_anything_v2/__pycache__/__init__.cpython-312.pyc b/ddepth_anything_v2/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92ca9e88348fbf9b04343b69a729398fc2fa528f Binary files /dev/null and b/ddepth_anything_v2/__pycache__/__init__.cpython-312.pyc differ diff --git a/ddepth_anything_v2/app.py b/ddepth_anything_v2/app.py new file mode 100644 
# (patch-stream metadata kept verbatim) index 0000000000000000000000000000000000000000..19d9018964dfef564726d7d140275a4e5b192e75 --- /dev/null +++ b/ddepth_anything_v2/app.py @@ -0,0 +1,88 @@
"""Gradio demo for Depth Anything V2.

Loads one checkpoint at import time and serves a single-page UI that turns an
uploaded image into a coloured depth map plus two downloadable PNG files.
"""
import glob
import gradio as gr
import matplotlib
import numpy as np
from PIL import Image
import torch
import tempfile
from gradio_imageslider import ImageSlider

from depth_anything_v2.dpt import DepthAnythingV2

css = """
#img-display-container {
    max-height: 100vh;
}
#img-display-input {
    max-height: 80vh;
}
#img-display-output {
    max-height: 80vh;
}
#download {
    height: 62px;
}
"""
# Prefer CUDA, then Apple MPS, then fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
# Constructor keyword arguments for DepthAnythingV2, keyed by encoder name.
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
}
encoder = 'vitl'
model = DepthAnythingV2(**model_configs[encoder])
# NOTE(review): torch.load unpickles the file, so only point this at a
# checkpoint you trust (consider weights_only=True if the installed torch
# supports it — not changed here to avoid altering loading behaviour).
state_dict = torch.load(f'checkpoints/depth_anything_v2_{encoder}.pth', map_location="cpu")
model.load_state_dict(state_dict)
model = model.to(DEVICE).eval()

title = "# Depth Anything V2"
description = """Official demo for **Depth Anything V2**.
Please refer to our [paper](https://arxiv.org/abs/2406.09414), [project page](https://depth-anything-v2.github.io), or [github](https://github.com/DepthAnything/Depth-Anything-V2) for more details."""


def predict_depth(image):
    """Run the loaded model on *image* and return whatever ``infer_image`` returns."""
    return model.infer_image(image)


def _save_temp_png(img):
    """Save PIL image *img* to a new temporary ``.png`` file and return its path.

    The file is created with ``delete=False`` because Gradio serves it after
    this function returns. Our own handle is closed before PIL writes to the
    path, so no file descriptor is leaked per request and the path can be
    re-opened by name on platforms that forbid that while a handle is open.
    """
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
        path = tmp.name
    img.save(path)
    return path


with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown("### Depth Prediction demo")

    with gr.Row():
        input_image = gr.Image(label="Input Image", type='numpy', elem_id='img-display-input')
        depth_image_slider = ImageSlider(label="Depth Map with Slider View", elem_id='img-display-output', position=0.5)
    submit = gr.Button(value="Compute Depth")
    gray_depth_file = gr.File(label="Grayscale depth map", elem_id="download",)
    raw_file = gr.File(label="16-bit raw output (can be considered as disparity)", elem_id="download",)

    cmap = matplotlib.colormaps.get_cmap('Spectral_r')

    def on_submit(image):
        """Compute the depth outputs for one uploaded image.

        Args:
            image: The array delivered by the ``gr.Image(type='numpy')``
                input, or ``None`` when nothing has been uploaded.

        Returns:
            A list ``[(original_image, colored_depth), gray_png_path,
            raw_png_path]`` matching the three output components.

        Raises:
            gr.Error: If no image was provided.
        """
        if image is None:
            # Without this guard the click fails with an AttributeError on None.
            raise gr.Error("Please upload an image before computing depth.")

        original_image = image.copy()

        # Reverse the channel axis before inference
        # (presumably RGB -> BGR for the model — confirm against infer_image).
        depth = predict_depth(image[:, :, ::-1])

        # Save the raw prediction first, before it is rescaled below.
        raw_depth = Image.fromarray(depth.astype('uint16'))
        raw_path = _save_temp_png(raw_depth)

        # Min-max rescale to 0..255. A constant prediction has zero range;
        # dividing by it would produce NaNs, so map it to all zeros instead.
        depth_min = depth.min()
        depth_range = depth.max() - depth_min
        if depth_range > 0:
            depth = (depth - depth_min) / depth_range * 255.0
        else:
            depth = np.zeros_like(depth)
        depth = depth.astype(np.uint8)
        # The colormap returns RGBA floats in [0, 1]; keep RGB as uint8.
        colored_depth = (cmap(depth)[:, :, :3] * 255).astype(np.uint8)

        gray_depth = Image.fromarray(depth)
        gray_path = _save_temp_png(gray_depth)

        return [(original_image, colored_depth), gray_path, raw_path]

    submit.click(on_submit, inputs=[input_image], outputs=[depth_image_slider, gray_depth_file, raw_file])

    example_files = glob.glob('assets/examples/*')
    examples = gr.Examples(examples=example_files, inputs=[input_image], outputs=[depth_image_slider, gray_depth_file, raw_file], fn=on_submit)


if __name__ == '__main__':
    demo.queue().launch()
# (patch-stream metadata kept verbatim) \ No newline at end of file diff --git
a/ddepth_anything_v2/depth_anything_v2/__pycache__/dinov2.cpython-310.pyc b/ddepth_anything_v2/depth_anything_v2/__pycache__/dinov2.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b1adcd74a60b650611cd4ada6f72e3a2ded6ec5 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/__pycache__/dinov2.cpython-310.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/__pycache__/dinov2.cpython-311.pyc b/ddepth_anything_v2/depth_anything_v2/__pycache__/dinov2.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..88e5691f822a7b9290d2cf0ad4069fc780ffae89 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/__pycache__/dinov2.cpython-311.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/__pycache__/dinov2.cpython-312.pyc b/ddepth_anything_v2/depth_anything_v2/__pycache__/dinov2.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c7ddb2952f846f2011e7c2ee8f20d7e9c10b375c Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/__pycache__/dinov2.cpython-312.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/__pycache__/dpt.cpython-310.pyc b/ddepth_anything_v2/depth_anything_v2/__pycache__/dpt.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c04152aae414ef12056ca8e6f91348f85a385a5 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/__pycache__/dpt.cpython-310.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/__pycache__/dpt.cpython-311.pyc b/ddepth_anything_v2/depth_anything_v2/__pycache__/dpt.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6bc2e8e95c6008865d1a49650dd6d34f0ece21a Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/__pycache__/dpt.cpython-311.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/__pycache__/dpt.cpython-312.pyc 
# (patch-stream metadata kept verbatim) b/ddepth_anything_v2/depth_anything_v2/__pycache__/dpt.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..55a751abf11571e62f243eb6f8931d42242a10b0 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/__pycache__/dpt.cpython-312.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2.py b/ddepth_anything_v2/depth_anything_v2/dinov2.py new file mode 100644 index 0000000000000000000000000000000000000000..5cbfc7d24d37796d5310fd966b582bb3773685dc --- /dev/null +++ b/ddepth_anything_v2/depth_anything_v2/dinov2.py @@ -0,0 +1,415 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Apache License, Version 2.0
# found in the LICENSE file in the root directory of this source tree.

# References:
#   https://github.com/facebookresearch/dino/blob/main/vision_transformer.py
#   https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py

from functools import partial
import math
import logging
from typing import Sequence, Tuple, Union, Callable

import torch
import torch.nn as nn
import torch.utils.checkpoint
from torch.nn.init import trunc_normal_

from .dinov2_layers import Mlp, PatchEmbed, SwiGLUFFNFused, MemEffAttention, NestedTensorBlock as Block


logger = logging.getLogger("dinov2")


def named_apply(fn: Callable, module: nn.Module, name="", depth_first=True, include_root=False) -> nn.Module:
    """Recursively call ``fn(module=..., name=...)`` on the children of *module*.

    Args:
        fn: Callable invoked with keyword arguments ``module`` and ``name``.
        module: Root of the module tree to walk.
        name: Dotted name of *module*; child names are appended to it.
        depth_first: If True a module is visited after its children,
            otherwise before them.
        include_root: Whether *fn* is also applied to *module* itself.
            Every descendant is always visited.

    Returns:
        The *module* that was passed in.
    """
    if not depth_first and include_root:
        fn(module=module, name=name)
    for child_name, child_module in module.named_children():
        child_name = ".".join((name, child_name)) if name else child_name
        named_apply(fn=fn, module=child_module, name=child_name, depth_first=depth_first, include_root=True)
    if depth_first and include_root:
        fn(module=module, name=name)
    return module


class BlockChunk(nn.ModuleList):
    """A ``ModuleList`` whose forward pass applies its members in order."""

    def forward(self, x):
        for b in self:
            x = b(x)
        return x


class DinoVisionTransformer(nn.Module):
    """Vision transformer backbone: patch embedding, cls/register tokens, blocks, final norm."""

    def __init__(
        self,
        img_size=224,
        patch_size=16,
        in_chans=3,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4.0,
        qkv_bias=True,
        ffn_bias=True,
        proj_bias=True,
        drop_path_rate=0.0,
        drop_path_uniform=False,
        init_values=None,  # for layerscale: None or 0 => no layerscale
        embed_layer=PatchEmbed,
        act_layer=nn.GELU,
        block_fn=Block,
        ffn_layer="mlp",
        block_chunks=1,
        num_register_tokens=0,
        interpolate_antialias=False,
        interpolate_offset=0.1,
    ):
        """
        Args:
            img_size (int, tuple): input image size
            patch_size (int, tuple): patch size
            in_chans (int): number of input channels
            embed_dim (int): embedding dimension
            depth (int): depth of transformer
            num_heads (int): number of attention heads
            mlp_ratio (float): ratio of mlp hidden dim to embedding dim
            qkv_bias (bool): enable bias for qkv if True
            proj_bias (bool): enable bias for proj in attn if True
            ffn_bias (bool): enable bias for ffn if True
            drop_path_rate (float): stochastic depth rate
            drop_path_uniform (bool): apply uniform drop rate across blocks
            init_values (float): layer-scale init values
            embed_layer (nn.Module): patch embedding layer
            act_layer (nn.Module): MLP activation layer
            block_fn (nn.Module): transformer block class
            ffn_layer (str): "mlp", "swiglu", "swiglufused" or "identity"
            block_chunks: (int) split block sequence into block_chunks units for FSDP wrap;
                0 or less keeps a flat list of blocks
            num_register_tokens: (int) number of extra cls tokens (so-called "registers")
            interpolate_antialias: (bool) flag to apply anti-aliasing when interpolating positional embeddings
            interpolate_offset: (float) work-around offset to apply when interpolating positional embeddings

        Raises:
            NotImplementedError: if ``ffn_layer`` is not one of the supported names.
            ValueError: if ``block_chunks`` is positive but larger than ``depth``.
        """
        super().__init__()
        norm_layer = partial(nn.LayerNorm, eps=1e-6)

        self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
        self.num_tokens = 1
        self.n_blocks = depth
        self.num_heads = num_heads
        # NOTE(review): used with ``//`` on plain ints below, so an int
        # patch_size appears to be what the rest of the class expects.
        self.patch_size = patch_size
        self.num_register_tokens = num_register_tokens
        self.interpolate_antialias = interpolate_antialias
        self.interpolate_offset = interpolate_offset

        self.patch_embed = embed_layer(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
        num_patches = self.patch_embed.num_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + self.num_tokens, embed_dim))
        assert num_register_tokens >= 0
        self.register_tokens = (
            nn.Parameter(torch.zeros(1, num_register_tokens, embed_dim)) if num_register_tokens else None
        )

        if drop_path_uniform is True:
            dpr = [drop_path_rate] * depth
        else:
            dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]  # stochastic depth decay rule

        if ffn_layer == "mlp":
            logger.info("using MLP layer as FFN")
            ffn_layer = Mlp
        elif ffn_layer == "swiglufused" or ffn_layer == "swiglu":
            logger.info("using SwiGLU layer as FFN")
            ffn_layer = SwiGLUFFNFused
        elif ffn_layer == "identity":
            logger.info("using Identity layer as FFN")

            def f(*args, **kwargs):
                return nn.Identity()

            ffn_layer = f
        else:
            raise NotImplementedError(f"unsupported ffn_layer: {ffn_layer!r}")

        blocks_list = [
            block_fn(
                dim=embed_dim,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                proj_bias=proj_bias,
                ffn_bias=ffn_bias,
                drop_path=dpr[i],
                norm_layer=norm_layer,
                act_layer=act_layer,
                ffn_layer=ffn_layer,
                init_values=init_values,
            )
            for i in range(depth)
        ]
        if block_chunks > 0:
            self.chunked_blocks = True
            chunked_blocks = []
            chunksize = depth // block_chunks
            if chunksize == 0:
                # range() would raise an opaque "arg 3 must not be zero";
                # same exception type, clearer message.
                raise ValueError(f"block_chunks ({block_chunks}) must not exceed depth ({depth})")
            for i in range(0, depth, chunksize):
                # this is to keep the block index consistent if we chunk the block list
                chunked_blocks.append([nn.Identity()] * i + blocks_list[i : i + chunksize])
            self.blocks = nn.ModuleList([BlockChunk(p) for p in chunked_blocks])
        else:
            self.chunked_blocks = False
            self.blocks = nn.ModuleList(blocks_list)

        self.norm = norm_layer(embed_dim)
        self.head = nn.Identity()

        self.mask_token = nn.Parameter(torch.zeros(1, embed_dim))

        self.init_weights()

    def init_weights(self):
        """Initialise the positional embedding, special tokens and all ``nn.Linear`` layers."""
        trunc_normal_(self.pos_embed, std=0.02)
        nn.init.normal_(self.cls_token, std=1e-6)
        if self.register_tokens is not None:
            nn.init.normal_(self.register_tokens, std=1e-6)
        named_apply(init_weights_vit_timm, self)

    def interpolate_pos_encoding(self, x, w, h):
        """Return a positional embedding matching the token count of *x*.

        Args:
            x: Token tensor whose second dimension is ``1 + number of patches``.
            w, h: The two spatial sizes of the input image, in the order the
                caller unpacked them from its shape.

        Returns:
            ``self.pos_embed`` unchanged when the patch count matches and
            ``w == h``; otherwise the class embedding concatenated with the
            patch embeddings bicubically resampled to a
            ``(w // patch_size, h // patch_size)`` grid, cast to ``x.dtype``.
        """
        previous_dtype = x.dtype
        npatch = x.shape[1] - 1
        N = self.pos_embed.shape[1] - 1
        if npatch == N and w == h:
            return self.pos_embed
        pos_embed = self.pos_embed.float()
        class_pos_embed = pos_embed[:, 0]
        patch_pos_embed = pos_embed[:, 1:]
        dim = x.shape[-1]
        w0 = w // self.patch_size
        h0 = h // self.patch_size
        # we add a small number to avoid floating point error in the interpolation
        # see discussion at https://github.com/facebookresearch/dino/issues/8
        # DINOv2 with register modify the interpolate_offset from 0.1 to 0.0
        w0, h0 = w0 + self.interpolate_offset, h0 + self.interpolate_offset

        sqrt_N = math.sqrt(N)
        sx, sy = float(w0) / sqrt_N, float(h0) / sqrt_N
        # NOTE(review): the original author noted that passing the target size
        # (int(w0), int(h0)) instead of scale_factor is a way "to solve the
        # upsampling shape issue" should the asserts below ever fire.
        patch_pos_embed = nn.functional.interpolate(
            patch_pos_embed.reshape(1, int(sqrt_N), int(sqrt_N), dim).permute(0, 3, 1, 2),
            scale_factor=(sx, sy),
            mode="bicubic",
            antialias=self.interpolate_antialias
        )

        assert int(w0) == patch_pos_embed.shape[-2]
        assert int(h0) == patch_pos_embed.shape[-1]
        patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim)
        return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1).to(previous_dtype)

    def prepare_tokens_with_masks(self, x, masks=None):
        """Embed an image batch into the token sequence fed to the blocks.

        Patch-embeds *x*, replaces tokens selected by *masks* with the mask
        token, prepends the class token, adds the positional embedding and,
        when configured, inserts the register tokens right after the class
        token.
        """
        # w and h are simply dims 2 and 3 of the 4-D input, in that order.
        _, _, w, h = x.shape
        x = self.patch_embed(x)
        if masks is not None:
            x = torch.where(masks.unsqueeze(-1), self.mask_token.to(x.dtype).unsqueeze(0), x)

        x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1)
        x = x + self.interpolate_pos_encoding(x, w, h)

        if self.register_tokens is not None:
            x = torch.cat(
                (
                    x[:, :1],
                    self.register_tokens.expand(x.shape[0], -1, -1),
                    x[:, 1:],
                ),
                dim=1,
            )

        return x

    def forward_features_list(self, x_list, masks_list):
        """List variant of :meth:`forward_features`; returns one result dict per input.

        The whole list of token tensors is handed to each block at once
        (presumably the block class accepts list input — confirm).
        """
        x = [self.prepare_tokens_with_masks(x, masks) for x, masks in zip(x_list, masks_list)]
        for blk in self.blocks:
            x = blk(x)

        all_x = x
        output = []
        for x, masks in zip(all_x, masks_list):
            x_norm = self.norm(x)
            output.append(
                {
                    "x_norm_clstoken": x_norm[:, 0],
                    "x_norm_regtokens": x_norm[:, 1 : self.num_register_tokens + 1],
                    "x_norm_patchtokens": x_norm[:, self.num_register_tokens + 1 :],
                    "x_prenorm": x,
                    "masks": masks,
                }
            )
        return output

    def forward_features(self, x, masks=None):
        """Run the backbone and return normalised tokens split by role.

        Returns:
            A dict with the normalised class token, register tokens and patch
            tokens, the pre-norm sequence (``x_prenorm``) and the *masks*
            passed in. For a list input, a list of such dicts.
        """
        if isinstance(x, list):
            return self.forward_features_list(x, masks)

        x = self.prepare_tokens_with_masks(x, masks)

        for blk in self.blocks:
            x = blk(x)

        x_norm = self.norm(x)
        return {
            "x_norm_clstoken": x_norm[:, 0],
            "x_norm_regtokens": x_norm[:, 1 : self.num_register_tokens + 1],
            "x_norm_patchtokens": x_norm[:, self.num_register_tokens + 1 :],
            "x_prenorm": x,
            "masks": masks,
        }

    def _get_intermediate_layers_not_chunked(self, x, n=1):
        """Collect block outputs when ``self.blocks`` is a flat list of blocks."""
        x = self.prepare_tokens_with_masks(x)
        # If n is an int, take the n last blocks. If it's a list, take them
        output, total_block_len = [], len(self.blocks)
        blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n
        for i, blk in enumerate(self.blocks):
            x = blk(x)
            if i in blocks_to_take:
                output.append(x)
        assert len(output) == len(blocks_to_take), f"only {len(output)} / {len(blocks_to_take)} blocks found"
        return output

    def _get_intermediate_layers_chunked(self, x, n=1):
        """Collect block outputs when ``self.blocks`` is a list of ``BlockChunk``s."""
        x = self.prepare_tokens_with_masks(x)
        # The last chunk is padded with identities up to its first real block,
        # so its length equals the total number of blocks.
        output, i, total_block_len = [], 0, len(self.blocks[-1])
        # If n is an int, take the n last blocks. If it's a list, take them
        blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n
        for block_chunk in self.blocks:
            for blk in block_chunk[i:]:  # Passing the nn.Identity()
                x = blk(x)
                if i in blocks_to_take:
                    output.append(x)
                i += 1
        assert len(output) == len(blocks_to_take), f"only {len(output)} / {len(blocks_to_take)} blocks found"
        return output

    def get_intermediate_layers(
        self,
        x: torch.Tensor,
        n: Union[int, Sequence] = 1,  # Layers or n last layers to take
        reshape: bool = False,
        return_class_token: bool = False,
        norm=True
    ) -> Tuple[Union[torch.Tensor, Tuple[torch.Tensor]]]:
        """Return patch-token outputs of selected blocks.

        Args:
            x: Input image batch (4-D).
            n: An int selects the last *n* blocks; a sequence selects those
                block indices.
            reshape: If True, reshape each output to
                ``(B, channels, dim2 // patch_size, dim3 // patch_size)``.
            return_class_token: If True, pair each output with its class token.
            norm: If True, apply the final norm layer to each output first.

        Returns:
            A tuple with one entry per selected block: the patch tokens
            (class and register tokens removed), or ``(patch_tokens,
            class_token)`` pairs when *return_class_token* is set.
        """
        if self.chunked_blocks:
            outputs = self._get_intermediate_layers_chunked(x, n)
        else:
            outputs = self._get_intermediate_layers_not_chunked(x, n)
        if norm:
            outputs = [self.norm(out) for out in outputs]
        class_tokens = [out[:, 0] for out in outputs]
        outputs = [out[:, 1 + self.num_register_tokens:] for out in outputs]
        if reshape:
            B, _, w, h = x.shape
            outputs = [
                out.reshape(B, w // self.patch_size, h // self.patch_size, -1).permute(0, 3, 1, 2).contiguous()
                for out in outputs
            ]
        if return_class_token:
            return tuple(zip(outputs, class_tokens))
        return tuple(outputs)

    def forward(self, *args, is_training=False, **kwargs):
        """Return the full feature dict when *is_training*, else the head applied to the class token."""
        ret = self.forward_features(*args, **kwargs)
        if is_training:
            return ret
        else:
            return self.head(ret["x_norm_clstoken"])


def init_weights_vit_timm(module: nn.Module, name: str = ""):
    """ViT weight initialization, original timm impl (for reproducibility)"""
    if isinstance(module, nn.Linear):
        trunc_normal_(module.weight, std=0.02)
        if module.bias is not None:
            nn.init.zeros_(module.bias)


def vit_small(patch_size=16, num_register_tokens=0, **kwargs):
    """Build a transformer with embed-dim 384, depth 12 and 6 heads."""
    model = DinoVisionTransformer(
        patch_size=patch_size,
        embed_dim=384,
        depth=12,
        num_heads=6,
        mlp_ratio=4,
        block_fn=partial(Block, attn_class=MemEffAttention),
        num_register_tokens=num_register_tokens,
        **kwargs,
    )
    return model


def vit_base(patch_size=16, num_register_tokens=0, **kwargs):
    """Build a transformer with embed-dim 768, depth 12 and 12 heads."""
    model = DinoVisionTransformer(
        patch_size=patch_size,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4,
        block_fn=partial(Block, attn_class=MemEffAttention),
        num_register_tokens=num_register_tokens,
        **kwargs,
    )
    return model


def vit_large(patch_size=16, num_register_tokens=0, **kwargs):
    """Build a transformer with embed-dim 1024, depth 24 and 16 heads."""
    model = DinoVisionTransformer(
        patch_size=patch_size,
        embed_dim=1024,
        depth=24,
        num_heads=16,
        mlp_ratio=4,
        block_fn=partial(Block, attn_class=MemEffAttention),
        num_register_tokens=num_register_tokens,
        **kwargs,
    )
    return model


def vit_giant2(patch_size=16, num_register_tokens=0, **kwargs):
    """
    Close to ViT-giant, with embed-dim 1536 and 24 heads => embed-dim per head 64
    """
    model = DinoVisionTransformer(
        patch_size=patch_size,
        embed_dim=1536,
        depth=40,
        num_heads=24,
        mlp_ratio=4,
        block_fn=partial(Block, attn_class=MemEffAttention),
        num_register_tokens=num_register_tokens,
        **kwargs,
    )
    return model


def DINOv2(model_name):
    """Build the backbone for *model_name* ("vits", "vitb", "vitl" or "vitg").

    Uses image size 518, patch size 14, layer-scale init 1.0, no block
    chunking and no register tokens; "vitg" uses the fused SwiGLU FFN, the
    others the plain MLP.

    Raises:
        KeyError: if *model_name* is not one of the four known names.
    """
    model_zoo = {
        "vits": vit_small,
        "vitb": vit_base,
        "vitl": vit_large,
        "vitg": vit_giant2
    }

    try:
        build = model_zoo[model_name]
    except KeyError:
        # Same exception type as the bare lookup, with an actionable message.
        raise KeyError(
            f"unknown DINOv2 encoder {model_name!r}; expected one of {sorted(model_zoo)}"
        ) from None

    return build(
        img_size=518,
        patch_size=14,
        init_values=1.0,
        ffn_layer="mlp" if model_name != "vitg" else "swiglufused",
        block_chunks=0,
        num_register_tokens=0,
        interpolate_antialias=False,
        interpolate_offset=0.1
    )
# (patch-stream metadata kept verbatim) diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__init__.py b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e59a83eb90512d763b03e4d38536b6ae07e87541 --- /dev/null +++ b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__init__.py @@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
+# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +from .mlp import Mlp +from .patch_embed import PatchEmbed +from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused +from .block import NestedTensorBlock +from .attention import MemEffAttention diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/__init__.cpython-310.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5e33dc72e0045fe8fb132acd79a733db688442b Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/__init__.cpython-310.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/__init__.cpython-311.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af2f2c10b8e45b8d710c9d8c9b63824e51d3338f Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/__init__.cpython-311.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/__init__.cpython-312.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8da43e817af22cfc12041f34587be1ff16b37fd7 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/__init__.cpython-312.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/attention.cpython-310.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/attention.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..753659885e2b7b137f95d3183fa653d65a2b650b Binary files /dev/null and 
b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/attention.cpython-310.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/attention.cpython-311.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/attention.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4dc245ef6792d50299e0cee0e681367b3cbf8017 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/attention.cpython-311.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/attention.cpython-312.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/attention.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..336b886373950f3b4c6563ac60fd1ae855116096 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/attention.cpython-312.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/block.cpython-310.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/block.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5881aa47c24aa022a5b64cd4c2a57fc4b7fb7a9 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/block.cpython-310.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/block.cpython-311.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/block.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2516aa3c045e29eb6fd3f66325ba3977715d966e Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/block.cpython-311.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/block.cpython-312.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/block.cpython-312.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..ab70e0ef03ed72b92860fadbc44aa00c7ddc36a7 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/block.cpython-312.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/drop_path.cpython-310.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/drop_path.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e88947ee5bf65dcaabc06f67fcc2425d38d55629 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/drop_path.cpython-310.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/drop_path.cpython-311.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/drop_path.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b4195c0346959bf2655000400193336ac3ba85d9 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/drop_path.cpython-311.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/drop_path.cpython-312.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/drop_path.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d27ca9529e8fb804b60a85605f5317490a0b2b82 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/drop_path.cpython-312.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/layer_scale.cpython-310.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/layer_scale.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c6fc8a7d56b9149c02274af831dd6029dd3bdbd Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/layer_scale.cpython-310.pyc differ diff --git 
a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/layer_scale.cpython-311.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/layer_scale.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f74a1184b729a4927fad779a942677b83480a62 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/layer_scale.cpython-311.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/layer_scale.cpython-312.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/layer_scale.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3fdd4de2bef91c457366d6c5fc0ddede4662fe24 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/layer_scale.cpython-312.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/mlp.cpython-310.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/mlp.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c9b67e78d09b9a2b205323e41085187bb4fb963 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/mlp.cpython-310.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/mlp.cpython-311.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/mlp.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..009844d0788a2a1dd390c065c198d5179c44dd75 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/mlp.cpython-311.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/mlp.cpython-312.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/mlp.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86a5018a9bb73ccb4422c4e3ce7e7075b5f439b4 Binary files /dev/null and 
b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/mlp.cpython-312.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/patch_embed.cpython-310.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/patch_embed.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0fdfa7f151a91fc46327230ff61d776387119e4a Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/patch_embed.cpython-310.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/patch_embed.cpython-311.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/patch_embed.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1d85f9fef70019e1bcbb9fdbae75365150a75ef3 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/patch_embed.cpython-311.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/patch_embed.cpython-312.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/patch_embed.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62d49c304cc6378811216d5a996dd1004c5de8ca Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/patch_embed.cpython-312.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/swiglu_ffn.cpython-310.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/swiglu_ffn.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0efa6629520ae8c1e07ed30836eaf6bf416019c9 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/swiglu_ffn.cpython-310.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/swiglu_ffn.cpython-311.pyc 
b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/swiglu_ffn.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6cf59abd708ef9dda86eef557935eee9cdc6ba54 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/swiglu_ffn.cpython-311.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/swiglu_ffn.cpython-312.pyc b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/swiglu_ffn.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c7d85cc853eba123937e3db511f34b85ea6b382 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/__pycache__/swiglu_ffn.cpython-312.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/attention.py b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/attention.py new file mode 100644 index 0000000000000000000000000000000000000000..dea0c82d55f052bf4bcb5896ad8c37158ef523d5 --- /dev/null +++ b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/attention.py @@ -0,0 +1,83 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
# References:
#   https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
#   https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py

import logging

from torch import Tensor
from torch import nn


logger = logging.getLogger("dinov2")


# xFormers is an optional dependency: when the import fails we only log a
# warning and MemEffAttention falls back to the plain Attention.forward.
try:
    from xformers.ops import memory_efficient_attention, unbind, fmha

    XFORMERS_AVAILABLE = True
except ImportError:
    logger.warning("xFormers not available")
    XFORMERS_AVAILABLE = False


class Attention(nn.Module):
    """Multi-head self-attention over a ``(B, N, C)`` token tensor.

    Args:
        dim: Channel size ``C`` of the input tokens. Must be divisible by
            ``num_heads``.
        num_heads: Number of attention heads.
        qkv_bias: Whether the fused q/k/v projection has a bias term.
        proj_bias: Whether the output projection has a bias term.
        attn_drop: Dropout probability applied to the attention weights.
        proj_drop: Dropout probability applied after the output projection.

    Raises:
        ValueError: If ``num_heads`` is not positive or does not divide ``dim``.
    """

    def __init__(
        self,
        dim: int,
        num_heads: int = 8,
        qkv_bias: bool = False,
        proj_bias: bool = True,
        attn_drop: float = 0.0,
        proj_drop: float = 0.0,
    ) -> None:
        super().__init__()
        # Fail early with a clear message; otherwise the mismatch only shows
        # up as an opaque reshape error inside forward().
        if num_heads <= 0 or dim % num_heads != 0:
            raise ValueError(f"dim ({dim}) must be divisible by a positive num_heads ({num_heads})")

        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim**-0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim, bias=proj_bias)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x: Tensor) -> Tensor:
        """Apply self-attention to ``x`` of shape ``(B, N, C)``; the output has the same shape."""
        B, N, C = x.shape
        # (B, N, 3*C) -> (3, B, heads, N, head_dim)
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)

        # The scale is folded into q before the dot product.
        q, k, v = qkv[0] * self.scale, qkv[1], qkv[2]
        attn = q @ k.transpose(-2, -1)

        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        # (B, heads, N, head_dim) -> (B, N, C)
        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x


class MemEffAttention(Attention):
    """Attention variant that uses xFormers' ``memory_efficient_attention`` when available."""

    def forward(self, x: Tensor, attn_bias=None) -> Tensor:
        """Apply self-attention to ``x`` of shape ``(B, N, C)``.

        Args:
            x: Input tokens.
            attn_bias: Optional bias/mask object forwarded to xFormers. Only
                supported when xFormers is installed.

        Raises:
            AssertionError: If ``attn_bias`` is given but xFormers is missing.
        """
        if not XFORMERS_AVAILABLE:
            # Explicit raise (not ``assert``) so the check survives ``python -O``.
            if attn_bias is not None:
                raise AssertionError("xFormers is required for nested tensors usage")
            return super().forward(x)

        B, N, C = x.shape
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)

        q, k, v = unbind(qkv, 2)

        # NOTE(review): self.attn_drop is not applied on this path (same as
        # the original code) -- confirm this is intended if training with
        # attn_drop > 0.
        x = memory_efficient_attention(q, k, v, attn_bias=attn_bias)
        x = x.reshape([B, N, C])

        x = self.proj(x)
        x = self.proj_drop(x)
        return x


# \ No newline at end of file
# diff --git
# a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/block.py b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/block.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..f91f3f07bd15fba91c67068c8dce2bb22d505bf7
# --- /dev/null
# +++ b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/block.py
# @@ -0,0 +1,252 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

# References:
#   https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
#   https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py

import logging
from typing import Callable, List, Any, Tuple, Dict

import torch
from torch import nn, Tensor

from .attention import Attention, MemEffAttention
from .drop_path import DropPath
from .layer_scale import LayerScale
from .mlp import Mlp


logger = logging.getLogger("dinov2")


# xFormers is optional; it is only required for the nested (list-of-tensors)
# code path of NestedTensorBlock.
try:
    from xformers.ops import fmha
    from xformers.ops import scaled_index_add, index_select_cat

    XFORMERS_AVAILABLE = True
except ImportError:
    logger.warning("xFormers not available")
    XFORMERS_AVAILABLE = False


class Block(nn.Module):
    """Pre-norm transformer block: attention and FFN, each in a residual branch.

    Each branch is ``x + drop_path(layer_scale(f(norm(x))))``. LayerScale is
    only instantiated when ``init_values`` is truthy, DropPath only when
    ``drop_path > 0``; otherwise ``nn.Identity`` is used.

    Args:
        dim: Token channel size.
        num_heads: Number of attention heads.
        mlp_ratio: Hidden size of the FFN as a multiple of ``dim``.
        qkv_bias: Bias flag forwarded to ``attn_class``.
        proj_bias: Bias flag forwarded to ``attn_class``.
        ffn_bias: Bias flag forwarded to ``ffn_layer``.
        drop: Dropout probability used for the attention projection and FFN.
        attn_drop: Dropout probability on the attention weights.
        init_values: Initial LayerScale value; falsy disables LayerScale.
        drop_path: Stochastic-depth rate.
        act_layer: Activation factory forwarded to ``ffn_layer``.
        norm_layer: Normalisation factory, called as ``norm_layer(dim)``.
        attn_class: Attention module factory.
        ffn_layer: Feed-forward module factory.
    """

    def __init__(
        self,
        dim: int,
        num_heads: int,
        mlp_ratio: float = 4.0,
        qkv_bias: bool = False,
        proj_bias: bool = True,
        ffn_bias: bool = True,
        drop: float = 0.0,
        attn_drop: float = 0.0,
        init_values=None,
        drop_path: float = 0.0,
        act_layer: Callable[..., nn.Module] = nn.GELU,
        norm_layer: Callable[..., nn.Module] = nn.LayerNorm,
        attn_class: Callable[..., nn.Module] = Attention,
        ffn_layer: Callable[..., nn.Module] = Mlp,
    ) -> None:
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = attn_class(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            proj_bias=proj_bias,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        self.ls1 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
        self.drop_path1 = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()

        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = ffn_layer(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
            bias=ffn_bias,
        )
        self.ls2 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
        self.drop_path2 = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()

        self.sample_drop_ratio = drop_path

    def forward(self, x: Tensor) -> Tensor:
        """Run the block on ``x`` and return a tensor of the same shape."""

        def attn_residual_func(x: Tensor) -> Tensor:
            return self.ls1(self.attn(self.norm1(x)))

        def ffn_residual_func(x: Tensor) -> Tensor:
            return self.ls2(self.mlp(self.norm2(x)))

        if self.training and self.sample_drop_ratio > 0.1:
            # the overhead is compensated only for a drop path rate larger than 0.1
            x = drop_add_residual_stochastic_depth(
                x,
                residual_func=attn_residual_func,
                sample_drop_ratio=self.sample_drop_ratio,
            )
            x = drop_add_residual_stochastic_depth(
                x,
                residual_func=ffn_residual_func,
                sample_drop_ratio=self.sample_drop_ratio,
            )
        elif self.training and self.sample_drop_ratio > 0.0:
            x = x + self.drop_path1(attn_residual_func(x))
            # The FFN branch uses its own DropPath module (was drop_path1,
            # flagged with a FIXME in the original).
            x = x + self.drop_path2(ffn_residual_func(x))
        else:
            x = x + attn_residual_func(x)
            x = x + ffn_residual_func(x)
        return x


def drop_add_residual_stochastic_depth(
    x: Tensor,
    residual_func: Callable[[Tensor], Tensor],
    sample_drop_ratio: float = 0.0,
) -> Tensor:
    """Batch-level stochastic depth: compute the residual on a random subset of samples.

    Args:
        x: Input of shape ``(b, n, d)``.
        residual_func: Residual branch, applied only to the kept samples.
        sample_drop_ratio: Fraction of the batch to skip; at least one sample
            is always kept.

    Returns:
        ``x`` with the rescaled residual added to the kept samples; same shape as ``x``.
    """
    # 1) extract subset using permutation
    b, _, _ = x.shape
    sample_subset_size = max(int(b * (1 - sample_drop_ratio)), 1)
    brange = (torch.randperm(b, device=x.device))[:sample_subset_size]
    x_subset = x[brange]

    # 2) apply residual_func to get residual
    residual = residual_func(x_subset)

    x_flat = x.flatten(1)
    residual = residual.flatten(1)

    # Scale up so the expected residual matches the no-drop case.
    residual_scale_factor = b / sample_subset_size

    # 3) add the residual
    x_plus_residual = torch.index_add(x_flat, 0, brange, residual.to(dtype=x.dtype), alpha=residual_scale_factor)
    return x_plus_residual.view_as(x)


def get_branges_scales(x, sample_drop_ratio=0.0):
    """Return ``(brange, residual_scale_factor)`` for a batch ``x`` of shape ``(b, n, d)``.

    ``brange`` holds the indices of the randomly kept samples, and the scale
    factor is ``b / len(brange)``.
    """
    b, _, _ = x.shape
    sample_subset_size = max(int(b * (1 - sample_drop_ratio)), 1)
    brange = (torch.randperm(b, device=x.device))[:sample_subset_size]
    residual_scale_factor = b / sample_subset_size
    return brange, residual_scale_factor


def add_residual(x, brange, residual, residual_scale_factor, scaling_vector=None):
    """Add ``residual`` (scaled by ``residual_scale_factor``) to rows ``brange`` of ``x``.

    Without ``scaling_vector`` the result is flattened to ``(b, n*d)``;
    callers reshape it with ``view_as``. With ``scaling_vector`` the xFormers
    ``scaled_index_add`` kernel is used (requires xFormers).
    """
    if scaling_vector is None:
        x_flat = x.flatten(1)
        residual = residual.flatten(1)
        x_plus_residual = torch.index_add(x_flat, 0, brange, residual.to(dtype=x.dtype), alpha=residual_scale_factor)
    else:
        x_plus_residual = scaled_index_add(
            x, brange, residual.to(dtype=x.dtype), scaling=scaling_vector, alpha=residual_scale_factor
        )
    return x_plus_residual


# Cache of attention-bias objects keyed by the tuple of (batch, seq_len) shapes.
attn_bias_cache: Dict[Tuple, Any] = {}


def get_attn_bias_and_cat(x_list, branges=None):
    """
    this will perform the index select, cat the tensors, and provide the attn_bias from cache

    Args:
        x_list: List of ``(b_i, n_i, d)`` tensors.
        branges: Optional per-tensor index tensors selecting a subset of each batch.

    Returns:
        ``(attn_bias, cat_tensors)`` where ``cat_tensors`` has batch size 1 and
        all selected sequences concatenated along dim 1.
    """
    batch_sizes = [b.shape[0] for b in branges] if branges is not None else [x.shape[0] for x in x_list]
    all_shapes = tuple((b, x.shape[1]) for b, x in zip(batch_sizes, x_list))
    if all_shapes not in attn_bias_cache:
        # One sequence-length entry per selected sample.
        seqlens = [x.shape[1] for b, x in zip(batch_sizes, x_list) for _ in range(b)]
        attn_bias = fmha.BlockDiagonalMask.from_seqlens(seqlens)
        attn_bias._batch_sizes = batch_sizes
        attn_bias_cache[all_shapes] = attn_bias

    if branges is not None:
        cat_tensors = index_select_cat([x.flatten(1) for x in x_list], branges).view(1, -1, x_list[0].shape[-1])
    else:
        tensors_bs1 = tuple(x.reshape([1, -1, *x.shape[2:]]) for x in x_list)
        cat_tensors = torch.cat(tensors_bs1, dim=1)

    return attn_bias_cache[all_shapes], cat_tensors


def drop_add_residual_stochastic_depth_list(
    x_list: List[Tensor],
    residual_func: Callable[[Tensor, Any], Tensor],
    sample_drop_ratio: float = 0.0,
    scaling_vector=None,
) -> List[Tensor]:
    """List version of :func:`drop_add_residual_stochastic_depth`.

    Returns one output tensor per input tensor, each shaped like its input.
    """
    # 1) generate random set of indices for dropping samples in the batch
    branges_scales = [get_branges_scales(x, sample_drop_ratio=sample_drop_ratio) for x in x_list]
    branges = [s[0] for s in branges_scales]
    residual_scale_factors = [s[1] for s in branges_scales]

    # 2) get attention bias and index+concat the tensors
    attn_bias, x_cat = get_attn_bias_and_cat(x_list, branges)

    # 3) apply residual_func to get residual, and split the result
    residual_list = attn_bias.split(residual_func(x_cat, attn_bias=attn_bias))  # type: ignore

    outputs = []
    for x, brange, residual, residual_scale_factor in zip(x_list, branges, residual_list, residual_scale_factors):
        outputs.append(add_residual(x, brange, residual, residual_scale_factor, scaling_vector).view_as(x))
    return outputs


class NestedTensorBlock(Block):
    """Block that also accepts a list of tensors, processed jointly via xFormers."""

    def forward_nested(self, x_list: List[Tensor]) -> List[Tensor]:
        """
        x_list contains a list of tensors to nest together and run
        """
        # Explicit raise (not ``assert``) so the check survives ``python -O``.
        if not isinstance(self.attn, MemEffAttention):
            raise AssertionError("forward_nested requires attn_class=MemEffAttention")

        if self.training and self.sample_drop_ratio > 0.0:

            # LayerScale is not applied inside these residual functions; its
            # gamma is passed as ``scaling_vector`` instead.
            def attn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
                return self.attn(self.norm1(x), attn_bias=attn_bias)

            def ffn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
                return self.mlp(self.norm2(x))

            x_list = drop_add_residual_stochastic_depth_list(
                x_list,
                residual_func=attn_residual_func,
                sample_drop_ratio=self.sample_drop_ratio,
                scaling_vector=self.ls1.gamma if isinstance(self.ls1, LayerScale) else None,
            )
            x_list = drop_add_residual_stochastic_depth_list(
                x_list,
                residual_func=ffn_residual_func,
                sample_drop_ratio=self.sample_drop_ratio,
                # Guard on ls2 itself (the original checked ls1 here).
                scaling_vector=self.ls2.gamma if isinstance(self.ls2, LayerScale) else None,
            )
            return x_list
        else:

            def attn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
                return self.ls1(self.attn(self.norm1(x), attn_bias=attn_bias))

            def ffn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
                return self.ls2(self.mlp(self.norm2(x)))

            attn_bias, x = get_attn_bias_and_cat(x_list)
            x = x + attn_residual_func(x, attn_bias=attn_bias)
            x = x + ffn_residual_func(x)
            return attn_bias.split(x)

    def forward(self, x_or_x_list):
        """Dispatch on input type: a Tensor uses ``Block.forward``, a list uses ``forward_nested``.

        Raises:
            AssertionError: For a list input without xFormers, or for any
                other input type.
        """
        if isinstance(x_or_x_list, Tensor):
            return super().forward(x_or_x_list)
        elif isinstance(x_or_x_list, list):
            if not XFORMERS_AVAILABLE:
                raise AssertionError("Please install xFormers for nested tensors usage")
            return self.forward_nested(x_or_x_list)
        else:
            raise AssertionError(f"Expected a Tensor or a list of Tensors, got {type(x_or_x_list).__name__}")


# diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/drop_path.py b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/drop_path.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..10c3bea8e40eec258bbe59087770d230a6375481
# --- /dev/null
# +++ b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/drop_path.py
# @@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# References:
#   https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
#   https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/drop.py


from torch import nn


def drop_path(x, drop_prob: float = 0.0, training: bool = False):
    """Zero out whole samples of ``x`` with probability ``drop_prob`` (stochastic depth).

    Args:
        x: Input tensor; dimension 0 is treated as the sample dimension.
        drop_prob: Probability of dropping a sample. ``None`` or ``0.0`` means
            no dropping.
        training: Dropping is only active when this is true.

    Returns:
        ``x`` itself when nothing is dropped, otherwise a new tensor in which
        kept samples are scaled by ``1 / (1 - drop_prob)``.
    """
    # ``None`` is DropPath's constructor default; treat it as "no drop"
    # instead of failing on ``1 - None`` below.
    if drop_prob is None or drop_prob == 0.0 or not training:
        return x
    keep_prob = 1 - drop_prob
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
    # Rescale the kept samples; skipped when keep_prob is 0 to avoid dividing by zero.
    if keep_prob > 0.0:
        random_tensor.div_(keep_prob)
    output = x * random_tensor
    return output


class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        """Apply :func:`drop_path` using this module's ``drop_prob`` and training flag."""
        return drop_path(x, self.drop_prob, self.training)


# diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/layer_scale.py b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/layer_scale.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..76a4d0eedb1dc974a45e06fbe77ff3d909e36e55
# --- /dev/null
# +++ b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/layer_scale.py
# @@ -0,0 +1,28 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# Modified from: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L103-L110

from typing import Union

import torch
from torch import Tensor
from torch import nn


class LayerScale(nn.Module):
    """Multiply the input by a learnable per-channel vector ``gamma``.

    Args:
        dim: Length of ``gamma``; it is multiplied against the input with
            ordinary broadcasting.
        init_values: Initial value (scalar or tensor) of ``gamma``.
        inplace: When true, the input tensor is scaled in place and returned.
    """

    def __init__(
        self,
        dim: int,
        init_values: Union[float, Tensor] = 1e-5,
        inplace: bool = False,
    ) -> None:
        super().__init__()
        self.inplace = inplace
        initial_gamma = init_values * torch.ones(dim)
        self.gamma = nn.Parameter(initial_gamma)

    def forward(self, x: Tensor) -> Tensor:
        """Return ``x`` scaled by ``gamma`` (mutating ``x`` when ``inplace`` is set)."""
        if self.inplace:
            return x.mul_(self.gamma)
        return x * self.gamma


# diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/mlp.py b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/mlp.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..504987b635c9cd582a352fb2381228c9e6cd043c
# --- /dev/null
# +++ b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/mlp.py
# @@ -0,0 +1,41 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# References:
#   https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
#   https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/mlp.py


from typing import Callable, Optional

from torch import Tensor, nn


class Mlp(nn.Module):
    """Two-layer feed-forward network: ``fc1 -> act -> drop -> fc2 -> drop``.

    Args:
        in_features: Size of the last input dimension.
        hidden_features: Width of the hidden layer; falls back to
            ``in_features`` when falsy.
        out_features: Size of the output; falls back to ``in_features`` when falsy.
        act_layer: Zero-argument factory for the activation module.
        drop: Dropout probability (one Dropout module, used after both layers).
        bias: Whether both linear layers have a bias term.
    """

    def __init__(
        self,
        in_features: int,
        hidden_features: Optional[int] = None,
        out_features: Optional[int] = None,
        act_layer: Callable[..., nn.Module] = nn.GELU,
        drop: float = 0.0,
        bias: bool = True,
    ) -> None:
        super().__init__()
        hidden_width = hidden_features or in_features
        output_width = out_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_width, bias=bias)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_width, output_width, bias=bias)
        self.drop = nn.Dropout(drop)

    def forward(self, x: Tensor) -> Tensor:
        """Apply the feed-forward network to the last dimension of ``x``."""
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))


# diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/patch_embed.py b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/patch_embed.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..f880c042ee6a33ef520c6a8c8a686c1d065b8f49
# --- /dev/null
# +++ b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/patch_embed.py
# @@ -0,0 +1,89 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# References:
#   https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
#   https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py

from typing import Callable, Optional, Tuple, Union

from torch import Tensor
import torch.nn as nn


def make_2tuple(x):
    """Return ``x`` as a 2-tuple: a 2-tuple is passed through, an int ``n`` becomes ``(n, n)``.

    Raises:
        AssertionError: If ``x`` is a tuple of another length, or neither a
            tuple nor an int.
    """
    # Explicit raises (not ``assert``) so validation survives ``python -O``.
    if isinstance(x, tuple):
        if len(x) != 2:
            raise AssertionError(f"Expected a tuple of length 2, got length {len(x)}")
        return x

    if not isinstance(x, int):
        raise AssertionError(f"Expected an int or a 2-tuple, got {type(x).__name__}")
    return (x, x)


class PatchEmbed(nn.Module):
    """
    2D image to patch embedding: (B,C,H,W) -> (B,N,D)

    Args:
        img_size: Image size.
        patch_size: Patch token size.
        in_chans: Number of input image channels.
        embed_dim: Number of linear projection output channels.
        norm_layer: Normalization layer.
        flatten_embedding: When false, forward returns (B,H',W',D) instead of (B,N,D).
    """

    def __init__(
        self,
        img_size: Union[int, Tuple[int, int]] = 224,
        patch_size: Union[int, Tuple[int, int]] = 16,
        in_chans: int = 3,
        embed_dim: int = 768,
        norm_layer: Optional[Callable] = None,
        flatten_embedding: bool = True,
    ) -> None:
        super().__init__()

        image_HW = make_2tuple(img_size)
        patch_HW = make_2tuple(patch_size)
        patch_grid_size = (
            image_HW[0] // patch_HW[0],
            image_HW[1] // patch_HW[1],
        )

        self.img_size = image_HW
        self.patch_size = patch_HW
        self.patches_resolution = patch_grid_size
        self.num_patches = patch_grid_size[0] * patch_grid_size[1]

        self.in_chans = in_chans
        self.embed_dim = embed_dim

        self.flatten_embedding = flatten_embedding

        # Non-overlapping patches: kernel size == stride == patch size.
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_HW, stride=patch_HW)
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()

    def forward(self, x: Tensor) -> Tensor:
        """Embed an image batch ``(B, C, H, W)`` into patch tokens.

        Raises:
            AssertionError: If ``H`` or ``W`` is not a multiple of the patch size.
        """
        _, _, H, W = x.shape
        patch_H, patch_W = self.patch_size

        if H % patch_H != 0:
            raise AssertionError(f"Input image height {H} is not a multiple of patch height {patch_H}")
        if W % patch_W != 0:
            raise AssertionError(f"Input image width {W} is not a multiple of patch width: {patch_W}")

        x = self.proj(x)  # B C H W
        H, W = x.size(2), x.size(3)
        x = x.flatten(2).transpose(1, 2)  # B HW C
        x = self.norm(x)
        if not self.flatten_embedding:
            x = x.reshape(-1, H, W, self.embed_dim)  # B H W C
        return x

    def flops(self) -> float:
        """Estimate multiply-accumulate count for one image of size ``img_size``."""
        Ho, Wo = self.patches_resolution
        flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1])
        # self.norm is never None (nn.Identity stands in for "no norm"), so
        # only count the norm cost when a real norm layer was configured.
        if not isinstance(self.norm, nn.Identity):
            flops += Ho * Wo * self.embed_dim
        return flops


# diff --git a/ddepth_anything_v2/depth_anything_v2/dinov2_layers/swiglu_ffn.py b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/swiglu_ffn.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..155a3dd9f6f1a7d0f7bdf9c8f1981e58acb3b19c
# --- /dev/null
# +++ b/ddepth_anything_v2/depth_anything_v2/dinov2_layers/swiglu_ffn.py
# @@ -0,0 +1,63 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

from typing import Callable, Optional

from torch import Tensor, nn
import torch.nn.functional as F


class SwiGLUFFN(nn.Module):
    """SwiGLU feed-forward network: ``w3(silu(x1) * x2)`` with ``x1, x2 = chunk(w12(x))``.

    ``act_layer`` and ``drop`` are accepted for signature compatibility with
    other FFN layers but are not used.
    """

    def __init__(
        self,
        in_features: int,
        hidden_features: Optional[int] = None,
        out_features: Optional[int] = None,
        act_layer: Optional[Callable[..., nn.Module]] = None,
        drop: float = 0.0,
        bias: bool = True,
    ) -> None:
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        # A single linear layer produces both the gate and the value halves.
        self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias)
        self.w3 = nn.Linear(hidden_features, out_features, bias=bias)

    def forward(self, x: Tensor) -> Tensor:
        """Apply the gated feed-forward network to the last dimension of ``x``."""
        x12 = self.w12(x)
        x1, x2 = x12.chunk(2, dim=-1)
        hidden = F.silu(x1) * x2
        return self.w3(hidden)


# Prefer the xFormers SwiGLU implementation when installed; otherwise the
# pure-PyTorch SwiGLUFFN above is used as the base class below.
try:
    from xformers.ops import SwiGLU

    XFORMERS_AVAILABLE = True
except ImportError:
    SwiGLU = SwiGLUFFN
    XFORMERS_AVAILABLE = False


class SwiGLUFFNFused(SwiGLU):
    """SwiGLU FFN whose hidden width is 2/3 of the requested size, rounded up to a multiple of 8."""

    def __init__(
        self,
        in_features: int,
        hidden_features: Optional[int] = None,
        out_features: Optional[int] = None,
        act_layer: Optional[Callable[..., nn.Module]] = None,
        drop: float = 0.0,
        bias: bool = True,
    ) -> None:
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        hidden_features = (int(hidden_features * 2 / 3) + 7) // 8 * 8
        super().__init__(
            in_features=in_features,
            hidden_features=hidden_features,
            out_features=out_features,
            bias=bias,
        )


# diff --git a/ddepth_anything_v2/depth_anything_v2/dpt.py b/ddepth_anything_v2/depth_anything_v2/dpt.py
# new file mode 100644
# index 0000000000000000000000000000000000000000..acef20bfcf80318709dcf6c5e8c19b117394a06b
# --- /dev/null
# +++ b/ddepth_anything_v2/depth_anything_v2/dpt.py
# @@ -0,0 +1,221 @@
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import Compose

from .dinov2 import DINOv2
from .util.blocks import FeatureFusionBlock, _make_scratch
from .util.transform import Resize, NormalizeImage, PrepareForNet


def _make_fusion_block(features, use_bn, size=None):
    """Build a FeatureFusionBlock with the fixed settings used by DPTHead."""
    return FeatureFusionBlock(
        features,
        nn.ReLU(False),
        deconv=False,
        bn=use_bn,
        expand=False,
        align_corners=True,
        size=size,
    )


class ConvBlock(nn.Module):
    """3x3 convolution followed by BatchNorm and in-place ReLU."""

    def __init__(self, in_feature, out_feature):
        super().__init__()

        self.conv_block = nn.Sequential(
            nn.Conv2d(in_feature, out_feature, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_feature),
            nn.ReLU(True)
        )

    def forward(self, x):
        return self.conv_block(x)


class DPTHead(nn.Module):
    """Decoder head that fuses four intermediate token maps into one single-channel map.

    Args:
        in_channels: Channel size of the incoming tokens.
        features: Channel size used inside the fusion path.
        use_bn: Whether the fusion blocks use BatchNorm.
        out_channels: Four per-level channel sizes for the projections.
        use_clstoken: Whether to concatenate the class token to every patch
            token before projecting.
    """

    def __init__(
        self,
        in_channels,
        features=256,
        use_bn=False,
        out_channels=(256, 512, 1024, 1024),
        use_clstoken=False
    ):
        super(DPTHead, self).__init__()

        # Immutable default above; normalise to a list so any sequence works.
        out_channels = list(out_channels)

        self.use_clstoken = use_clstoken

        self.projects = nn.ModuleList([
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channel,
                kernel_size=1,
                stride=1,
                padding=0,
            ) for out_channel in out_channels
        ])

        # Per-level resampling: x4 up, x2 up, unchanged, x2 down.
        self.resize_layers = nn.ModuleList([
            nn.ConvTranspose2d(
                in_channels=out_channels[0],
                out_channels=out_channels[0],
                kernel_size=4,
                stride=4,
                padding=0),
            nn.ConvTranspose2d(
                in_channels=out_channels[1],
                out_channels=out_channels[1],
                kernel_size=2,
                stride=2,
                padding=0),
            nn.Identity(),
            nn.Conv2d(
                in_channels=out_channels[3],
                out_channels=out_channels[3],
                kernel_size=3,
                stride=2,
                padding=1)
        ])

        if use_clstoken:
            self.readout_projects = nn.ModuleList()
            for _ in range(len(self.projects)):
                self.readout_projects.append(
                    nn.Sequential(
                        nn.Linear(2 * in_channels, in_channels),
                        nn.GELU()))

        self.scratch = _make_scratch(
            out_channels,
            features,
            groups=1,
            expand=False,
        )

        self.scratch.stem_transpose = None

        self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet4 = _make_fusion_block(features, use_bn)

        head_features_1 = features
        head_features_2 = 32

        self.scratch.output_conv1 = nn.Conv2d(head_features_1, head_features_1 // 2, kernel_size=3, stride=1, padding=1)
        self.scratch.output_conv2 = nn.Sequential(
            nn.Conv2d(head_features_1 // 2, head_features_2, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(head_features_2, 1, kernel_size=1, stride=1, padding=0),
            nn.ReLU(True),
            nn.Identity(),
        )

    def forward(self, out_features, patch_h, patch_w):
        """Decode four ``(patch_tokens, cls_token)`` pairs into a ``(B, 1, patch_h*14, patch_w*14)`` map."""
        out = []
        for i, x in enumerate(out_features):
            if self.use_clstoken:
                x, cls_token = x[0], x[1]
                readout = cls_token.unsqueeze(1).expand_as(x)
                x = self.readout_projects[i](torch.cat((x, readout), -1))
            else:
                x = x[0]

            # (B, N, C) tokens -> (B, C, patch_h, patch_w) feature map
            x = x.permute(0, 2, 1).reshape((x.shape[0], x.shape[-1], patch_h, patch_w))

            x = self.projects[i](x)
            x = self.resize_layers[i](x)

            out.append(x)

        layer_1, layer_2, layer_3, layer_4 = out

        layer_1_rn = self.scratch.layer1_rn(layer_1)
        layer_2_rn = self.scratch.layer2_rn(layer_2)
        layer_3_rn = self.scratch.layer3_rn(layer_3)
        layer_4_rn = self.scratch.layer4_rn(layer_4)

        # Fuse from the coarsest level up, resizing to the next level's size.
        path_4 = self.scratch.refinenet4(layer_4_rn, size=layer_3_rn.shape[2:])
        path_3 = self.scratch.refinenet3(path_4, layer_3_rn, size=layer_2_rn.shape[2:])
        path_2 = self.scratch.refinenet2(path_3, layer_2_rn, size=layer_1_rn.shape[2:])
        path_1 = self.scratch.refinenet1(path_2, layer_1_rn)

        out = self.scratch.output_conv1(path_1)
        out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True)
        out = self.scratch.output_conv2(out)

        return out


class DepthAnythingV2(nn.Module):
    """DINOv2 encoder plus DPTHead decoder producing a per-pixel depth map.

    Args:
        encoder: One of ``'vits'``, ``'vitb'``, ``'vitl'``, ``'vitg'``.
        features: Forwarded to :class:`DPTHead`.
        out_channels: Forwarded to :class:`DPTHead`.
        use_bn: Forwarded to :class:`DPTHead`.
        use_clstoken: Forwarded to :class:`DPTHead`.
    """

    def __init__(
        self,
        encoder='vitl',
        features=256,
        out_channels=(256, 512, 1024, 1024),
        use_bn=False,
        use_clstoken=False
    ):
        super(DepthAnythingV2, self).__init__()

        # Indices of the encoder layers whose outputs feed the decoder.
        self.intermediate_layer_idx = {
            'vits': [2, 5, 8, 11],
            'vitb': [2, 5, 8, 11],
            'vitl': [4, 11, 17, 23],
            'vitg': [9, 19, 29, 39]
        }

        self.encoder = encoder
        self.pretrained = DINOv2(model_name=encoder)

        self.depth_head = DPTHead(self.pretrained.embed_dim, features, use_bn, out_channels=out_channels, use_clstoken=use_clstoken)

    def forward(self, x):
        """Predict depth for a batch ``x``; returns a ``(B, H', W')`` tensor with the channel dim squeezed."""
        patch_h, patch_w = x.shape[-2] // 14, x.shape[-1] // 14

        features = self.pretrained.get_intermediate_layers(x, self.intermediate_layer_idx[self.encoder], return_class_token=True)

        depth = self.depth_head(features, patch_h, patch_w)
        depth = F.relu(depth)

        return depth.squeeze(1)

    @torch.no_grad()
    def infer_image(self, raw_image, input_size=518):
        """Run inference on one image array and return depth as a NumPy array at the input resolution."""
        image, (h, w) = self.image2tensor(raw_image, input_size)

        depth = self.forward(image)

        depth = F.interpolate(depth[:, None], (h, w), mode="bilinear", align_corners=True)[0, 0]

        return depth.cpu().numpy()

    def image2tensor(self, raw_image, input_size=518):
        """Preprocess one image array into a batched tensor on the model's device.

        Args:
            raw_image: Image array; it is converted with ``cv2.COLOR_BGR2RGB``,
                so BGR channel order is expected.
            input_size: Target size passed to the resize transform.

        Returns:
            ``(image, (h, w))`` where ``(h, w)`` is the original image size.
        """
        transform = Compose([
            Resize(
                width=input_size,
                height=input_size,
                resize_target=False,
                keep_aspect_ratio=True,
                ensure_multiple_of=14,
                resize_method='lower_bound',
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ])

        h, w = raw_image.shape[:2]

        image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB) / 255.0

        image = transform({'image': image})['image']
        image = torch.from_numpy(image).unsqueeze(0)

        # Put the input where the weights live, so a CPU-resident model still
        # works on a machine that has CUDA/MPS. Fall back to the original
        # device probe only if the model has no parameters.
        try:
            device = next(self.parameters()).device
        except StopIteration:
            device = torch.device(
                'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
            )
        image = image.to(device)

        return image, (h, w)


# diff --git a/ddepth_anything_v2/depth_anything_v2/util/__pycache__/blocks.cpython-310.pyc b/ddepth_anything_v2/depth_anything_v2/util/__pycache__/blocks.cpython-310.pyc
# new file mode 100644
# index 0000000000000000000000000000000000000000..c8cadd564e1863e1b1116f9ffec8e6b226a6ffcd
# Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/util/__pycache__/blocks.cpython-310.pyc differ
# diff --git a/ddepth_anything_v2/depth_anything_v2/util/__pycache__/blocks.cpython-311.pyc b/ddepth_anything_v2/depth_anything_v2/util/__pycache__/blocks.cpython-311.pyc
# new file mode 100644
# index 0000000000000000000000000000000000000000..7acd3dcda8c83658ca3017686399ad7651abaf88
# Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/util/__pycache__/blocks.cpython-311.pyc differ
# diff --git a/ddepth_anything_v2/depth_anything_v2/util/__pycache__/blocks.cpython-312.pyc b/ddepth_anything_v2/depth_anything_v2/util/__pycache__/blocks.cpython-312.pyc
# new file mode 100644
# index 0000000000000000000000000000000000000000..b452a6503d835b5037dfe0d761defcded482ac83
# Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/util/__pycache__/blocks.cpython-312.pyc differ
# diff --git a/ddepth_anything_v2/depth_anything_v2/util/__pycache__/transform.cpython-310.pyc b/ddepth_anything_v2/depth_anything_v2/util/__pycache__/transform.cpython-310.pyc
# new file mode 100644
# index 0000000000000000000000000000000000000000..387409fd67db569de54bd819e06830d772dae1cb
# Binary files /dev/null and
b/ddepth_anything_v2/depth_anything_v2/util/__pycache__/transform.cpython-310.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/util/__pycache__/transform.cpython-311.pyc b/ddepth_anything_v2/depth_anything_v2/util/__pycache__/transform.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a6b2d0dd878330c2fd003b950e2665972f35413 Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/util/__pycache__/transform.cpython-311.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/util/__pycache__/transform.cpython-312.pyc b/ddepth_anything_v2/depth_anything_v2/util/__pycache__/transform.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b62d3ed77f824a7afe8a009b21b8a5a05dac270f Binary files /dev/null and b/ddepth_anything_v2/depth_anything_v2/util/__pycache__/transform.cpython-312.pyc differ diff --git a/ddepth_anything_v2/depth_anything_v2/util/blocks.py b/ddepth_anything_v2/depth_anything_v2/util/blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..9fb66c03702d653f411c59ab9966916c348c7c6e --- /dev/null +++ b/ddepth_anything_v2/depth_anything_v2/util/blocks.py @@ -0,0 +1,148 @@ +import torch.nn as nn + + +def _make_scratch(in_shape, out_shape, groups=1, expand=False): + scratch = nn.Module() + + out_shape1 = out_shape + out_shape2 = out_shape + out_shape3 = out_shape + if len(in_shape) >= 4: + out_shape4 = out_shape + + if expand: + out_shape1 = out_shape + out_shape2 = out_shape * 2 + out_shape3 = out_shape * 4 + if len(in_shape) >= 4: + out_shape4 = out_shape * 8 + + scratch.layer1_rn = nn.Conv2d(in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) + scratch.layer2_rn = nn.Conv2d(in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) + scratch.layer3_rn = nn.Conv2d(in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) + if len(in_shape) >= 4: + 
scratch.layer4_rn = nn.Conv2d(in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) + + return scratch + + +class ResidualConvUnit(nn.Module): + """Residual convolution module. + """ + + def __init__(self, features, activation, bn): + """Init. + + Args: + features (int): number of features + """ + super().__init__() + + self.bn = bn + + self.groups=1 + + self.conv1 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups) + + self.conv2 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups) + + if self.bn == True: + self.bn1 = nn.BatchNorm2d(features) + self.bn2 = nn.BatchNorm2d(features) + + self.activation = activation + + self.skip_add = nn.quantized.FloatFunctional() + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: output + """ + + out = self.activation(x) + out = self.conv1(out) + if self.bn == True: + out = self.bn1(out) + + out = self.activation(out) + out = self.conv2(out) + if self.bn == True: + out = self.bn2(out) + + if self.groups > 1: + out = self.conv_merge(out) + + return self.skip_add.add(out, x) + + +class FeatureFusionBlock(nn.Module): + """Feature fusion block. + """ + + def __init__( + self, + features, + activation, + deconv=False, + bn=False, + expand=False, + align_corners=True, + size=None + ): + """Init. 
+ + Args: + features (int): number of features + """ + super(FeatureFusionBlock, self).__init__() + + self.deconv = deconv + self.align_corners = align_corners + + self.groups=1 + + self.expand = expand + out_features = features + if self.expand == True: + out_features = features // 2 + + self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1) + + self.resConfUnit1 = ResidualConvUnit(features, activation, bn) + self.resConfUnit2 = ResidualConvUnit(features, activation, bn) + + self.skip_add = nn.quantized.FloatFunctional() + + self.size=size + + def forward(self, *xs, size=None): + """Forward pass. + + Returns: + tensor: output + """ + output = xs[0] + + if len(xs) == 2: + res = self.resConfUnit1(xs[1]) + output = self.skip_add.add(output, res) + + output = self.resConfUnit2(output) + + if (size is None) and (self.size is None): + modifier = {"scale_factor": 2} + elif size is None: + modifier = {"size": self.size} + else: + modifier = {"size": size} + + output = nn.functional.interpolate(output, **modifier, mode="bilinear", align_corners=self.align_corners) + + output = self.out_conv(output) + + return output diff --git a/ddepth_anything_v2/depth_anything_v2/util/transform.py b/ddepth_anything_v2/depth_anything_v2/util/transform.py new file mode 100644 index 0000000000000000000000000000000000000000..1cce234c86177e1ad5c84c81c7c1afb16877c9da --- /dev/null +++ b/ddepth_anything_v2/depth_anything_v2/util/transform.py @@ -0,0 +1,158 @@ +import numpy as np +import cv2 + + +class Resize(object): + """Resize sample to given size (width, height). + """ + + def __init__( + self, + width, + height, + resize_target=True, + keep_aspect_ratio=False, + ensure_multiple_of=1, + resize_method="lower_bound", + image_interpolation_method=cv2.INTER_AREA, + ): + """Init. 
+ + Args: + width (int): desired output width + height (int): desired output height + resize_target (bool, optional): + True: Resize the full sample (image, mask, target). + False: Resize image only. + Defaults to True. + keep_aspect_ratio (bool, optional): + True: Keep the aspect ratio of the input sample. + Output sample might not have the given width and height, and + resize behaviour depends on the parameter 'resize_method'. + Defaults to False. + ensure_multiple_of (int, optional): + Output width and height is constrained to be multiple of this parameter. + Defaults to 1. + resize_method (str, optional): + "lower_bound": Output will be at least as large as the given size. + "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.) + "minimal": Scale as least as possible. (Output size might be smaller than given size.) + Defaults to "lower_bound". + """ + self.__width = width + self.__height = height + + self.__resize_target = resize_target + self.__keep_aspect_ratio = keep_aspect_ratio + self.__multiple_of = ensure_multiple_of + self.__resize_method = resize_method + self.__image_interpolation_method = image_interpolation_method + + def constrain_to_multiple_of(self, x, min_val=0, max_val=None): + y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if max_val is not None and y > max_val: + y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if y < min_val: + y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int) + + return y + + def get_size(self, width, height): + # determine new height and width + scale_height = self.__height / height + scale_width = self.__width / width + + if self.__keep_aspect_ratio: + if self.__resize_method == "lower_bound": + # scale such that output size is lower bound + if scale_width > scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif 
self.__resize_method == "upper_bound": + # scale such that output size is upper bound + if scale_width < scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "minimal": + # scale as least as possbile + if abs(1 - scale_width) < abs(1 - scale_height): + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + else: + raise ValueError(f"resize_method {self.__resize_method} not implemented") + + if self.__resize_method == "lower_bound": + new_height = self.constrain_to_multiple_of(scale_height * height, min_val=self.__height) + new_width = self.constrain_to_multiple_of(scale_width * width, min_val=self.__width) + elif self.__resize_method == "upper_bound": + new_height = self.constrain_to_multiple_of(scale_height * height, max_val=self.__height) + new_width = self.constrain_to_multiple_of(scale_width * width, max_val=self.__width) + elif self.__resize_method == "minimal": + new_height = self.constrain_to_multiple_of(scale_height * height) + new_width = self.constrain_to_multiple_of(scale_width * width) + else: + raise ValueError(f"resize_method {self.__resize_method} not implemented") + + return (new_width, new_height) + + def __call__(self, sample): + width, height = self.get_size(sample["image"].shape[1], sample["image"].shape[0]) + + # resize sample + sample["image"] = cv2.resize(sample["image"], (width, height), interpolation=self.__image_interpolation_method) + + if self.__resize_target: + if "depth" in sample: + sample["depth"] = cv2.resize(sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST) + + if "mask" in sample: + sample["mask"] = cv2.resize(sample["mask"].astype(np.float32), (width, height), interpolation=cv2.INTER_NEAREST) + + return sample + + +class NormalizeImage(object): + """Normlize image by given mean and std. 
+ """ + + def __init__(self, mean, std): + self.__mean = mean + self.__std = std + + def __call__(self, sample): + sample["image"] = (sample["image"] - self.__mean) / self.__std + + return sample + + +class PrepareForNet(object): + """Prepare sample for usage as network input. + """ + + def __init__(self): + pass + + def __call__(self, sample): + image = np.transpose(sample["image"], (2, 0, 1)) + sample["image"] = np.ascontiguousarray(image).astype(np.float32) + + if "depth" in sample: + depth = sample["depth"].astype(np.float32) + sample["depth"] = np.ascontiguousarray(depth) + + if "mask" in sample: + sample["mask"] = sample["mask"].astype(np.float32) + sample["mask"] = np.ascontiguousarray(sample["mask"]) + + return sample \ No newline at end of file diff --git a/ddepth_anything_v2/metric_depth/README.md b/ddepth_anything_v2/metric_depth/README.md new file mode 100644 index 0000000000000000000000000000000000000000..42bcd1aec24f3926863146a99e68f1e2c82085da --- /dev/null +++ b/ddepth_anything_v2/metric_depth/README.md @@ -0,0 +1,114 @@ +# Depth Anything V2 for Metric Depth Estimation + +![teaser](./assets/compare_zoedepth.png) + +We here provide a simple codebase to fine-tune our Depth Anything V2 pre-trained encoder for metric depth estimation. Built on our powerful encoder, we use a simple DPT head to regress the depth. We fine-tune our pre-trained encoder on synthetic Hypersim / Virtual KITTI datasets for indoor / outdoor metric depth estimation, respectively. + + +# Pre-trained Models + +We provide **six metric depth models** of three scales for indoor and outdoor scenes, respectively. 
+ +| Base Model | Params | Indoor (Hypersim) | Outdoor (Virtual KITTI 2) | +|:-|-:|:-:|:-:| +| Depth-Anything-V2-Small | 24.8M | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Small/resolve/main/depth_anything_v2_metric_hypersim_vits.pth?download=true) | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Small/resolve/main/depth_anything_v2_metric_vkitti_vits.pth?download=true) | +| Depth-Anything-V2-Base | 97.5M | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Base/resolve/main/depth_anything_v2_metric_hypersim_vitb.pth?download=true) | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Base/resolve/main/depth_anything_v2_metric_vkitti_vitb.pth?download=true) | +| Depth-Anything-V2-Large | 335.3M | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Large/resolve/main/depth_anything_v2_metric_hypersim_vitl.pth?download=true) | [Download](https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-VKITTI-Large/resolve/main/depth_anything_v2_metric_vkitti_vitl.pth?download=true) | + +*We recommend to first try our larger models (if computational cost is affordable) and the indoor version.* + +## Usage + +### Prepraration + +```bash +git clone https://github.com/DepthAnything/Depth-Anything-V2 +cd Depth-Anything-V2/metric_depth +pip install -r requirements.txt +``` + +Download the checkpoints listed [here](#pre-trained-models) and put them under the `checkpoints` directory. 
+ +### Use our models +```python +import cv2 +import torch + +from depth_anything_v2.dpt import DepthAnythingV2 + +model_configs = { + 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, + 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, + 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]} +} + +encoder = 'vitl' # or 'vits', 'vitb' +dataset = 'hypersim' # 'hypersim' for indoor model, 'vkitti' for outdoor model +max_depth = 20 # 20 for indoor model, 80 for outdoor model + +model = DepthAnythingV2(**{**model_configs[encoder], 'max_depth': max_depth}) +model.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_metric_{dataset}_{encoder}.pth', map_location='cpu')) +model.eval() + +raw_img = cv2.imread('your/image/path') +depth = model.infer_image(raw_img) # HxW depth map in meters in numpy +``` + +### Running script on images + +Here, we take the `vitl` encoder as an example. You can also use `vitb` or `vits` encoders. + +```bash +# indoor scenes +python run.py \ + --encoder vitl \ + --load-from checkpoints/depth_anything_v2_metric_hypersim_vitl.pth \ + --max-depth 20 \ + --img-path --outdir [--input-size ] [--save-numpy] + +# outdoor scenes +python run.py \ + --encoder vitl \ + --load-from checkpoints/depth_anything_v2_metric_vkitti_vitl.pth \ + --max-depth 80 \ + --img-path --outdir [--input-size ] [--save-numpy] +``` + +### Project 2D images to point clouds: + +```bash +python depth_to_pointcloud.py \ + --encoder vitl \ + --load-from checkpoints/depth_anything_v2_metric_hypersim_vitl.pth \ + --max-depth 20 \ + --img-path --outdir +``` + +### Reproduce training + +Please first prepare the [Hypersim](https://github.com/apple/ml-hypersim) and [Virtual KITTI 2](https://europe.naverlabs.com/research/computer-vision/proxy-virtual-worlds-vkitti-2/) datasets. 
Then: + +```bash +bash dist_train.sh +``` + + +## Citation + +If you find this project useful, please consider citing: + +```bibtex +@article{depth_anything_v2, + title={Depth Anything V2}, + author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Zhao, Zhen and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang}, + journal={arXiv:2406.09414}, + year={2024} +} + +@inproceedings{depth_anything_v1, + title={Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data}, + author={Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang}, + booktitle={CVPR}, + year={2024} +} +``` diff --git a/ddepth_anything_v2/metric_depth/dataset/hypersim.py b/ddepth_anything_v2/metric_depth/dataset/hypersim.py new file mode 100644 index 0000000000000000000000000000000000000000..b7daa865bd3ceac9e4fb4b7a3e2120daf1e297dd --- /dev/null +++ b/ddepth_anything_v2/metric_depth/dataset/hypersim.py @@ -0,0 +1,74 @@ +import cv2 +import h5py +import numpy as np +import torch +from torch.utils.data import Dataset +from torchvision.transforms import Compose + +from dataset.transform import Resize, NormalizeImage, PrepareForNet, Crop + + +def hypersim_distance_to_depth(npyDistance): + intWidth, intHeight, fltFocal = 1024, 768, 886.81 + + npyImageplaneX = np.linspace((-0.5 * intWidth) + 0.5, (0.5 * intWidth) - 0.5, intWidth).reshape( + 1, intWidth).repeat(intHeight, 0).astype(np.float32)[:, :, None] + npyImageplaneY = np.linspace((-0.5 * intHeight) + 0.5, (0.5 * intHeight) - 0.5, + intHeight).reshape(intHeight, 1).repeat(intWidth, 1).astype(np.float32)[:, :, None] + npyImageplaneZ = np.full([intHeight, intWidth, 1], fltFocal, np.float32) + npyImageplane = np.concatenate( + [npyImageplaneX, npyImageplaneY, npyImageplaneZ], 2) + + npyDepth = npyDistance / np.linalg.norm(npyImageplane, 2, 2) * fltFocal + return npyDepth + + +class Hypersim(Dataset): + def __init__(self, filelist_path, mode, size=(518, 518)): + + self.mode = mode + self.size = size + 
+ with open(filelist_path, 'r') as f: + self.filelist = f.read().splitlines() + + net_w, net_h = size + self.transform = Compose([ + Resize( + width=net_w, + height=net_h, + resize_target=True if mode == 'train' else False, + keep_aspect_ratio=True, + ensure_multiple_of=14, + resize_method='lower_bound', + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + PrepareForNet(), + ] + ([Crop(size[0])] if self.mode == 'train' else [])) + + def __getitem__(self, item): + img_path = self.filelist[item].split(' ')[0] + depth_path = self.filelist[item].split(' ')[1] + + image = cv2.imread(img_path) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0 + + depth_fd = h5py.File(depth_path, "r") + distance_meters = np.array(depth_fd['dataset']) + depth = hypersim_distance_to_depth(distance_meters) + + sample = self.transform({'image': image, 'depth': depth}) + + sample['image'] = torch.from_numpy(sample['image']) + sample['depth'] = torch.from_numpy(sample['depth']) + + sample['valid_mask'] = (torch.isnan(sample['depth']) == 0) + sample['depth'][sample['valid_mask'] == 0] = 0 + + sample['image_path'] = self.filelist[item].split(' ')[0] + + return sample + + def __len__(self): + return len(self.filelist) \ No newline at end of file diff --git a/ddepth_anything_v2/metric_depth/dataset/kitti.py b/ddepth_anything_v2/metric_depth/dataset/kitti.py new file mode 100644 index 0000000000000000000000000000000000000000..3e1bdcc8dc10d59ae3cf526b004faca52a1e3108 --- /dev/null +++ b/ddepth_anything_v2/metric_depth/dataset/kitti.py @@ -0,0 +1,57 @@ +import cv2 +import torch +from torch.utils.data import Dataset +from torchvision.transforms import Compose + +from dataset.transform import Resize, NormalizeImage, PrepareForNet + + +class KITTI(Dataset): + def __init__(self, filelist_path, mode, size=(518, 518)): + if mode != 'val': + raise NotImplementedError + + self.mode = mode + self.size = size + + with 
open(filelist_path, 'r') as f: + self.filelist = f.read().splitlines() + + net_w, net_h = size + self.transform = Compose([ + Resize( + width=net_w, + height=net_h, + resize_target=True if mode == 'train' else False, + keep_aspect_ratio=True, + ensure_multiple_of=14, + resize_method='lower_bound', + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + PrepareForNet(), + ]) + + def __getitem__(self, item): + img_path = self.filelist[item].split(' ')[0] + depth_path = self.filelist[item].split(' ')[1] + + image = cv2.imread(img_path) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0 + + depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED).astype('float32') + + sample = self.transform({'image': image, 'depth': depth}) + + sample['image'] = torch.from_numpy(sample['image']) + sample['depth'] = torch.from_numpy(sample['depth']) + sample['depth'] = sample['depth'] / 256.0 # convert in meters + + sample['valid_mask'] = sample['depth'] > 0 + + sample['image_path'] = self.filelist[item].split(' ')[0] + + return sample + + def __len__(self): + return len(self.filelist) \ No newline at end of file diff --git a/ddepth_anything_v2/metric_depth/dataset/transform.py b/ddepth_anything_v2/metric_depth/dataset/transform.py new file mode 100644 index 0000000000000000000000000000000000000000..c8e4c18e0cb9887ab02d1a67bc38a71a9faaf328 --- /dev/null +++ b/ddepth_anything_v2/metric_depth/dataset/transform.py @@ -0,0 +1,277 @@ +import cv2 +import math +import numpy as np +import torch +import torch.nn.functional as F + + +def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA): + """Rezise the sample to ensure the given size. Keeps aspect ratio. 
+ + Args: + sample (dict): sample + size (tuple): image size + + Returns: + tuple: new size + """ + shape = list(sample["disparity"].shape) + + if shape[0] >= size[0] and shape[1] >= size[1]: + return sample + + scale = [0, 0] + scale[0] = size[0] / shape[0] + scale[1] = size[1] / shape[1] + + scale = max(scale) + + shape[0] = math.ceil(scale * shape[0]) + shape[1] = math.ceil(scale * shape[1]) + + # resize + sample["image"] = cv2.resize( + sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method + ) + + sample["disparity"] = cv2.resize( + sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST + ) + sample["mask"] = cv2.resize( + sample["mask"].astype(np.float32), + tuple(shape[::-1]), + interpolation=cv2.INTER_NEAREST, + ) + sample["mask"] = sample["mask"].astype(bool) + + return tuple(shape) + + +class Resize(object): + """Resize sample to given size (width, height). + """ + + def __init__( + self, + width, + height, + resize_target=True, + keep_aspect_ratio=False, + ensure_multiple_of=1, + resize_method="lower_bound", + image_interpolation_method=cv2.INTER_AREA, + ): + """Init. + + Args: + width (int): desired output width + height (int): desired output height + resize_target (bool, optional): + True: Resize the full sample (image, mask, target). + False: Resize image only. + Defaults to True. + keep_aspect_ratio (bool, optional): + True: Keep the aspect ratio of the input sample. + Output sample might not have the given width and height, and + resize behaviour depends on the parameter 'resize_method'. + Defaults to False. + ensure_multiple_of (int, optional): + Output width and height is constrained to be multiple of this parameter. + Defaults to 1. + resize_method (str, optional): + "lower_bound": Output will be at least as large as the given size. + "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.) + "minimal": Scale as least as possible. 
(Output size might be smaller than given size.) + Defaults to "lower_bound". + """ + self.__width = width + self.__height = height + + self.__resize_target = resize_target + self.__keep_aspect_ratio = keep_aspect_ratio + self.__multiple_of = ensure_multiple_of + self.__resize_method = resize_method + self.__image_interpolation_method = image_interpolation_method + + def constrain_to_multiple_of(self, x, min_val=0, max_val=None): + y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if max_val is not None and y > max_val: + y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if y < min_val: + y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int) + + return y + + def get_size(self, width, height): + # determine new height and width + scale_height = self.__height / height + scale_width = self.__width / width + + if self.__keep_aspect_ratio: + if self.__resize_method == "lower_bound": + # scale such that output size is lower bound + if scale_width > scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "upper_bound": + # scale such that output size is upper bound + if scale_width < scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "minimal": + # scale as least as possbile + if abs(1 - scale_width) < abs(1 - scale_height): + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + else: + raise ValueError( + f"resize_method {self.__resize_method} not implemented" + ) + + if self.__resize_method == "lower_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, min_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, min_val=self.__width + ) + elif self.__resize_method == "upper_bound": + new_height = self.constrain_to_multiple_of( + 
scale_height * height, max_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, max_val=self.__width + ) + elif self.__resize_method == "minimal": + new_height = self.constrain_to_multiple_of(scale_height * height) + new_width = self.constrain_to_multiple_of(scale_width * width) + else: + raise ValueError(f"resize_method {self.__resize_method} not implemented") + + return (new_width, new_height) + + def __call__(self, sample): + width, height = self.get_size( + sample["image"].shape[1], sample["image"].shape[0] + ) + + # resize sample + sample["image"] = cv2.resize( + sample["image"], + (width, height), + interpolation=self.__image_interpolation_method, + ) + + if self.__resize_target: + if "disparity" in sample: + sample["disparity"] = cv2.resize( + sample["disparity"], + (width, height), + interpolation=cv2.INTER_NEAREST, + ) + + if "depth" in sample: + sample["depth"] = cv2.resize( + sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST + ) + + if "semseg_mask" in sample: + # sample["semseg_mask"] = cv2.resize( + # sample["semseg_mask"], (width, height), interpolation=cv2.INTER_NEAREST + # ) + sample["semseg_mask"] = F.interpolate(torch.from_numpy(sample["semseg_mask"]).float()[None, None, ...], (height, width), mode='nearest').numpy()[0, 0] + + if "mask" in sample: + sample["mask"] = cv2.resize( + sample["mask"].astype(np.float32), + (width, height), + interpolation=cv2.INTER_NEAREST, + ) + # sample["mask"] = sample["mask"].astype(bool) + + # print(sample['image'].shape, sample['depth'].shape) + return sample + + +class NormalizeImage(object): + """Normlize image by given mean and std. + """ + + def __init__(self, mean, std): + self.__mean = mean + self.__std = std + + def __call__(self, sample): + sample["image"] = (sample["image"] - self.__mean) / self.__std + + return sample + + +class PrepareForNet(object): + """Prepare sample for usage as network input. 
+ """ + + def __init__(self): + pass + + def __call__(self, sample): + image = np.transpose(sample["image"], (2, 0, 1)) + sample["image"] = np.ascontiguousarray(image).astype(np.float32) + + if "mask" in sample: + sample["mask"] = sample["mask"].astype(np.float32) + sample["mask"] = np.ascontiguousarray(sample["mask"]) + + if "depth" in sample: + depth = sample["depth"].astype(np.float32) + sample["depth"] = np.ascontiguousarray(depth) + + if "semseg_mask" in sample: + sample["semseg_mask"] = sample["semseg_mask"].astype(np.float32) + sample["semseg_mask"] = np.ascontiguousarray(sample["semseg_mask"]) + + return sample + + +class Crop(object): + """Crop sample for batch-wise training. Image is of shape CxHxW + """ + + def __init__(self, size): + if isinstance(size, int): + self.size = (size, size) + else: + self.size = size + + def __call__(self, sample): + h, w = sample['image'].shape[-2:] + assert h >= self.size[0] and w >= self.size[1], 'Wrong size' + + h_start = np.random.randint(0, h - self.size[0] + 1) + w_start = np.random.randint(0, w - self.size[1] + 1) + h_end = h_start + self.size[0] + w_end = w_start + self.size[1] + + sample['image'] = sample['image'][:, h_start: h_end, w_start: w_end] + + if "depth" in sample: + sample["depth"] = sample["depth"][h_start: h_end, w_start: w_end] + + if "mask" in sample: + sample["mask"] = sample["mask"][h_start: h_end, w_start: w_end] + + if "semseg_mask" in sample: + sample["semseg_mask"] = sample["semseg_mask"][h_start: h_end, w_start: w_end] + + return sample \ No newline at end of file diff --git a/ddepth_anything_v2/metric_depth/dataset/vkitti2.py b/ddepth_anything_v2/metric_depth/dataset/vkitti2.py new file mode 100644 index 0000000000000000000000000000000000000000..2be840cc6e2ff9ec0a895c66ddeb80ef17e30296 --- /dev/null +++ b/ddepth_anything_v2/metric_depth/dataset/vkitti2.py @@ -0,0 +1,54 @@ +import cv2 +import torch +from torch.utils.data import Dataset +from torchvision.transforms import Compose + +from 
dataset.transform import Resize, NormalizeImage, PrepareForNet, Crop + + +class VKITTI2(Dataset): + def __init__(self, filelist_path, mode, size=(518, 518)): + + self.mode = mode + self.size = size + + with open(filelist_path, 'r') as f: + self.filelist = f.read().splitlines() + + net_w, net_h = size + self.transform = Compose([ + Resize( + width=net_w, + height=net_h, + resize_target=True if mode == 'train' else False, + keep_aspect_ratio=True, + ensure_multiple_of=14, + resize_method='lower_bound', + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + PrepareForNet(), + ] + ([Crop(size[0])] if self.mode == 'train' else [])) + + def __getitem__(self, item): + img_path = self.filelist[item].split(' ')[0] + depth_path = self.filelist[item].split(' ')[1] + + image = cv2.imread(img_path) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0 + + depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) / 100.0 # cm to m + + sample = self.transform({'image': image, 'depth': depth}) + + sample['image'] = torch.from_numpy(sample['image']) + sample['depth'] = torch.from_numpy(sample['depth']) + + sample['valid_mask'] = (sample['depth'] <= 80) + + sample['image_path'] = self.filelist[item].split(' ')[0] + + return sample + + def __len__(self): + return len(self.filelist) \ No newline at end of file diff --git a/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2.py b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2.py new file mode 100644 index 0000000000000000000000000000000000000000..b336796480cd9c25afa869c79ee8f19af88b11c9 --- /dev/null +++ b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2.py @@ -0,0 +1,415 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the Apache License, Version 2.0 +# found in the LICENSE file in the root directory of this source tree. 
+ +# References: +# https://github.com/facebookresearch/dino/blob/main/vision_transformer.py +# https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py + +from functools import partial +import math +import logging +from typing import Sequence, Tuple, Union, Callable + +import torch +import torch.nn as nn +import torch.utils.checkpoint +from torch.nn.init import trunc_normal_ + +from .dinov2_layers import Mlp, PatchEmbed, SwiGLUFFNFused, MemEffAttention, NestedTensorBlock as Block + + +logger = logging.getLogger("dinov2") + + +def named_apply(fn: Callable, module: nn.Module, name="", depth_first=True, include_root=False) -> nn.Module: + if not depth_first and include_root: + fn(module=module, name=name) + for child_name, child_module in module.named_children(): + child_name = ".".join((name, child_name)) if name else child_name + named_apply(fn=fn, module=child_module, name=child_name, depth_first=depth_first, include_root=True) + if depth_first and include_root: + fn(module=module, name=name) + return module + + +class BlockChunk(nn.ModuleList): + def forward(self, x): + for b in self: + x = b(x) + return x + + +class DinoVisionTransformer(nn.Module): + def __init__( + self, + img_size=224, + patch_size=16, + in_chans=3, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4.0, + qkv_bias=True, + ffn_bias=True, + proj_bias=True, + drop_path_rate=0.0, + drop_path_uniform=False, + init_values=None, # for layerscale: None or 0 => no layerscale + embed_layer=PatchEmbed, + act_layer=nn.GELU, + block_fn=Block, + ffn_layer="mlp", + block_chunks=1, + num_register_tokens=0, + interpolate_antialias=False, + interpolate_offset=0.1, + ): + """ + Args: + img_size (int, tuple): input image size + patch_size (int, tuple): patch size + in_chans (int): number of input channels + embed_dim (int): embedding dimension + depth (int): depth of transformer + num_heads (int): number of attention heads + mlp_ratio (int): ratio of mlp hidden dim 
to embedding dim + qkv_bias (bool): enable bias for qkv if True + proj_bias (bool): enable bias for proj in attn if True + ffn_bias (bool): enable bias for ffn if True + drop_path_rate (float): stochastic depth rate + drop_path_uniform (bool): apply uniform drop rate across blocks + weight_init (str): weight init scheme + init_values (float): layer-scale init values + embed_layer (nn.Module): patch embedding layer + act_layer (nn.Module): MLP activation layer + block_fn (nn.Module): transformer block class + ffn_layer (str): "mlp", "swiglu", "swiglufused" or "identity" + block_chunks: (int) split block sequence into block_chunks units for FSDP wrap + num_register_tokens: (int) number of extra cls tokens (so-called "registers") + interpolate_antialias: (str) flag to apply anti-aliasing when interpolating positional embeddings + interpolate_offset: (float) work-around offset to apply when interpolating positional embeddings + """ + super().__init__() + norm_layer = partial(nn.LayerNorm, eps=1e-6) + + self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models + self.num_tokens = 1 + self.n_blocks = depth + self.num_heads = num_heads + self.patch_size = patch_size + self.num_register_tokens = num_register_tokens + self.interpolate_antialias = interpolate_antialias + self.interpolate_offset = interpolate_offset + + self.patch_embed = embed_layer(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim) + num_patches = self.patch_embed.num_patches + + self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) + self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + self.num_tokens, embed_dim)) + assert num_register_tokens >= 0 + self.register_tokens = ( + nn.Parameter(torch.zeros(1, num_register_tokens, embed_dim)) if num_register_tokens else None + ) + + if drop_path_uniform is True: + dpr = [drop_path_rate] * depth + else: + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic 
depth decay rule + + if ffn_layer == "mlp": + logger.info("using MLP layer as FFN") + ffn_layer = Mlp + elif ffn_layer == "swiglufused" or ffn_layer == "swiglu": + logger.info("using SwiGLU layer as FFN") + ffn_layer = SwiGLUFFNFused + elif ffn_layer == "identity": + logger.info("using Identity layer as FFN") + + def f(*args, **kwargs): + return nn.Identity() + + ffn_layer = f + else: + raise NotImplementedError + + blocks_list = [ + block_fn( + dim=embed_dim, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + proj_bias=proj_bias, + ffn_bias=ffn_bias, + drop_path=dpr[i], + norm_layer=norm_layer, + act_layer=act_layer, + ffn_layer=ffn_layer, + init_values=init_values, + ) + for i in range(depth) + ] + if block_chunks > 0: + self.chunked_blocks = True + chunked_blocks = [] + chunksize = depth // block_chunks + for i in range(0, depth, chunksize): + # this is to keep the block index consistent if we chunk the block list + chunked_blocks.append([nn.Identity()] * i + blocks_list[i : i + chunksize]) + self.blocks = nn.ModuleList([BlockChunk(p) for p in chunked_blocks]) + else: + self.chunked_blocks = False + self.blocks = nn.ModuleList(blocks_list) + + self.norm = norm_layer(embed_dim) + self.head = nn.Identity() + + self.mask_token = nn.Parameter(torch.zeros(1, embed_dim)) + + self.init_weights() + + def init_weights(self): + trunc_normal_(self.pos_embed, std=0.02) + nn.init.normal_(self.cls_token, std=1e-6) + if self.register_tokens is not None: + nn.init.normal_(self.register_tokens, std=1e-6) + named_apply(init_weights_vit_timm, self) + + def interpolate_pos_encoding(self, x, w, h): + previous_dtype = x.dtype + npatch = x.shape[1] - 1 + N = self.pos_embed.shape[1] - 1 + if npatch == N and w == h: + return self.pos_embed + pos_embed = self.pos_embed.float() + class_pos_embed = pos_embed[:, 0] + patch_pos_embed = pos_embed[:, 1:] + dim = x.shape[-1] + w0 = w // self.patch_size + h0 = h // self.patch_size + # we add a small number to avoid floating 
point error in the interpolation + # see discussion at https://github.com/facebookresearch/dino/issues/8 + # DINOv2 with register modify the interpolate_offset from 0.1 to 0.0 + w0, h0 = w0 + self.interpolate_offset, h0 + self.interpolate_offset + # w0, h0 = w0 + 0.1, h0 + 0.1 + + sqrt_N = math.sqrt(N) + sx, sy = float(w0) / sqrt_N, float(h0) / sqrt_N + patch_pos_embed = nn.functional.interpolate( + patch_pos_embed.reshape(1, int(sqrt_N), int(sqrt_N), dim).permute(0, 3, 1, 2), + scale_factor=(sx, sy), + # (int(w0), int(h0)), # to solve the upsampling shape issue + mode="bicubic", + antialias=self.interpolate_antialias + ) + + assert int(w0) == patch_pos_embed.shape[-2] + assert int(h0) == patch_pos_embed.shape[-1] + patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim) + return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1).to(previous_dtype) + + def prepare_tokens_with_masks(self, x, masks=None): + B, nc, w, h = x.shape + x = self.patch_embed(x) + if masks is not None: + x = torch.where(masks.unsqueeze(-1), self.mask_token.to(x.dtype).unsqueeze(0), x) + + x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1) + x = x + self.interpolate_pos_encoding(x, w, h) + + if self.register_tokens is not None: + x = torch.cat( + ( + x[:, :1], + self.register_tokens.expand(x.shape[0], -1, -1), + x[:, 1:], + ), + dim=1, + ) + + return x + + def forward_features_list(self, x_list, masks_list): + x = [self.prepare_tokens_with_masks(x, masks) for x, masks in zip(x_list, masks_list)] + for blk in self.blocks: + x = blk(x) + + all_x = x + output = [] + for x, masks in zip(all_x, masks_list): + x_norm = self.norm(x) + output.append( + { + "x_norm_clstoken": x_norm[:, 0], + "x_norm_regtokens": x_norm[:, 1 : self.num_register_tokens + 1], + "x_norm_patchtokens": x_norm[:, self.num_register_tokens + 1 :], + "x_prenorm": x, + "masks": masks, + } + ) + return output + + def forward_features(self, x, masks=None): + if isinstance(x, list): + 
return self.forward_features_list(x, masks) + + x = self.prepare_tokens_with_masks(x, masks) + + for blk in self.blocks: + x = blk(x) + + x_norm = self.norm(x) + return { + "x_norm_clstoken": x_norm[:, 0], + "x_norm_regtokens": x_norm[:, 1 : self.num_register_tokens + 1], + "x_norm_patchtokens": x_norm[:, self.num_register_tokens + 1 :], + "x_prenorm": x, + "masks": masks, + } + + def _get_intermediate_layers_not_chunked(self, x, n=1): + x = self.prepare_tokens_with_masks(x) + # If n is an int, take the n last blocks. If it's a list, take them + output, total_block_len = [], len(self.blocks) + blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n + for i, blk in enumerate(self.blocks): + x = blk(x) + if i in blocks_to_take: + output.append(x) + assert len(output) == len(blocks_to_take), f"only {len(output)} / {len(blocks_to_take)} blocks found" + return output + + def _get_intermediate_layers_chunked(self, x, n=1): + x = self.prepare_tokens_with_masks(x) + output, i, total_block_len = [], 0, len(self.blocks[-1]) + # If n is an int, take the n last blocks. 
If it's a list, take them + blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n + for block_chunk in self.blocks: + for blk in block_chunk[i:]: # Passing the nn.Identity() + x = blk(x) + if i in blocks_to_take: + output.append(x) + i += 1 + assert len(output) == len(blocks_to_take), f"only {len(output)} / {len(blocks_to_take)} blocks found" + return output + + def get_intermediate_layers( + self, + x: torch.Tensor, + n: Union[int, Sequence] = 1, # Layers or n last layers to take + reshape: bool = False, + return_class_token: bool = False, + norm=True + ) -> Tuple[Union[torch.Tensor, Tuple[torch.Tensor]]]: + if self.chunked_blocks: + outputs = self._get_intermediate_layers_chunked(x, n) + else: + outputs = self._get_intermediate_layers_not_chunked(x, n) + if norm: + outputs = [self.norm(out) for out in outputs] + class_tokens = [out[:, 0] for out in outputs] + outputs = [out[:, 1 + self.num_register_tokens:] for out in outputs] + if reshape: + B, _, w, h = x.shape + outputs = [ + out.reshape(B, w // self.patch_size, h // self.patch_size, -1).permute(0, 3, 1, 2).contiguous() + for out in outputs + ] + if return_class_token: + return tuple(zip(outputs, class_tokens)) + return tuple(outputs) + + def forward(self, *args, is_training=False, **kwargs): + ret = self.forward_features(*args, **kwargs) + if is_training: + return ret + else: + return self.head(ret["x_norm_clstoken"]) + + +def init_weights_vit_timm(module: nn.Module, name: str = ""): + """ViT weight initialization, original timm impl (for reproducibility)""" + if isinstance(module, nn.Linear): + trunc_normal_(module.weight, std=0.02) + if module.bias is not None: + nn.init.zeros_(module.bias) + + +def vit_small(patch_size=16, num_register_tokens=0, **kwargs): + model = DinoVisionTransformer( + patch_size=patch_size, + embed_dim=384, + depth=12, + num_heads=6, + mlp_ratio=4, + block_fn=partial(Block, attn_class=MemEffAttention), + num_register_tokens=num_register_tokens, + 
**kwargs, + ) + return model + + +def vit_base(patch_size=16, num_register_tokens=0, **kwargs): + model = DinoVisionTransformer( + patch_size=patch_size, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + block_fn=partial(Block, attn_class=MemEffAttention), + num_register_tokens=num_register_tokens, + **kwargs, + ) + return model + + +def vit_large(patch_size=16, num_register_tokens=0, **kwargs): + model = DinoVisionTransformer( + patch_size=patch_size, + embed_dim=1024, + depth=24, + num_heads=16, + mlp_ratio=4, + block_fn=partial(Block, attn_class=MemEffAttention), + num_register_tokens=num_register_tokens, + **kwargs, + ) + return model + + +def vit_giant2(patch_size=16, num_register_tokens=0, **kwargs): + """ + Close to ViT-giant, with embed-dim 1536 and 24 heads => embed-dim per head 64 + """ + model = DinoVisionTransformer( + patch_size=patch_size, + embed_dim=1536, + depth=40, + num_heads=24, + mlp_ratio=4, + block_fn=partial(Block, attn_class=MemEffAttention), + num_register_tokens=num_register_tokens, + **kwargs, + ) + return model + + +def DINOv2(model_name): + model_zoo = { + "vits": vit_small, + "vitb": vit_base, + "vitl": vit_large, + "vitg": vit_giant2 + } + + return model_zoo[model_name]( + img_size=518, + patch_size=14, + init_values=1.0, + ffn_layer="mlp" if model_name != "vitg" else "swiglufused", + block_chunks=0, + num_register_tokens=0, + interpolate_antialias=False, + interpolate_offset=0.1 + ) \ No newline at end of file diff --git a/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/__init__.py b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e59a83eb90512d763b03e4d38536b6ae07e87541 --- /dev/null +++ b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +from .mlp import Mlp +from .patch_embed import PatchEmbed +from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused +from .block import NestedTensorBlock +from .attention import MemEffAttention diff --git a/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/attention.py b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/attention.py new file mode 100644 index 0000000000000000000000000000000000000000..dea0c82d55f052bf4bcb5896ad8c37158ef523d5 --- /dev/null +++ b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/attention.py @@ -0,0 +1,83 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +# References: +# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py +# https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py + +import logging + +from torch import Tensor +from torch import nn + + +logger = logging.getLogger("dinov2") + + +try: + from xformers.ops import memory_efficient_attention, unbind, fmha + + XFORMERS_AVAILABLE = True +except ImportError: + logger.warning("xFormers not available") + XFORMERS_AVAILABLE = False + + +class Attention(nn.Module): + def __init__( + self, + dim: int, + num_heads: int = 8, + qkv_bias: bool = False, + proj_bias: bool = True, + attn_drop: float = 0.0, + proj_drop: float = 0.0, + ) -> None: + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = head_dim**-0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim, bias=proj_bias) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x: Tensor) -> Tensor: + B, N, C = x.shape + qkv = 
self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + + q, k, v = qkv[0] * self.scale, qkv[1], qkv[2] + attn = q @ k.transpose(-2, -1) + + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class MemEffAttention(Attention): + def forward(self, x: Tensor, attn_bias=None) -> Tensor: + if not XFORMERS_AVAILABLE: + assert attn_bias is None, "xFormers is required for nested tensors usage" + return super().forward(x) + + B, N, C = x.shape + qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads) + + q, k, v = unbind(qkv, 2) + + x = memory_efficient_attention(q, k, v, attn_bias=attn_bias) + x = x.reshape([B, N, C]) + + x = self.proj(x) + x = self.proj_drop(x) + return x + + \ No newline at end of file diff --git a/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/block.py b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/block.py new file mode 100644 index 0000000000000000000000000000000000000000..f91f3f07bd15fba91c67068c8dce2bb22d505bf7 --- /dev/null +++ b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/block.py @@ -0,0 +1,252 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+ +# References: +# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py +# https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py + +import logging +from typing import Callable, List, Any, Tuple, Dict + +import torch +from torch import nn, Tensor + +from .attention import Attention, MemEffAttention +from .drop_path import DropPath +from .layer_scale import LayerScale +from .mlp import Mlp + + +logger = logging.getLogger("dinov2") + + +try: + from xformers.ops import fmha + from xformers.ops import scaled_index_add, index_select_cat + + XFORMERS_AVAILABLE = True +except ImportError: + logger.warning("xFormers not available") + XFORMERS_AVAILABLE = False + + +class Block(nn.Module): + def __init__( + self, + dim: int, + num_heads: int, + mlp_ratio: float = 4.0, + qkv_bias: bool = False, + proj_bias: bool = True, + ffn_bias: bool = True, + drop: float = 0.0, + attn_drop: float = 0.0, + init_values=None, + drop_path: float = 0.0, + act_layer: Callable[..., nn.Module] = nn.GELU, + norm_layer: Callable[..., nn.Module] = nn.LayerNorm, + attn_class: Callable[..., nn.Module] = Attention, + ffn_layer: Callable[..., nn.Module] = Mlp, + ) -> None: + super().__init__() + # print(f"biases: qkv: {qkv_bias}, proj: {proj_bias}, ffn: {ffn_bias}") + self.norm1 = norm_layer(dim) + self.attn = attn_class( + dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + proj_bias=proj_bias, + attn_drop=attn_drop, + proj_drop=drop, + ) + self.ls1 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity() + self.drop_path1 = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = ffn_layer( + in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop, + bias=ffn_bias, + ) + self.ls2 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity() + self.drop_path2 = DropPath(drop_path) if drop_path > 
0.0 else nn.Identity() + + self.sample_drop_ratio = drop_path + + def forward(self, x: Tensor) -> Tensor: + def attn_residual_func(x: Tensor) -> Tensor: + return self.ls1(self.attn(self.norm1(x))) + + def ffn_residual_func(x: Tensor) -> Tensor: + return self.ls2(self.mlp(self.norm2(x))) + + if self.training and self.sample_drop_ratio > 0.1: + # the overhead is compensated only for a drop path rate larger than 0.1 + x = drop_add_residual_stochastic_depth( + x, + residual_func=attn_residual_func, + sample_drop_ratio=self.sample_drop_ratio, + ) + x = drop_add_residual_stochastic_depth( + x, + residual_func=ffn_residual_func, + sample_drop_ratio=self.sample_drop_ratio, + ) + elif self.training and self.sample_drop_ratio > 0.0: + x = x + self.drop_path1(attn_residual_func(x)) + x = x + self.drop_path1(ffn_residual_func(x)) # FIXME: drop_path2 + else: + x = x + attn_residual_func(x) + x = x + ffn_residual_func(x) + return x + + +def drop_add_residual_stochastic_depth( + x: Tensor, + residual_func: Callable[[Tensor], Tensor], + sample_drop_ratio: float = 0.0, +) -> Tensor: + # 1) extract subset using permutation + b, n, d = x.shape + sample_subset_size = max(int(b * (1 - sample_drop_ratio)), 1) + brange = (torch.randperm(b, device=x.device))[:sample_subset_size] + x_subset = x[brange] + + # 2) apply residual_func to get residual + residual = residual_func(x_subset) + + x_flat = x.flatten(1) + residual = residual.flatten(1) + + residual_scale_factor = b / sample_subset_size + + # 3) add the residual + x_plus_residual = torch.index_add(x_flat, 0, brange, residual.to(dtype=x.dtype), alpha=residual_scale_factor) + return x_plus_residual.view_as(x) + + +def get_branges_scales(x, sample_drop_ratio=0.0): + b, n, d = x.shape + sample_subset_size = max(int(b * (1 - sample_drop_ratio)), 1) + brange = (torch.randperm(b, device=x.device))[:sample_subset_size] + residual_scale_factor = b / sample_subset_size + return brange, residual_scale_factor + + +def add_residual(x, brange, 
residual, residual_scale_factor, scaling_vector=None): + if scaling_vector is None: + x_flat = x.flatten(1) + residual = residual.flatten(1) + x_plus_residual = torch.index_add(x_flat, 0, brange, residual.to(dtype=x.dtype), alpha=residual_scale_factor) + else: + x_plus_residual = scaled_index_add( + x, brange, residual.to(dtype=x.dtype), scaling=scaling_vector, alpha=residual_scale_factor + ) + return x_plus_residual + + +attn_bias_cache: Dict[Tuple, Any] = {} + + +def get_attn_bias_and_cat(x_list, branges=None): + """ + this will perform the index select, cat the tensors, and provide the attn_bias from cache + """ + batch_sizes = [b.shape[0] for b in branges] if branges is not None else [x.shape[0] for x in x_list] + all_shapes = tuple((b, x.shape[1]) for b, x in zip(batch_sizes, x_list)) + if all_shapes not in attn_bias_cache.keys(): + seqlens = [] + for b, x in zip(batch_sizes, x_list): + for _ in range(b): + seqlens.append(x.shape[1]) + attn_bias = fmha.BlockDiagonalMask.from_seqlens(seqlens) + attn_bias._batch_sizes = batch_sizes + attn_bias_cache[all_shapes] = attn_bias + + if branges is not None: + cat_tensors = index_select_cat([x.flatten(1) for x in x_list], branges).view(1, -1, x_list[0].shape[-1]) + else: + tensors_bs1 = tuple(x.reshape([1, -1, *x.shape[2:]]) for x in x_list) + cat_tensors = torch.cat(tensors_bs1, dim=1) + + return attn_bias_cache[all_shapes], cat_tensors + + +def drop_add_residual_stochastic_depth_list( + x_list: List[Tensor], + residual_func: Callable[[Tensor, Any], Tensor], + sample_drop_ratio: float = 0.0, + scaling_vector=None, +) -> Tensor: + # 1) generate random set of indices for dropping samples in the batch + branges_scales = [get_branges_scales(x, sample_drop_ratio=sample_drop_ratio) for x in x_list] + branges = [s[0] for s in branges_scales] + residual_scale_factors = [s[1] for s in branges_scales] + + # 2) get attention bias and index+concat the tensors + attn_bias, x_cat = get_attn_bias_and_cat(x_list, branges) + + # 3) 
apply residual_func to get residual, and split the result + residual_list = attn_bias.split(residual_func(x_cat, attn_bias=attn_bias)) # type: ignore + + outputs = [] + for x, brange, residual, residual_scale_factor in zip(x_list, branges, residual_list, residual_scale_factors): + outputs.append(add_residual(x, brange, residual, residual_scale_factor, scaling_vector).view_as(x)) + return outputs + + +class NestedTensorBlock(Block): + def forward_nested(self, x_list: List[Tensor]) -> List[Tensor]: + """ + x_list contains a list of tensors to nest together and run + """ + assert isinstance(self.attn, MemEffAttention) + + if self.training and self.sample_drop_ratio > 0.0: + + def attn_residual_func(x: Tensor, attn_bias=None) -> Tensor: + return self.attn(self.norm1(x), attn_bias=attn_bias) + + def ffn_residual_func(x: Tensor, attn_bias=None) -> Tensor: + return self.mlp(self.norm2(x)) + + x_list = drop_add_residual_stochastic_depth_list( + x_list, + residual_func=attn_residual_func, + sample_drop_ratio=self.sample_drop_ratio, + scaling_vector=self.ls1.gamma if isinstance(self.ls1, LayerScale) else None, + ) + x_list = drop_add_residual_stochastic_depth_list( + x_list, + residual_func=ffn_residual_func, + sample_drop_ratio=self.sample_drop_ratio, + scaling_vector=self.ls2.gamma if isinstance(self.ls1, LayerScale) else None, + ) + return x_list + else: + + def attn_residual_func(x: Tensor, attn_bias=None) -> Tensor: + return self.ls1(self.attn(self.norm1(x), attn_bias=attn_bias)) + + def ffn_residual_func(x: Tensor, attn_bias=None) -> Tensor: + return self.ls2(self.mlp(self.norm2(x))) + + attn_bias, x = get_attn_bias_and_cat(x_list) + x = x + attn_residual_func(x, attn_bias=attn_bias) + x = x + ffn_residual_func(x) + return attn_bias.split(x) + + def forward(self, x_or_x_list): + if isinstance(x_or_x_list, Tensor): + return super().forward(x_or_x_list) + elif isinstance(x_or_x_list, list): + assert XFORMERS_AVAILABLE, "Please install xFormers for nested tensors usage" + 
return self.forward_nested(x_or_x_list) + else: + raise AssertionError diff --git a/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/drop_path.py b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/drop_path.py new file mode 100644 index 0000000000000000000000000000000000000000..10c3bea8e40eec258bbe59087770d230a6375481 --- /dev/null +++ b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/drop_path.py @@ -0,0 +1,35 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +# References: +# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py +# https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/drop.py + + +from torch import nn + + +def drop_path(x, drop_prob: float = 0.0, training: bool = False): + if drop_prob == 0.0 or not training: + return x + keep_prob = 1 - drop_prob + shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets + random_tensor = x.new_empty(shape).bernoulli_(keep_prob) + if keep_prob > 0.0: + random_tensor.div_(keep_prob) + output = x * random_tensor + return output + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" + + def __init__(self, drop_prob=None): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) diff --git a/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/layer_scale.py b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/layer_scale.py new file mode 100644 index 0000000000000000000000000000000000000000..76a4d0eedb1dc974a45e06fbe77ff3d909e36e55 --- /dev/null +++ b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/layer_scale.py @@ -0,0 +1,28 @@ +# 
Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +# Modified from: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L103-L110 + +from typing import Union + +import torch +from torch import Tensor +from torch import nn + + +class LayerScale(nn.Module): + def __init__( + self, + dim: int, + init_values: Union[float, Tensor] = 1e-5, + inplace: bool = False, + ) -> None: + super().__init__() + self.inplace = inplace + self.gamma = nn.Parameter(init_values * torch.ones(dim)) + + def forward(self, x: Tensor) -> Tensor: + return x.mul_(self.gamma) if self.inplace else x * self.gamma diff --git a/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/mlp.py b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/mlp.py new file mode 100644 index 0000000000000000000000000000000000000000..504987b635c9cd582a352fb2381228c9e6cd043c --- /dev/null +++ b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/mlp.py @@ -0,0 +1,41 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+ +# References: +# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py +# https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/mlp.py + + +from typing import Callable, Optional + +from torch import Tensor, nn + + +class Mlp(nn.Module): + def __init__( + self, + in_features: int, + hidden_features: Optional[int] = None, + out_features: Optional[int] = None, + act_layer: Callable[..., nn.Module] = nn.GELU, + drop: float = 0.0, + bias: bool = True, + ) -> None: + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features, bias=bias) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features, bias=bias) + self.drop = nn.Dropout(drop) + + def forward(self, x: Tensor) -> Tensor: + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x diff --git a/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/patch_embed.py b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/patch_embed.py new file mode 100644 index 0000000000000000000000000000000000000000..f880c042ee6a33ef520c6a8c8a686c1d065b8f49 --- /dev/null +++ b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/patch_embed.py @@ -0,0 +1,89 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+ +# References: +# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py +# https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py + +from typing import Callable, Optional, Tuple, Union + +from torch import Tensor +import torch.nn as nn + + +def make_2tuple(x): + if isinstance(x, tuple): + assert len(x) == 2 + return x + + assert isinstance(x, int) + return (x, x) + + +class PatchEmbed(nn.Module): + """ + 2D image to patch embedding: (B,C,H,W) -> (B,N,D) + + Args: + img_size: Image size. + patch_size: Patch token size. + in_chans: Number of input image channels. + embed_dim: Number of linear projection output channels. + norm_layer: Normalization layer. + """ + + def __init__( + self, + img_size: Union[int, Tuple[int, int]] = 224, + patch_size: Union[int, Tuple[int, int]] = 16, + in_chans: int = 3, + embed_dim: int = 768, + norm_layer: Optional[Callable] = None, + flatten_embedding: bool = True, + ) -> None: + super().__init__() + + image_HW = make_2tuple(img_size) + patch_HW = make_2tuple(patch_size) + patch_grid_size = ( + image_HW[0] // patch_HW[0], + image_HW[1] // patch_HW[1], + ) + + self.img_size = image_HW + self.patch_size = patch_HW + self.patches_resolution = patch_grid_size + self.num_patches = patch_grid_size[0] * patch_grid_size[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + self.flatten_embedding = flatten_embedding + + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_HW, stride=patch_HW) + self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity() + + def forward(self, x: Tensor) -> Tensor: + _, _, H, W = x.shape + patch_H, patch_W = self.patch_size + + assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" + assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" + + x = self.proj(x) # B C H W + H, W = x.size(2), x.size(3) + x = x.flatten(2).transpose(1, 2) # B HW C + x = 
self.norm(x) + if not self.flatten_embedding: + x = x.reshape(-1, H, W, self.embed_dim) # B H W C + return x + + def flops(self) -> float: + Ho, Wo = self.patches_resolution + flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1]) + if self.norm is not None: + flops += Ho * Wo * self.embed_dim + return flops diff --git a/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/swiglu_ffn.py b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/swiglu_ffn.py new file mode 100644 index 0000000000000000000000000000000000000000..155a3dd9f6f1a7d0f7bdf9c8f1981e58acb3b19c --- /dev/null +++ b/ddepth_anything_v2/metric_depth/depth_anything_v2/dinov2_layers/swiglu_ffn.py @@ -0,0 +1,63 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Callable, Optional + +from torch import Tensor, nn +import torch.nn.functional as F + + +class SwiGLUFFN(nn.Module): + def __init__( + self, + in_features: int, + hidden_features: Optional[int] = None, + out_features: Optional[int] = None, + act_layer: Callable[..., nn.Module] = None, + drop: float = 0.0, + bias: bool = True, + ) -> None: + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias) + self.w3 = nn.Linear(hidden_features, out_features, bias=bias) + + def forward(self, x: Tensor) -> Tensor: + x12 = self.w12(x) + x1, x2 = x12.chunk(2, dim=-1) + hidden = F.silu(x1) * x2 + return self.w3(hidden) + + +try: + from xformers.ops import SwiGLU + + XFORMERS_AVAILABLE = True +except ImportError: + SwiGLU = SwiGLUFFN + XFORMERS_AVAILABLE = False + + +class SwiGLUFFNFused(SwiGLU): + def __init__( + self, + in_features: int, + hidden_features: Optional[int] = None, + out_features: 
Optional[int] = None, + act_layer: Callable[..., nn.Module] = None, + drop: float = 0.0, + bias: bool = True, + ) -> None: + out_features = out_features or in_features + hidden_features = hidden_features or in_features + hidden_features = (int(hidden_features * 2 / 3) + 7) // 8 * 8 + super().__init__( + in_features=in_features, + hidden_features=hidden_features, + out_features=out_features, + bias=bias, + ) diff --git a/ddepth_anything_v2/metric_depth/depth_anything_v2/dpt.py b/ddepth_anything_v2/metric_depth/depth_anything_v2/dpt.py new file mode 100644 index 0000000000000000000000000000000000000000..69e57cc78cbc3297938ac4a49f9cf6a3b04d8eff --- /dev/null +++ b/ddepth_anything_v2/metric_depth/depth_anything_v2/dpt.py @@ -0,0 +1,222 @@ +import cv2 +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchvision.transforms import Compose + +from .dinov2 import DINOv2 +from .util.blocks import FeatureFusionBlock, _make_scratch +from .util.transform import Resize, NormalizeImage, PrepareForNet + + +def _make_fusion_block(features, use_bn, size=None): + return FeatureFusionBlock( + features, + nn.ReLU(False), + deconv=False, + bn=use_bn, + expand=False, + align_corners=True, + size=size, + ) + + +class ConvBlock(nn.Module): + def __init__(self, in_feature, out_feature): + super().__init__() + + self.conv_block = nn.Sequential( + nn.Conv2d(in_feature, out_feature, kernel_size=3, stride=1, padding=1), + nn.BatchNorm2d(out_feature), + nn.ReLU(True) + ) + + def forward(self, x): + return self.conv_block(x) + + +class DPTHead(nn.Module): + def __init__( + self, + in_channels, + features=256, + use_bn=False, + out_channels=[256, 512, 1024, 1024], + use_clstoken=False + ): + super(DPTHead, self).__init__() + + self.use_clstoken = use_clstoken + + self.projects = nn.ModuleList([ + nn.Conv2d( + in_channels=in_channels, + out_channels=out_channel, + kernel_size=1, + stride=1, + padding=0, + ) for out_channel in out_channels + ]) + + self.resize_layers = 
nn.ModuleList([ + nn.ConvTranspose2d( + in_channels=out_channels[0], + out_channels=out_channels[0], + kernel_size=4, + stride=4, + padding=0), + nn.ConvTranspose2d( + in_channels=out_channels[1], + out_channels=out_channels[1], + kernel_size=2, + stride=2, + padding=0), + nn.Identity(), + nn.Conv2d( + in_channels=out_channels[3], + out_channels=out_channels[3], + kernel_size=3, + stride=2, + padding=1) + ]) + + if use_clstoken: + self.readout_projects = nn.ModuleList() + for _ in range(len(self.projects)): + self.readout_projects.append( + nn.Sequential( + nn.Linear(2 * in_channels, in_channels), + nn.GELU())) + + self.scratch = _make_scratch( + out_channels, + features, + groups=1, + expand=False, + ) + + self.scratch.stem_transpose = None + + self.scratch.refinenet1 = _make_fusion_block(features, use_bn) + self.scratch.refinenet2 = _make_fusion_block(features, use_bn) + self.scratch.refinenet3 = _make_fusion_block(features, use_bn) + self.scratch.refinenet4 = _make_fusion_block(features, use_bn) + + head_features_1 = features + head_features_2 = 32 + + self.scratch.output_conv1 = nn.Conv2d(head_features_1, head_features_1 // 2, kernel_size=3, stride=1, padding=1) + self.scratch.output_conv2 = nn.Sequential( + nn.Conv2d(head_features_1 // 2, head_features_2, kernel_size=3, stride=1, padding=1), + nn.ReLU(True), + nn.Conv2d(head_features_2, 1, kernel_size=1, stride=1, padding=0), + nn.Sigmoid() + ) + + def forward(self, out_features, patch_h, patch_w): + out = [] + for i, x in enumerate(out_features): + if self.use_clstoken: + x, cls_token = x[0], x[1] + readout = cls_token.unsqueeze(1).expand_as(x) + x = self.readout_projects[i](torch.cat((x, readout), -1)) + else: + x = x[0] + + x = x.permute(0, 2, 1).reshape((x.shape[0], x.shape[-1], patch_h, patch_w)) + + x = self.projects[i](x) + x = self.resize_layers[i](x) + + out.append(x) + + layer_1, layer_2, layer_3, layer_4 = out + + layer_1_rn = self.scratch.layer1_rn(layer_1) + layer_2_rn = 
self.scratch.layer2_rn(layer_2) + layer_3_rn = self.scratch.layer3_rn(layer_3) + layer_4_rn = self.scratch.layer4_rn(layer_4) + + path_4 = self.scratch.refinenet4(layer_4_rn, size=layer_3_rn.shape[2:]) + path_3 = self.scratch.refinenet3(path_4, layer_3_rn, size=layer_2_rn.shape[2:]) + path_2 = self.scratch.refinenet2(path_3, layer_2_rn, size=layer_1_rn.shape[2:]) + path_1 = self.scratch.refinenet1(path_2, layer_1_rn) + + out = self.scratch.output_conv1(path_1) + out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) + out = self.scratch.output_conv2(out) + + return out + + +class DepthAnythingV2(nn.Module): + def __init__( + self, + encoder='vitl', + features=256, + out_channels=[256, 512, 1024, 1024], + use_bn=False, + use_clstoken=False, + max_depth=20.0 + ): + super(DepthAnythingV2, self).__init__() + + self.intermediate_layer_idx = { + 'vits': [2, 5, 8, 11], + 'vitb': [2, 5, 8, 11], + 'vitl': [4, 11, 17, 23], + 'vitg': [9, 19, 29, 39] + } + + self.max_depth = max_depth + + self.encoder = encoder + self.pretrained = DINOv2(model_name=encoder) + + self.depth_head = DPTHead(self.pretrained.embed_dim, features, use_bn, out_channels=out_channels, use_clstoken=use_clstoken) + + def forward(self, x): + patch_h, patch_w = x.shape[-2] // 14, x.shape[-1] // 14 + + features = self.pretrained.get_intermediate_layers(x, self.intermediate_layer_idx[self.encoder], return_class_token=True) + + depth = self.depth_head(features, patch_h, patch_w) * self.max_depth + + return depth.squeeze(1) + + @torch.no_grad() + def infer_image(self, raw_image, input_size=518): + image, (h, w) = self.image2tensor(raw_image, input_size) + + depth = self.forward(image) + + depth = F.interpolate(depth[:, None], (h, w), mode="bilinear", align_corners=True)[0, 0] + + return depth.cpu().numpy() + + def image2tensor(self, raw_image, input_size=518): + transform = Compose([ + Resize( + width=input_size, + height=input_size, + resize_target=False, + 
keep_aspect_ratio=True, + ensure_multiple_of=14, + resize_method='lower_bound', + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + PrepareForNet(), + ]) + + h, w = raw_image.shape[:2] + + image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB) / 255.0 + + image = transform({'image': image})['image'] + image = torch.from_numpy(image).unsqueeze(0) + + DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' + image = image.to(DEVICE) + + return image, (h, w) diff --git a/ddepth_anything_v2/metric_depth/depth_anything_v2/util/blocks.py b/ddepth_anything_v2/metric_depth/depth_anything_v2/util/blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..9fb66c03702d653f411c59ab9966916c348c7c6e --- /dev/null +++ b/ddepth_anything_v2/metric_depth/depth_anything_v2/util/blocks.py @@ -0,0 +1,148 @@ +import torch.nn as nn + + +def _make_scratch(in_shape, out_shape, groups=1, expand=False): + scratch = nn.Module() + + out_shape1 = out_shape + out_shape2 = out_shape + out_shape3 = out_shape + if len(in_shape) >= 4: + out_shape4 = out_shape + + if expand: + out_shape1 = out_shape + out_shape2 = out_shape * 2 + out_shape3 = out_shape * 4 + if len(in_shape) >= 4: + out_shape4 = out_shape * 8 + + scratch.layer1_rn = nn.Conv2d(in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) + scratch.layer2_rn = nn.Conv2d(in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) + scratch.layer3_rn = nn.Conv2d(in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) + if len(in_shape) >= 4: + scratch.layer4_rn = nn.Conv2d(in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups) + + return scratch + + +class ResidualConvUnit(nn.Module): + """Residual convolution module. + """ + + def __init__(self, features, activation, bn): + """Init. 
+ + Args: + features (int): number of features + """ + super().__init__() + + self.bn = bn + + self.groups=1 + + self.conv1 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups) + + self.conv2 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups) + + if self.bn == True: + self.bn1 = nn.BatchNorm2d(features) + self.bn2 = nn.BatchNorm2d(features) + + self.activation = activation + + self.skip_add = nn.quantized.FloatFunctional() + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: output + """ + + out = self.activation(x) + out = self.conv1(out) + if self.bn == True: + out = self.bn1(out) + + out = self.activation(out) + out = self.conv2(out) + if self.bn == True: + out = self.bn2(out) + + if self.groups > 1: + out = self.conv_merge(out) + + return self.skip_add.add(out, x) + + +class FeatureFusionBlock(nn.Module): + """Feature fusion block. + """ + + def __init__( + self, + features, + activation, + deconv=False, + bn=False, + expand=False, + align_corners=True, + size=None + ): + """Init. + + Args: + features (int): number of features + """ + super(FeatureFusionBlock, self).__init__() + + self.deconv = deconv + self.align_corners = align_corners + + self.groups=1 + + self.expand = expand + out_features = features + if self.expand == True: + out_features = features // 2 + + self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1) + + self.resConfUnit1 = ResidualConvUnit(features, activation, bn) + self.resConfUnit2 = ResidualConvUnit(features, activation, bn) + + self.skip_add = nn.quantized.FloatFunctional() + + self.size=size + + def forward(self, *xs, size=None): + """Forward pass. 
+ + Returns: + tensor: output + """ + output = xs[0] + + if len(xs) == 2: + res = self.resConfUnit1(xs[1]) + output = self.skip_add.add(output, res) + + output = self.resConfUnit2(output) + + if (size is None) and (self.size is None): + modifier = {"scale_factor": 2} + elif size is None: + modifier = {"size": self.size} + else: + modifier = {"size": size} + + output = nn.functional.interpolate(output, **modifier, mode="bilinear", align_corners=self.align_corners) + + output = self.out_conv(output) + + return output diff --git a/ddepth_anything_v2/metric_depth/depth_anything_v2/util/transform.py b/ddepth_anything_v2/metric_depth/depth_anything_v2/util/transform.py new file mode 100644 index 0000000000000000000000000000000000000000..1cce234c86177e1ad5c84c81c7c1afb16877c9da --- /dev/null +++ b/ddepth_anything_v2/metric_depth/depth_anything_v2/util/transform.py @@ -0,0 +1,158 @@ +import numpy as np +import cv2 + + +class Resize(object): + """Resize sample to given size (width, height). + """ + + def __init__( + self, + width, + height, + resize_target=True, + keep_aspect_ratio=False, + ensure_multiple_of=1, + resize_method="lower_bound", + image_interpolation_method=cv2.INTER_AREA, + ): + """Init. + + Args: + width (int): desired output width + height (int): desired output height + resize_target (bool, optional): + True: Resize the full sample (image, mask, target). + False: Resize image only. + Defaults to True. + keep_aspect_ratio (bool, optional): + True: Keep the aspect ratio of the input sample. + Output sample might not have the given width and height, and + resize behaviour depends on the parameter 'resize_method'. + Defaults to False. + ensure_multiple_of (int, optional): + Output width and height is constrained to be multiple of this parameter. + Defaults to 1. + resize_method (str, optional): + "lower_bound": Output will be at least as large as the given size. + "upper_bound": Output will be at max as large as the given size. 
(Output size might be smaller than given size.) + "minimal": Scale as least as possible. (Output size might be smaller than given size.) + Defaults to "lower_bound". + """ + self.__width = width + self.__height = height + + self.__resize_target = resize_target + self.__keep_aspect_ratio = keep_aspect_ratio + self.__multiple_of = ensure_multiple_of + self.__resize_method = resize_method + self.__image_interpolation_method = image_interpolation_method + + def constrain_to_multiple_of(self, x, min_val=0, max_val=None): + y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if max_val is not None and y > max_val: + y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if y < min_val: + y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int) + + return y + + def get_size(self, width, height): + # determine new height and width + scale_height = self.__height / height + scale_width = self.__width / width + + if self.__keep_aspect_ratio: + if self.__resize_method == "lower_bound": + # scale such that output size is lower bound + if scale_width > scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "upper_bound": + # scale such that output size is upper bound + if scale_width < scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "minimal": + # scale as least as possbile + if abs(1 - scale_width) < abs(1 - scale_height): + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + else: + raise ValueError(f"resize_method {self.__resize_method} not implemented") + + if self.__resize_method == "lower_bound": + new_height = self.constrain_to_multiple_of(scale_height * height, min_val=self.__height) + new_width = self.constrain_to_multiple_of(scale_width * width, min_val=self.__width) + elif self.__resize_method 
== "upper_bound": + new_height = self.constrain_to_multiple_of(scale_height * height, max_val=self.__height) + new_width = self.constrain_to_multiple_of(scale_width * width, max_val=self.__width) + elif self.__resize_method == "minimal": + new_height = self.constrain_to_multiple_of(scale_height * height) + new_width = self.constrain_to_multiple_of(scale_width * width) + else: + raise ValueError(f"resize_method {self.__resize_method} not implemented") + + return (new_width, new_height) + + def __call__(self, sample): + width, height = self.get_size(sample["image"].shape[1], sample["image"].shape[0]) + + # resize sample + sample["image"] = cv2.resize(sample["image"], (width, height), interpolation=self.__image_interpolation_method) + + if self.__resize_target: + if "depth" in sample: + sample["depth"] = cv2.resize(sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST) + + if "mask" in sample: + sample["mask"] = cv2.resize(sample["mask"].astype(np.float32), (width, height), interpolation=cv2.INTER_NEAREST) + + return sample + + +class NormalizeImage(object): + """Normlize image by given mean and std. + """ + + def __init__(self, mean, std): + self.__mean = mean + self.__std = std + + def __call__(self, sample): + sample["image"] = (sample["image"] - self.__mean) / self.__std + + return sample + + +class PrepareForNet(object): + """Prepare sample for usage as network input. 
+ """ + + def __init__(self): + pass + + def __call__(self, sample): + image = np.transpose(sample["image"], (2, 0, 1)) + sample["image"] = np.ascontiguousarray(image).astype(np.float32) + + if "depth" in sample: + depth = sample["depth"].astype(np.float32) + sample["depth"] = np.ascontiguousarray(depth) + + if "mask" in sample: + sample["mask"] = sample["mask"].astype(np.float32) + sample["mask"] = np.ascontiguousarray(sample["mask"]) + + return sample \ No newline at end of file diff --git a/ddepth_anything_v2/metric_depth/depth_to_pointcloud.py b/ddepth_anything_v2/metric_depth/depth_to_pointcloud.py new file mode 100644 index 0000000000000000000000000000000000000000..d341bc2b545a63d57ab612be14675d373e90e098 --- /dev/null +++ b/ddepth_anything_v2/metric_depth/depth_to_pointcloud.py @@ -0,0 +1,114 @@ +""" +Born out of Depth Anything V1 Issue 36 +Make sure you have the necessary libraries installed. +Code by @1ssb + +This script processes a set of images to generate depth maps and corresponding point clouds. +The resulting point clouds are saved in the specified output directory. + +Usage: + python script.py --encoder vitl --load-from path_to_model --max-depth 20 --img-path path_to_images --outdir output_directory --focal-length-x 470.4 --focal-length-y 470.4 + +Arguments: + --encoder: Model encoder to use. Choices are ['vits', 'vitb', 'vitl', 'vitg']. + --load-from: Path to the pre-trained model weights. + --max-depth: Maximum depth value for the depth map. + --img-path: Path to the input image or directory containing images. + --outdir: Directory to save the output point clouds. + --focal-length-x: Focal length along the x-axis. + --focal-length-y: Focal length along the y-axis. 
+""" + +import argparse +import cv2 +import glob +import numpy as np +import open3d as o3d +import os +from PIL import Image +import torch + +from depth_anything_v2.dpt import DepthAnythingV2 + + +def main(): + # Parse command-line arguments + parser = argparse.ArgumentParser(description='Generate depth maps and point clouds from images.') + parser.add_argument('--encoder', default='vitl', type=str, choices=['vits', 'vitb', 'vitl', 'vitg'], + help='Model encoder to use.') + parser.add_argument('--load-from', default='', type=str, required=True, + help='Path to the pre-trained model weights.') + parser.add_argument('--max-depth', default=20, type=float, + help='Maximum depth value for the depth map.') + parser.add_argument('--img-path', type=str, required=True, + help='Path to the input image or directory containing images.') + parser.add_argument('--outdir', type=str, default='./vis_pointcloud', + help='Directory to save the output point clouds.') + parser.add_argument('--focal-length-x', default=470.4, type=float, + help='Focal length along the x-axis.') + parser.add_argument('--focal-length-y', default=470.4, type=float, + help='Focal length along the y-axis.') + + args = parser.parse_args() + + # Determine the device to use (CUDA, MPS, or CPU) + DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' + + # Model configuration based on the chosen encoder + model_configs = { + 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, + 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, + 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}, + 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]} + } + + # Initialize the DepthAnythingV2 model with the specified configuration + depth_anything = DepthAnythingV2(**{**model_configs[args.encoder], 'max_depth': args.max_depth}) + 
depth_anything.load_state_dict(torch.load(args.load_from, map_location='cpu')) + depth_anything = depth_anything.to(DEVICE).eval() + + # Get the list of image files to process + if os.path.isfile(args.img_path): + if args.img_path.endswith('txt'): + with open(args.img_path, 'r') as f: + filenames = f.read().splitlines() + else: + filenames = [args.img_path] + else: + filenames = glob.glob(os.path.join(args.img_path, '**/*'), recursive=True) + + # Create the output directory if it doesn't exist + os.makedirs(args.outdir, exist_ok=True) + + # Process each image file + for k, filename in enumerate(filenames): + print(f'Processing {k+1}/{len(filenames)}: {filename}') + + # Load the image + color_image = Image.open(filename).convert('RGB') + width, height = color_image.size + + # Read the image using OpenCV + image = cv2.imread(filename) + pred = depth_anything.infer_image(image, height) + + # Resize depth prediction to match the original image size + resized_pred = Image.fromarray(pred).resize((width, height), Image.NEAREST) + + # Generate mesh grid and calculate point cloud coordinates + x, y = np.meshgrid(np.arange(width), np.arange(height)) + x = (x - width / 2) / args.focal_length_x + y = (y - height / 2) / args.focal_length_y + z = np.array(resized_pred) + points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3) + colors = np.array(color_image).reshape(-1, 3) / 255.0 + + # Create the point cloud and save it to the output directory + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(points) + pcd.colors = o3d.utility.Vector3dVector(colors) + o3d.io.write_point_cloud(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + ".ply"), pcd) + + +if __name__ == '__main__': + main() diff --git a/ddepth_anything_v2/metric_depth/dist_train.sh b/ddepth_anything_v2/metric_depth/dist_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..188606857ac223d9f5b76f07763067b014268113 --- 
/dev/null +++ b/ddepth_anything_v2/metric_depth/dist_train.sh @@ -0,0 +1,26 @@ +#!/bin/bash +now=$(date +"%Y%m%d_%H%M%S") + +epoch=120 +bs=4 +gpus=8 +lr=0.000005 +encoder=vitl +dataset=hypersim # vkitti +img_size=518 +min_depth=0.001 +max_depth=20 # 80 for virtual kitti +pretrained_from=../checkpoints/depth_anything_v2_${encoder}.pth +save_path=exp/hypersim # exp/vkitti + +mkdir -p $save_path + +python3 -m torch.distributed.launch \ + --nproc_per_node=$gpus \ + --nnodes 1 \ + --node_rank=0 \ + --master_addr=localhost \ + --master_port=20596 \ + train.py --epoch $epoch --encoder $encoder --bs $bs --lr $lr --save-path $save_path --dataset $dataset \ + --img-size $img_size --min-depth $min_depth --max-depth $max_depth --pretrained-from $pretrained_from \ + --port 20596 2>&1 | tee -a $save_path/$now.log diff --git a/ddepth_anything_v2/metric_depth/requirements.txt b/ddepth_anything_v2/metric_depth/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..6079c7177838b434aeddabbdd7198622c2c031d1 --- /dev/null +++ b/ddepth_anything_v2/metric_depth/requirements.txt @@ -0,0 +1,5 @@ +matplotlib +opencv-python +open3d +torch +torchvision diff --git a/ddepth_anything_v2/metric_depth/run.py b/ddepth_anything_v2/metric_depth/run.py new file mode 100644 index 0000000000000000000000000000000000000000..dcf322d1817477b4b546921b9f53e491c6290b7d --- /dev/null +++ b/ddepth_anything_v2/metric_depth/run.py @@ -0,0 +1,81 @@ +import argparse +import cv2 +import glob +import matplotlib +import numpy as np +import os +import torch + +from depth_anything_v2.dpt import DepthAnythingV2 + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Depth Anything V2 Metric Depth Estimation') + + parser.add_argument('--img-path', type=str) + parser.add_argument('--input-size', type=int, default=518) + parser.add_argument('--outdir', type=str, default='./vis_depth') + + parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 
'vitl', 'vitg']) + parser.add_argument('--load-from', type=str, default='checkpoints/depth_anything_v2_metric_hypersim_vitl.pth') + parser.add_argument('--max-depth', type=float, default=20) + + parser.add_argument('--save-numpy', dest='save_numpy', action='store_true', help='save the model raw output') + parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='only display the prediction') + parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='do not apply colorful palette') + + args = parser.parse_args() + + DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' + + model_configs = { + 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, + 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, + 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}, + 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]} + } + + depth_anything = DepthAnythingV2(**{**model_configs[args.encoder], 'max_depth': args.max_depth}) + depth_anything.load_state_dict(torch.load(args.load_from, map_location='cpu')) + depth_anything = depth_anything.to(DEVICE).eval() + + if os.path.isfile(args.img_path): + if args.img_path.endswith('txt'): + with open(args.img_path, 'r') as f: + filenames = f.read().splitlines() + else: + filenames = [args.img_path] + else: + filenames = glob.glob(os.path.join(args.img_path, '**/*'), recursive=True) + + os.makedirs(args.outdir, exist_ok=True) + + cmap = matplotlib.colormaps.get_cmap('Spectral') + + for k, filename in enumerate(filenames): + print(f'Progress {k+1}/{len(filenames)}: {filename}') + + raw_image = cv2.imread(filename) + + depth = depth_anything.infer_image(raw_image, args.input_size) + + if args.save_numpy: + output_path = os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '_raw_depth_meter.npy') + 
np.save(output_path, depth) + + depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0 + depth = depth.astype(np.uint8) + + if args.grayscale: + depth = np.repeat(depth[..., np.newaxis], 3, axis=-1) + else: + depth = (cmap(depth)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8) + + output_path = os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.png') + if args.pred_only: + cv2.imwrite(output_path, depth) + else: + split_region = np.ones((raw_image.shape[0], 50, 3), dtype=np.uint8) * 255 + combined_result = cv2.hconcat([raw_image, split_region, depth]) + + cv2.imwrite(output_path, combined_result) \ No newline at end of file diff --git a/ddepth_anything_v2/metric_depth/train.py b/ddepth_anything_v2/metric_depth/train.py new file mode 100644 index 0000000000000000000000000000000000000000..ef45f16c0f8d2ee131c2b734374bf29b75af8132 --- /dev/null +++ b/ddepth_anything_v2/metric_depth/train.py @@ -0,0 +1,212 @@ +import argparse +import logging +import os +import pprint +import random + +import warnings +import numpy as np +import torch +import torch.backends.cudnn as cudnn +import torch.distributed as dist +from torch.utils.data import DataLoader +from torch.optim import AdamW +import torch.nn.functional as F +from torch.utils.tensorboard import SummaryWriter + +from dataset.hypersim import Hypersim +from dataset.kitti import KITTI +from dataset.vkitti2 import VKITTI2 +from depth_anything_v2.dpt import DepthAnythingV2 +from util.dist_helper import setup_distributed +from util.loss import SiLogLoss +from util.metric import eval_depth +from util.utils import init_log + + +parser = argparse.ArgumentParser(description='Depth Anything V2 for Metric Depth Estimation') + +parser.add_argument('--encoder', default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg']) +parser.add_argument('--dataset', default='hypersim', choices=['hypersim', 'vkitti']) +parser.add_argument('--img-size', default=518, type=int) +parser.add_argument('--min-depth', 
default=0.001, type=float) +parser.add_argument('--max-depth', default=20, type=float) +parser.add_argument('--epochs', default=40, type=int) +parser.add_argument('--bs', default=2, type=int) +parser.add_argument('--lr', default=0.000005, type=float) +parser.add_argument('--pretrained-from', type=str) +parser.add_argument('--save-path', type=str, required=True) +parser.add_argument('--local-rank', default=0, type=int) +parser.add_argument('--port', default=None, type=int) + + +def main(): + args = parser.parse_args() + + warnings.simplefilter('ignore', np.RankWarning) + + logger = init_log('global', logging.INFO) + logger.propagate = 0 + + rank, world_size = setup_distributed(port=args.port) + + if rank == 0: + all_args = {**vars(args), 'ngpus': world_size} + logger.info('{}\n'.format(pprint.pformat(all_args))) + writer = SummaryWriter(args.save_path) + + cudnn.enabled = True + cudnn.benchmark = True + + size = (args.img_size, args.img_size) + if args.dataset == 'hypersim': + trainset = Hypersim('dataset/splits/hypersim/train.txt', 'train', size=size) + elif args.dataset == 'vkitti': + trainset = VKITTI2('dataset/splits/vkitti2/train.txt', 'train', size=size) + else: + raise NotImplementedError + trainsampler = torch.utils.data.distributed.DistributedSampler(trainset) + trainloader = DataLoader(trainset, batch_size=args.bs, pin_memory=True, num_workers=4, drop_last=True, sampler=trainsampler) + + if args.dataset == 'hypersim': + valset = Hypersim('dataset/splits/hypersim/val.txt', 'val', size=size) + elif args.dataset == 'vkitti': + valset = KITTI('dataset/splits/kitti/val.txt', 'val', size=size) + else: + raise NotImplementedError + valsampler = torch.utils.data.distributed.DistributedSampler(valset) + valloader = DataLoader(valset, batch_size=1, pin_memory=True, num_workers=4, drop_last=True, sampler=valsampler) + + local_rank = int(os.environ["LOCAL_RANK"]) + + model_configs = { + 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, + 
'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, + 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}, + 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]} + } + model = DepthAnythingV2(**{**model_configs[args.encoder], 'max_depth': args.max_depth}) + + if args.pretrained_from: + model.load_state_dict({k: v for k, v in torch.load(args.pretrained_from, map_location='cpu').items() if 'pretrained' in k}, strict=False) + + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) + model.cuda(local_rank) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[local_rank], broadcast_buffers=False, + output_device=local_rank, find_unused_parameters=True) + + criterion = SiLogLoss().cuda(local_rank) + + optimizer = AdamW([{'params': [param for name, param in model.named_parameters() if 'pretrained' in name], 'lr': args.lr}, + {'params': [param for name, param in model.named_parameters() if 'pretrained' not in name], 'lr': args.lr * 10.0}], + lr=args.lr, betas=(0.9, 0.999), weight_decay=0.01) + + total_iters = args.epochs * len(trainloader) + + previous_best = {'d1': 0, 'd2': 0, 'd3': 0, 'abs_rel': 100, 'sq_rel': 100, 'rmse': 100, 'rmse_log': 100, 'log10': 100, 'silog': 100} + + for epoch in range(args.epochs): + if rank == 0: + logger.info('===========> Epoch: {:}/{:}, d1: {:.3f}, d2: {:.3f}, d3: {:.3f}'.format(epoch, args.epochs, previous_best['d1'], previous_best['d2'], previous_best['d3'])) + logger.info('===========> Epoch: {:}/{:}, abs_rel: {:.3f}, sq_rel: {:.3f}, rmse: {:.3f}, rmse_log: {:.3f}, ' + 'log10: {:.3f}, silog: {:.3f}'.format( + epoch, args.epochs, previous_best['abs_rel'], previous_best['sq_rel'], previous_best['rmse'], + previous_best['rmse_log'], previous_best['log10'], previous_best['silog'])) + + trainloader.sampler.set_epoch(epoch + 1) + + model.train() + total_loss = 0 + + for i, sample in enumerate(trainloader): + 
optimizer.zero_grad() + + img, depth, valid_mask = sample['image'].cuda(), sample['depth'].cuda(), sample['valid_mask'].cuda() + + if random.random() < 0.5: + img = img.flip(-1) + depth = depth.flip(-1) + valid_mask = valid_mask.flip(-1) + + pred = model(img) + + loss = criterion(pred, depth, (valid_mask == 1) & (depth >= args.min_depth) & (depth <= args.max_depth)) + + loss.backward() + optimizer.step() + + total_loss += loss.item() + + iters = epoch * len(trainloader) + i + + lr = args.lr * (1 - iters / total_iters) ** 0.9 + + optimizer.param_groups[0]["lr"] = lr + optimizer.param_groups[1]["lr"] = lr * 10.0 + + if rank == 0: + writer.add_scalar('train/loss', loss.item(), iters) + + if rank == 0 and i % 100 == 0: + logger.info('Iter: {}/{}, LR: {:.7f}, Loss: {:.3f}'.format(i, len(trainloader), optimizer.param_groups[0]['lr'], loss.item())) + + model.eval() + + results = {'d1': torch.tensor([0.0]).cuda(), 'd2': torch.tensor([0.0]).cuda(), 'd3': torch.tensor([0.0]).cuda(), + 'abs_rel': torch.tensor([0.0]).cuda(), 'sq_rel': torch.tensor([0.0]).cuda(), 'rmse': torch.tensor([0.0]).cuda(), + 'rmse_log': torch.tensor([0.0]).cuda(), 'log10': torch.tensor([0.0]).cuda(), 'silog': torch.tensor([0.0]).cuda()} + nsamples = torch.tensor([0.0]).cuda() + + for i, sample in enumerate(valloader): + + img, depth, valid_mask = sample['image'].cuda().float(), sample['depth'].cuda()[0], sample['valid_mask'].cuda()[0] + + with torch.no_grad(): + pred = model(img) + pred = F.interpolate(pred[:, None], depth.shape[-2:], mode='bilinear', align_corners=True)[0, 0] + + valid_mask = (valid_mask == 1) & (depth >= args.min_depth) & (depth <= args.max_depth) + + if valid_mask.sum() < 10: + continue + + cur_results = eval_depth(pred[valid_mask], depth[valid_mask]) + + for k in results.keys(): + results[k] += cur_results[k] + nsamples += 1 + + torch.distributed.barrier() + + for k in results.keys(): + dist.reduce(results[k], dst=0) + dist.reduce(nsamples, dst=0) + + if rank == 0: + 
logger.info('==========================================================================================') + logger.info('{:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}, {:>8}'.format(*tuple(results.keys()))) + logger.info('{:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}, {:8.3f}'.format(*tuple([(v / nsamples).item() for v in results.values()]))) + logger.info('==========================================================================================') + print() + + for name, metric in results.items(): + writer.add_scalar(f'eval/{name}', (metric / nsamples).item(), epoch) + + for k in results.keys(): + if k in ['d1', 'd2', 'd3']: + previous_best[k] = max(previous_best[k], (results[k] / nsamples).item()) + else: + previous_best[k] = min(previous_best[k], (results[k] / nsamples).item()) + + if rank == 0: + checkpoint = { + 'model': model.state_dict(), + 'optimizer': optimizer.state_dict(), + 'epoch': epoch, + 'previous_best': previous_best, + } + torch.save(checkpoint, os.path.join(args.save_path, 'latest.pth')) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/ddepth_anything_v2/metric_depth/util/dist_helper.py b/ddepth_anything_v2/metric_depth/util/dist_helper.py new file mode 100644 index 0000000000000000000000000000000000000000..49c65420b8d1496783c1c435bb4b6dba3416048f --- /dev/null +++ b/ddepth_anything_v2/metric_depth/util/dist_helper.py @@ -0,0 +1,41 @@ +import os +import subprocess + +import torch +import torch.distributed as dist + + +def setup_distributed(backend="nccl", port=None): + """AdaHessian Optimizer + Lifted from https://github.com/BIGBALLON/distribuuuu/blob/master/distribuuuu/utils.py + Originally licensed MIT, Copyright (c) 2020 Wei Li + """ + num_gpus = torch.cuda.device_count() + + if "SLURM_JOB_ID" in os.environ: + rank = int(os.environ["SLURM_PROCID"]) + world_size = int(os.environ["SLURM_NTASKS"]) + node_list = os.environ["SLURM_NODELIST"] + addr = subprocess.getoutput(f"scontrol 
show hostname {node_list} | head -n1") + # specify master port + if port is not None: + os.environ["MASTER_PORT"] = str(port) + elif "MASTER_PORT" not in os.environ: + os.environ["MASTER_PORT"] = "10685" + if "MASTER_ADDR" not in os.environ: + os.environ["MASTER_ADDR"] = addr + os.environ["WORLD_SIZE"] = str(world_size) + os.environ["LOCAL_RANK"] = str(rank % num_gpus) + os.environ["RANK"] = str(rank) + else: + rank = int(os.environ["RANK"]) + world_size = int(os.environ["WORLD_SIZE"]) + + torch.cuda.set_device(rank % num_gpus) + + dist.init_process_group( + backend=backend, + world_size=world_size, + rank=rank, + ) + return rank, world_size diff --git a/ddepth_anything_v2/metric_depth/util/loss.py b/ddepth_anything_v2/metric_depth/util/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..ab808798c0c607db4f64f248d8dbea7efcdee1f2 --- /dev/null +++ b/ddepth_anything_v2/metric_depth/util/loss.py @@ -0,0 +1,16 @@ +import torch +from torch import nn + + +class SiLogLoss(nn.Module): + def __init__(self, lambd=0.5): + super().__init__() + self.lambd = lambd + + def forward(self, pred, target, valid_mask): + valid_mask = valid_mask.detach() + diff_log = torch.log(target[valid_mask]) - torch.log(pred[valid_mask]) + loss = torch.sqrt(torch.pow(diff_log, 2).mean() - + self.lambd * torch.pow(diff_log.mean(), 2)) + + return loss diff --git a/ddepth_anything_v2/metric_depth/util/metric.py b/ddepth_anything_v2/metric_depth/util/metric.py new file mode 100644 index 0000000000000000000000000000000000000000..b4c05b29100728e93836e6d2712a9412eff94f4d --- /dev/null +++ b/ddepth_anything_v2/metric_depth/util/metric.py @@ -0,0 +1,26 @@ +import torch + + +def eval_depth(pred, target): + assert pred.shape == target.shape + + thresh = torch.max((target / pred), (pred / target)) + + d1 = torch.sum(thresh < 1.25).float() / len(thresh) + d2 = torch.sum(thresh < 1.25 ** 2).float() / len(thresh) + d3 = torch.sum(thresh < 1.25 ** 3).float() / len(thresh) + + diff = pred - 
target + diff_log = torch.log(pred) - torch.log(target) + + abs_rel = torch.mean(torch.abs(diff) / target) + sq_rel = torch.mean(torch.pow(diff, 2) / target) + + rmse = torch.sqrt(torch.mean(torch.pow(diff, 2))) + rmse_log = torch.sqrt(torch.mean(torch.pow(diff_log , 2))) + + log10 = torch.mean(torch.abs(torch.log10(pred) - torch.log10(target))) + silog = torch.sqrt(torch.pow(diff_log, 2).mean() - 0.5 * torch.pow(diff_log.mean(), 2)) + + return {'d1': d1.item(), 'd2': d2.item(), 'd3': d3.item(), 'abs_rel': abs_rel.item(), 'sq_rel': sq_rel.item(), + 'rmse': rmse.item(), 'rmse_log': rmse_log.item(), 'log10':log10.item(), 'silog':silog.item()} \ No newline at end of file diff --git a/ddepth_anything_v2/metric_depth/util/utils.py b/ddepth_anything_v2/metric_depth/util/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f97ab65ea06ba0062127a8400ab1dd0442259fcb --- /dev/null +++ b/ddepth_anything_v2/metric_depth/util/utils.py @@ -0,0 +1,26 @@ +import os +import re +import numpy as np +import logging + +logs = set() + + +def init_log(name, level=logging.INFO): + if (name, level) in logs: + return + logs.add((name, level)) + logger = logging.getLogger(name) + logger.setLevel(level) + ch = logging.StreamHandler() + ch.setLevel(level) + if "SLURM_PROCID" in os.environ: + rank = int(os.environ["SLURM_PROCID"]) + logger.addFilter(lambda record: rank == 0) + else: + rank = 0 + format_str = "[%(asctime)s][%(levelname)8s] %(message)s" + formatter = logging.Formatter(format_str) + ch.setFormatter(formatter) + logger.addHandler(ch) + return logger diff --git a/ddepth_anything_v2/requirements.txt b/ddepth_anything_v2/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..111fdb4ec1b7158293e0cff5b04d8572d2b0098e --- /dev/null +++ b/ddepth_anything_v2/requirements.txt @@ -0,0 +1,6 @@ +gradio_imageslider +gradio==4.29.0 +matplotlib +opencv-python +torch +torchvision diff --git a/ddepth_anything_v2/run.py 
b/ddepth_anything_v2/run.py new file mode 100644 index 0000000000000000000000000000000000000000..52b53d6aa2b0d6311c1cc4783c3e23ab4c468014 --- /dev/null +++ b/ddepth_anything_v2/run.py @@ -0,0 +1,73 @@ +import argparse +import cv2 +import glob +import matplotlib +import numpy as np +import os +import torch + +from depth_anything_v2.dpt import DepthAnythingV2 + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Depth Anything V2') + + parser.add_argument('--img-path', type=str) + parser.add_argument('--input-size', type=int, default=518) + parser.add_argument('--outdir', type=str, default='./vis_depth') + + parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg']) + + parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='only display the prediction') + parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='do not apply colorful palette') + + args = parser.parse_args() + + DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu' + + model_configs = { + 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]}, + 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}, + 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}, + 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]} + } + + depth_anything = DepthAnythingV2(**model_configs[args.encoder]) + depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu')) + depth_anything = depth_anything.to(DEVICE).eval() + + if os.path.isfile(args.img_path): + if args.img_path.endswith('txt'): + with open(args.img_path, 'r') as f: + filenames = f.read().splitlines() + else: + filenames = [args.img_path] + else: + filenames = glob.glob(os.path.join(args.img_path, '**/*'), recursive=True) + + 
if __name__ == '__main__':
    # Command-line entry point: run Depth Anything V2 on one video, a text
    # file listing videos, or every file below a directory, and write an
    # .mp4 per input into --outdir.
    parser = argparse.ArgumentParser(description='Depth Anything V2')

    # The path is used unconditionally below, so make argparse report a
    # missing value instead of failing later inside os.path.isfile(None).
    parser.add_argument('--video-path', type=str, required=True)
    parser.add_argument('--input-size', type=int, default=518)
    parser.add_argument('--outdir', type=str, default='./vis_video_depth')

    parser.add_argument('--encoder', type=str, default='vitl', choices=['vits', 'vitb', 'vitl', 'vitg'])

    parser.add_argument('--pred-only', dest='pred_only', action='store_true', help='only display the prediction')
    parser.add_argument('--grayscale', dest='grayscale', action='store_true', help='do not apply colorful palette')

    args = parser.parse_args()

    DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'

    model_configs = {
        'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
        'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
        'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
        'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
    }

    depth_anything = DepthAnythingV2(**model_configs[args.encoder])
    depth_anything.load_state_dict(torch.load(f'checkpoints/depth_anything_v2_{args.encoder}.pth', map_location='cpu'))
    depth_anything = depth_anything.to(DEVICE).eval()

    if os.path.isfile(args.video_path):
        if args.video_path.endswith('txt'):
            with open(args.video_path, 'r') as f:
                # Fixed: this used to be stored in `lines`, leaving
                # `filenames` undefined for the loop below (NameError).
                filenames = f.read().splitlines()
        else:
            filenames = [args.video_path]
    else:
        filenames = glob.glob(os.path.join(args.video_path, '**/*'), recursive=True)

    os.makedirs(args.outdir, exist_ok=True)

    margin_width = 50
    cmap = matplotlib.colormaps.get_cmap('Spectral_r')

    for k, filename in enumerate(filenames):
        print(f'Progress {k+1}/{len(filenames)}: {filename}')

        raw_video = cv2.VideoCapture(filename)
        if not raw_video.isOpened():
            # The recursive glob also yields directories and non-video files;
            # skip them instead of creating an output for nothing.
            print(f'Skipping {filename}: cannot be opened as a video')
            raw_video.release()
            continue

        frame_width, frame_height = int(raw_video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(raw_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_rate = int(raw_video.get(cv2.CAP_PROP_FPS))

        # Side-by-side output holds the input frame, a white margin and the depth map.
        if args.pred_only:
            output_width = frame_width
        else:
            output_width = frame_width * 2 + margin_width

        output_path = os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + '.mp4')
        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), frame_rate, (output_width, frame_height))

        try:
            while raw_video.isOpened():
                ret, raw_frame = raw_video.read()
                if not ret:
                    break

                depth = depth_anything.infer_image(raw_frame, args.input_size)

                # Per-frame min-max normalisation to 0..255.
                depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
                depth = depth.astype(np.uint8)

                if args.grayscale:
                    depth = np.repeat(depth[..., np.newaxis], 3, axis=-1)
                else:
                    # Drop the colormap's alpha channel and reverse the channel order.
                    depth = (cmap(depth)[:, :, :3] * 255)[:, :, ::-1].astype(np.uint8)

                if args.pred_only:
                    out.write(depth)
                else:
                    split_region = np.ones((frame_height, margin_width, 3), dtype=np.uint8) * 255
                    combined_frame = cv2.hconcat([raw_frame, split_region, depth])

                    out.write(combined_frame)
        finally:
            # Release both handles even if inference fails mid-video.
            raw_video.release()
            out.release()
0000000000000000000000000000000000000000..55c28515c55c455556734b70d03651d8f5fd21fc Binary files /dev/null and b/dmarigold/marigold/__pycache__/marigold_pipeline.cpython-310.pyc differ diff --git a/dmarigold/marigold/__pycache__/marigold_pipeline.cpython-311.pyc b/dmarigold/marigold/__pycache__/marigold_pipeline.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e7482e011df5f81d4a9dafe3ecaa6eeac509768 Binary files /dev/null and b/dmarigold/marigold/__pycache__/marigold_pipeline.cpython-311.pyc differ diff --git a/dmarigold/marigold/__pycache__/marigold_pipeline.cpython-312.pyc b/dmarigold/marigold/__pycache__/marigold_pipeline.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..63932e16bf76ab5d1dd99cf15c66659dd8166b36 Binary files /dev/null and b/dmarigold/marigold/__pycache__/marigold_pipeline.cpython-312.pyc differ diff --git a/dmarigold/marigold/marigold_pipeline.py b/dmarigold/marigold/marigold_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..10ac5477914638105972d7a084494800dbddf874 --- /dev/null +++ b/dmarigold/marigold/marigold_pipeline.py @@ -0,0 +1,359 @@ +# Author: Bingxin Ke +# Last modified: 2023-12-15 + +from typing import List, Dict, Union + +import torch +from torch.utils.data import DataLoader, TensorDataset +import numpy as np +from tqdm.auto import tqdm +from PIL import Image + +from diffusers import ( + DiffusionPipeline, + DDIMScheduler, + UNet2DConditionModel, + AutoencoderKL, +) +from diffusers.utils import BaseOutput +from transformers import CLIPTextModel, CLIPTokenizer + +from .util.image_util import chw2hwc, colorize_depth_maps, resize_max_res +from .util.batchsize import find_batch_size +from .util.ensemble import ensemble_depths + + +class MarigoldDepthOutput(BaseOutput): + """ + Output class for Marigold monocular depth prediction pipeline. 
class MarigoldPipeline(DiffusionPipeline):
    """
    Pipeline for monocular depth estimation using Marigold: https://arxiv.org/abs/2312.02145.

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

    Args:
        unet (UNet2DConditionModel):
            Conditional U-Net to denoise the depth latent, conditioned on image latent.
        vae (AutoencoderKL):
            Variational Auto-Encoder (VAE) Model to encode and decode images and depth maps
            to and from latent representations.
        scheduler (DDIMScheduler):
            A scheduler to be used in combination with `unet` to denoise the encoded image latents.
        text_encoder (CLIPTextModel):
            Text-encoder, for empty text embedding.
        tokenizer (CLIPTokenizer):
            CLIP tokenizer.
    """

    # Factors applied to the VAE latents when encoding RGB / decoding depth.
    rgb_latent_scale_factor = 0.18215
    depth_latent_scale_factor = 0.18215

    def __init__(
        self,
        unet: UNet2DConditionModel,
        vae: AutoencoderKL,
        scheduler: DDIMScheduler,
        text_encoder: CLIPTextModel,
        tokenizer: CLIPTokenizer,
    ):
        super().__init__()

        self.register_modules(
            unet=unet,
            vae=vae,
            scheduler=scheduler,
            text_encoder=text_encoder,
            tokenizer=tokenizer,
        )

        # Filled lazily by the first call to `single_infer`.
        self.empty_text_embed = None

    @torch.no_grad()
    def __call__(
        self,
        input_image: Image.Image,
        denoising_steps: int = 10,
        ensemble_size: int = 10,
        processing_res: int = 768,
        match_input_res: bool = True,
        batch_size: int = 0,
        color_map: str = "Spectral",
        show_progress_bar: bool = True,
        ensemble_kwargs: Union[Dict, None] = None,
    ) -> MarigoldDepthOutput:
        """
        Function invoked when calling the pipeline.

        Args:
            input_image (Image.Image):
                Input RGB (or gray-scale) image.
            denoising_steps (int, optional):
                Number of diffusion denoising steps (DDIM) during inference.
                Defaults to 10.
            ensemble_size (int, optional):
                Number of predictions to be ensembled.
                Defaults to 10.
            processing_res (int, optional):
                Maximum resolution of processing.
                If set to 0: will not resize at all.
                Defaults to 768.
            match_input_res (bool, optional):
                Resize depth prediction to match input resolution.
                Defaults to True.
            batch_size (int, optional):
                Inference batch size.
                If set to 0, `find_batch_size` picks the batch size.
                Defaults to 0.
            color_map (str, optional):
                Colormap used to colorize the depth map.
                Defaults to "Spectral".
            show_progress_bar (bool, optional):
                Display a progress bar of diffusion denoising.
                Defaults to True.
            ensemble_kwargs (dict, optional):
                Extra keyword arguments forwarded to `ensemble_depths`.
                Only used when `ensemble_size` > 1.
        Returns:
            `MarigoldDepthOutput`. `uncertainty` is whatever `ensemble_depths`
            returned (it is passed through unconverted), or None when
            `ensemble_size` is 1.
        """

        device = self.device
        input_size = input_image.size

        if not match_input_res:
            assert (
                processing_res is not None
            ), "Value error: `match_input_res=False` is only valid with `processing_res` set"
        assert processing_res >= 0
        assert denoising_steps >= 1
        assert ensemble_size >= 1

        # ----------------- Image Preprocess -----------------
        # Resize image
        if processing_res > 0:
            input_image = resize_max_res(
                input_image, max_edge_resolution=processing_res
            )
        # Convert the image to RGB, to 1.remove the alpha channel 2.convert B&W to 3-channel
        input_image = input_image.convert("RGB")
        image = np.asarray(input_image)

        # Normalize rgb values
        # NOTE(review): the VAE is fed values in [0, 1] here; confirm this is
        # the range the loaded checkpoint was trained with.
        rgb = np.transpose(image, (2, 0, 1))  # [H, W, rgb] -> [rgb, H, W]
        rgb_norm = rgb / 255.0
        rgb_norm = torch.from_numpy(rgb_norm).to(self.vae.dtype)
        rgb_norm = rgb_norm.to(device)
        assert rgb_norm.min() >= 0.0 and rgb_norm.max() <= 1.0

        # ----------------- Predicting depth -----------------
        # Batch repeated input image
        duplicated_rgb = torch.stack([rgb_norm] * ensemble_size)
        single_rgb_dataset = TensorDataset(duplicated_rgb)
        if batch_size > 0:
            _bs = batch_size
        else:
            _bs = find_batch_size(
                ensemble_size=ensemble_size, input_res=max(rgb_norm.shape[1:])
            )

        single_rgb_loader = DataLoader(
            single_rgb_dataset, batch_size=_bs, shuffle=False
        )

        # Predict depth maps (batched)
        depth_pred_ls = []
        if show_progress_bar:
            iterable = tqdm(
                single_rgb_loader, desc=" " * 2 + "Inference batches", leave=False
            )
        else:
            iterable = single_rgb_loader
        for batch in iterable:
            (batched_img,) = batch
            depth_pred_raw = self.single_infer(
                rgb_in=batched_img,
                num_inference_steps=denoising_steps,
                show_pbar=show_progress_bar,
            )
            depth_pred_ls.append(depth_pred_raw.detach().clone())
        depth_preds = torch.cat(depth_pred_ls, dim=0).squeeze()
        torch.cuda.empty_cache()  # clear vram cache for ensembling

        # ----------------- Test-time ensembling -----------------
        if ensemble_size > 1:
            depth_pred, pred_uncert = ensemble_depths(
                depth_preds, **(ensemble_kwargs or {})
            )
        else:
            depth_pred = depth_preds
            pred_uncert = None

        # ----------------- Post processing -----------------
        # Scale prediction to [0, 1]
        min_d = torch.min(depth_pred)
        max_d = torch.max(depth_pred)
        depth_pred = (depth_pred - min_d) / (max_d - min_d)

        # Convert to numpy
        depth_pred = depth_pred.cpu().numpy().astype(np.float32)

        # Resize back to original resolution
        if match_input_res:
            pred_img = Image.fromarray(depth_pred)
            pred_img = pred_img.resize(input_size)
            depth_pred = np.asarray(pred_img)

        # Clip output range (resampling can overshoot [0, 1])
        depth_pred = depth_pred.clip(0, 1)

        # Colorize
        depth_colored = colorize_depth_maps(
            depth_pred, 0, 1, cmap=color_map
        ).squeeze()  # [3, H, W], value in (0, 1)
        depth_colored = (depth_colored * 255).astype(np.uint8)
        depth_colored_hwc = chw2hwc(depth_colored)
        depth_colored_img = Image.fromarray(depth_colored_hwc)
        return MarigoldDepthOutput(
            depth_np=depth_pred,
            depth_colored=depth_colored_img,
            uncertainty=pred_uncert,
        )

    def __encode_empty_text(self):
        """
        Encode text embedding for empty prompt and cache it in `self.empty_text_embed`.
        """
        prompt = ""
        text_inputs = self.tokenizer(
            prompt,
            padding="do_not_pad",
            max_length=self.tokenizer.model_max_length,
            truncation=True,
            return_tensors="pt",
        )
        text_input_ids = text_inputs.input_ids.to(self.text_encoder.device)
        self.empty_text_embed = self.text_encoder(text_input_ids)[0]

    @torch.no_grad()
    def single_infer(
        self, rgb_in: torch.Tensor, num_inference_steps: int, show_pbar: bool
    ) -> torch.Tensor:
        """
        Perform an individual depth prediction without ensembling.

        Args:
            rgb_in (torch.Tensor):
                Input RGB image (batched).
            num_inference_steps (int):
                Number of diffusion denoising steps (DDIM) during inference.
            show_pbar (bool):
                Display a progress bar of diffusion denoising.

        Returns:
            torch.Tensor: Predicted depth map, with values in [0, 1].
        """
        device = rgb_in.device

        # Set timesteps
        self.scheduler.set_timesteps(num_inference_steps, device=device)
        timesteps = self.scheduler.timesteps  # [T]

        # Encode image
        rgb_latent = self.encode_rgb(rgb_in)

        # Initial depth map (noise)
        depth_latent = torch.randn(
            rgb_latent.shape, device=device, dtype=rgb_latent.dtype
        )  # [B, 4, h, w]

        # Batched empty text embedding
        if self.empty_text_embed is None:
            self.__encode_empty_text()
        batch_empty_text_embed = self.empty_text_embed.repeat(
            (rgb_latent.shape[0], 1, 1)
        )  # [B, 2, 1024]

        # Denoising loop
        if show_pbar:
            iterable = tqdm(
                enumerate(timesteps),
                total=len(timesteps),
                leave=False,
                desc=" " * 4 + "Diffusion denoising",
            )
        else:
            iterable = enumerate(timesteps)

        for i, t in iterable:
            unet_input = torch.cat(
                [rgb_latent, depth_latent], dim=1
            )  # this order is important

            # predict the noise residual
            noise_pred = self.unet(
                unet_input, t, encoder_hidden_states=batch_empty_text_embed
            ).sample  # [B, 4, h, w]

            # compute the previous noisy sample x_t -> x_t-1
            depth_latent = self.scheduler.step(noise_pred, t, depth_latent).prev_sample
        depth = self.decode_depth(depth_latent)

        # clip prediction
        depth = torch.clip(depth, -1.0, 1.0)
        # shift to [0, 1]
        # Fixed: this used to be `depth * 2.0 - 1.0`, which maps [-1, 1] to
        # [-3, 1]. `__call__` min-max normalises afterwards, so its output is
        # the same up to rounding; only direct callers see the new range.
        depth = (depth + 1.0) / 2.0

        return depth

    def encode_rgb(self, rgb_in: torch.Tensor) -> torch.Tensor:
        """
        Encode RGB image into latent.

        Args:
            rgb_in (torch.Tensor):
                Input RGB image to be encoded.

        Returns:
            torch.Tensor: Image latent
        """
        # encode
        h = self.vae.encoder(rgb_in)
        moments = self.vae.quant_conv(h)
        # Only the mean half of the moments is used; logvar is discarded.
        mean, logvar = torch.chunk(moments, 2, dim=1)
        # scale latent
        rgb_latent = mean * self.rgb_latent_scale_factor
        return rgb_latent

    def decode_depth(self, depth_latent: torch.Tensor) -> torch.Tensor:
        """
        Decode depth latent into depth map.

        Args:
            depth_latent (torch.Tensor):
                Depth latent to be decoded.

        Returns:
            torch.Tensor: Decoded depth map.
        """
        # scale latent
        depth_latent = depth_latent / self.depth_latent_scale_factor
        # decode
        z = self.vae.post_quant_conv(depth_latent)
        stacked = self.vae.decoder(z)
        # mean of output channels
        depth_mean = stacked.mean(dim=1, keepdim=True)
        return depth_mean
diff --git a/dmarigold/marigold/util/__pycache__/ensemble.cpython-312.pyc b/dmarigold/marigold/util/__pycache__/ensemble.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4c9927cffdf86a6f14a0b6429cdc35b9f21d3b7b Binary files /dev/null and b/dmarigold/marigold/util/__pycache__/ensemble.cpython-312.pyc differ diff --git a/dmarigold/marigold/util/__pycache__/image_util.cpython-310.pyc b/dmarigold/marigold/util/__pycache__/image_util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2158f6280a45165b174d3b124f4ec29d51b6754 Binary files /dev/null and b/dmarigold/marigold/util/__pycache__/image_util.cpython-310.pyc differ diff --git a/dmarigold/marigold/util/__pycache__/image_util.cpython-311.pyc b/dmarigold/marigold/util/__pycache__/image_util.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ea3b481644e05a3cd9346047ce93814b9bcebd69 Binary files /dev/null and b/dmarigold/marigold/util/__pycache__/image_util.cpython-311.pyc differ diff --git a/dmarigold/marigold/util/__pycache__/image_util.cpython-312.pyc b/dmarigold/marigold/util/__pycache__/image_util.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a0e82530640302ae2e683665f5df9858bd4d74e9 Binary files /dev/null and b/dmarigold/marigold/util/__pycache__/image_util.cpython-312.pyc differ diff --git a/dmarigold/marigold/util/batchsize.py b/dmarigold/marigold/util/batchsize.py new file mode 100644 index 0000000000000000000000000000000000000000..45c6caf8a91049787f5cef1668b4325f1af76bd0 --- /dev/null +++ b/dmarigold/marigold/util/batchsize.py @@ -0,0 +1,51 @@ +# Author: Bingxin Ke +# Last modified: 2023-12-15 + +import torch +import math + + +# Search table for suggested max. 
# Suggested maximum inference batch size per (resolution, total VRAM) tier.
bs_search_table = [
    # tested on A100-PCIE-80GB
    {"res": 768, "total_vram": 79, "bs": 35},
    {"res": 1024, "total_vram": 79, "bs": 20},
    # tested on A100-PCIE-40GB
    {"res": 768, "total_vram": 39, "bs": 15},
    {"res": 1024, "total_vram": 39, "bs": 8},
    # tested on RTX3090, RTX4090
    {"res": 512, "total_vram": 23, "bs": 20},
    {"res": 768, "total_vram": 23, "bs": 7},
    {"res": 1024, "total_vram": 23, "bs": 3},
    # tested on GTX1080Ti
    {"res": 512, "total_vram": 10, "bs": 5},
    {"res": 768, "total_vram": 10, "bs": 2},
]


def find_batch_size(ensemble_size: int, input_res: int) -> int:
    """
    Pick an inference batch size from ``bs_search_table``.

    Without CUDA the answer is always 1. Otherwise the table is scanned from
    the smallest resolution upwards (largest VRAM tier first within each
    resolution) and the first row that covers ``input_res`` and fits the
    device's total memory is used. The row's batch size is then capped at
    ``ensemble_size``; a value strictly between half the ensemble (rounded
    up) and the full ensemble is lowered to that half.

    Args:
        ensemble_size (int): Number of predictions to be ensembled.
        input_res (int): Operating resolution of the input image.

    Returns:
        int: Operating batch size (1 when no table row applies).
    """
    if not torch.cuda.is_available():
        return 1

    # Second entry of mem_get_info() is the total device memory in bytes.
    total_vram = torch.cuda.mem_get_info()[1] / 1024.0**3

    ordered_rows = sorted(
        bs_search_table, key=lambda row: (row["res"], -row["total_vram"])
    )
    chosen = next(
        (
            row
            for row in ordered_rows
            if input_res <= row["res"] and total_vram >= row["total_vram"]
        ),
        None,
    )
    if chosen is None:
        return 1

    batch = chosen["bs"]
    if batch > ensemble_size:
        return ensemble_size

    half_ensemble = math.ceil(ensemble_size / 2)
    if half_ensemble < batch < ensemble_size:
        return half_ensemble
    return batch
def _pairwise_differences(stack: torch.Tensor) -> torch.Tensor:
    """Return ``stack[i] - stack[j]`` for every pair ``i < j``, stacked along dim 0."""
    pairs = torch.combinations(torch.arange(stack.shape[0], device=stack.device))
    return stack[pairs[:, 0]] - stack[pairs[:, 1]]


def ensemble_depths(
    input_images: torch.Tensor,
    regularizer_strength: float = 0.02,
    max_iter: int = 2,
    tol: float = 1e-3,
    reduction: str = "median",
    max_res: int = None,
):
    """
    Ensemble multiple affine-invariant depth images (known only up to scale
    and shift) by estimating one scale and one shift per image, then reducing.

    Args:
        input_images (torch.Tensor): Stack of depth maps, indexed as [N, H, W].
        regularizer_strength (float): Weight of the penalty that pulls the
            reduced prediction's min/max towards 0 and 1.
        max_iter (int): Maximum number of BFGS iterations.
        tol (float): Tolerance passed to ``scipy.optimize.minimize``.
        reduction (str): ``"median"`` or ``"mean"``.
        max_res (int, optional): If given and smaller than the longer image
            edge, scale/shift are estimated on a nearest-neighbour downscaled
            copy. The returned maps always come from the full-resolution input.

    Returns:
        tuple: ``(aligned_images, uncertainty)``. The aligned map is rescaled
        to [0, 1]; uncertainty is the standard deviation (mean reduction) or
        median absolute deviation (median reduction), divided by the same range.

    Raises:
        ValueError: If ``reduction`` is not ``"mean"`` or ``"median"``.
    """
    # Fail before the optimisation rather than inside it.
    if reduction not in ("mean", "median"):
        raise ValueError(f"Unknown reduction method: {reduction}")

    device = input_images.device
    dtype = np.float32

    original_input = input_images.clone()
    n_img = input_images.shape[0]
    ori_shape = input_images.shape

    if max_res is not None:
        height, width = ori_shape[-2:]
        scale_factor = max_res / max(height, width)
        if scale_factor < 1:
            # Fixed: the old code called torch.from_numpy() on a tensor
            # (TypeError). The channel dim is added so the resize is 2-D.
            new_size = (
                max(1, int(height * scale_factor)),
                max(1, int(width * scale_factor)),
            )
            input_images = torch.nn.functional.interpolate(
                input_images.unsqueeze(1), size=new_size, mode="nearest"
            ).squeeze(1)

    # init guess: map every image's own [min, max] onto [0, 1]
    flat = input_images.reshape((n_img, -1)).detach().cpu().numpy()
    _min = np.min(flat, axis=1)
    _max = np.max(flat, axis=1)
    s_init = 1.0 / (_max - _min).reshape((-1, 1, 1))
    t_init = (-1 * s_init.flatten() * _min.flatten()).reshape((-1, 1, 1))
    x = np.concatenate([s_init, t_init]).reshape(-1)

    input_images = input_images.to(device)

    # objective function: first half of x holds the scales, second half the shifts
    def closure(x):
        x = x.astype(dtype)
        half = int(len(x) / 2)
        s = torch.from_numpy(x[:half]).to(device)
        t = torch.from_numpy(x[half:]).to(device)

        transformed_arrays = input_images * s.view((-1, 1, 1)) + t.view((-1, 1, 1))
        dists = _pairwise_differences(transformed_arrays)
        sqrt_dist = torch.sqrt(torch.mean(dists**2))

        if "mean" == reduction:
            pred = torch.mean(transformed_arrays, dim=0)
        else:
            pred = torch.median(transformed_arrays, dim=0).values

        near_err = torch.sqrt((0 - torch.min(pred)) ** 2)
        far_err = torch.sqrt((1 - torch.max(pred)) ** 2)

        err = sqrt_dist + (near_err + far_err) * regularizer_strength
        return err.detach().cpu().numpy()

    res = minimize(
        closure, x, method="BFGS", tol=tol, options={"maxiter": max_iter, "disp": False}
    )
    x = res.x.astype(dtype)
    half = int(len(x) / 2)

    # Prediction on the full-resolution input
    s = torch.from_numpy(x[:half]).to(device)
    t = torch.from_numpy(x[half:]).to(device)
    transformed_arrays = original_input * s.view(-1, 1, 1) + t.view(-1, 1, 1)
    if "mean" == reduction:
        aligned_images = torch.mean(transformed_arrays, dim=0)
        uncertainty = torch.std(transformed_arrays, dim=0)
    else:
        aligned_images = torch.median(transformed_arrays, dim=0).values
        # MAD (median absolute deviation) as uncertainty indicator
        abs_dev = torch.abs(transformed_arrays - aligned_images)
        uncertainty = torch.median(abs_dev, dim=0).values

    # Scale and shift to [0, 1]
    _min = torch.min(aligned_images)
    _max = torch.max(aligned_images)
    aligned_images = (aligned_images - _min) / (_max - _min)
    uncertainty = uncertainty / (_max - _min)

    return aligned_images, uncertainty
def chw2hwc(chw):
    """
    Move the leading (channel) axis of a 3-D array to the end: [C, H, W] -> [H, W, C].

    Args:
        chw (torch.Tensor or np.ndarray): 3-dimensional input.

    Returns:
        Same type as the input, with axes ordered (1, 2, 0).

    Raises:
        TypeError: If ``chw`` is neither a ``torch.Tensor`` nor a ``np.ndarray``
            (previously this surfaced as an UnboundLocalError on ``hwc``).
        AssertionError: If the input is not 3-dimensional.
    """
    if not isinstance(chw, (torch.Tensor, np.ndarray)):
        raise TypeError(
            f"chw2hwc expects a torch.Tensor or np.ndarray, got {type(chw).__name__}"
        )
    assert 3 == len(chw.shape)
    if isinstance(chw, torch.Tensor):
        return torch.permute(chw, (1, 2, 0))
    return np.moveaxis(chw, 0, -1)
def seed_all(seed: int = 0):
    """
    Seed every random number generator used here with the same value:
    Python's ``random``, NumPy's global generator, PyTorch's default
    generator and PyTorch's generators for all CUDA devices.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/dmidas/__pycache__/base_model.cpython-310.pyc b/dmidas/__pycache__/base_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..383a2eaf2919e55656534bd7d690b8da4476d586 Binary files /dev/null and b/dmidas/__pycache__/base_model.cpython-310.pyc differ diff --git a/dmidas/__pycache__/base_model.cpython-311.pyc b/dmidas/__pycache__/base_model.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a565b178d2ef67052e3cbb9699d29bdb4502ad67 Binary files /dev/null and b/dmidas/__pycache__/base_model.cpython-311.pyc differ diff --git a/dmidas/__pycache__/base_model.cpython-312.pyc b/dmidas/__pycache__/base_model.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6afbe776ea9c75c33e9ba80b936c81b92c4a4c05 Binary files /dev/null and b/dmidas/__pycache__/base_model.cpython-312.pyc differ diff --git a/dmidas/__pycache__/blocks.cpython-310.pyc b/dmidas/__pycache__/blocks.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2996f0450fd6d0531d34b4f5ef3b914afaf40127 Binary files /dev/null and b/dmidas/__pycache__/blocks.cpython-310.pyc differ diff --git a/dmidas/__pycache__/blocks.cpython-311.pyc b/dmidas/__pycache__/blocks.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45847d065ec13c9a715d80ea9ffb7c32baf10c0e Binary files /dev/null and b/dmidas/__pycache__/blocks.cpython-311.pyc differ diff --git a/dmidas/__pycache__/blocks.cpython-312.pyc b/dmidas/__pycache__/blocks.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d3b0f7bf7792269970d29dd44d0de6b9c528f7fa Binary files /dev/null and 
b/dmidas/__pycache__/blocks.cpython-312.pyc differ diff --git a/dmidas/__pycache__/dpt_depth.cpython-310.pyc b/dmidas/__pycache__/dpt_depth.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d15e7cb45f1b910e3b252c7e145fd95229451e35 Binary files /dev/null and b/dmidas/__pycache__/dpt_depth.cpython-310.pyc differ diff --git a/dmidas/__pycache__/dpt_depth.cpython-311.pyc b/dmidas/__pycache__/dpt_depth.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dee293c7ea10e936c8f7defb5395fd8a7ad082ff Binary files /dev/null and b/dmidas/__pycache__/dpt_depth.cpython-311.pyc differ diff --git a/dmidas/__pycache__/dpt_depth.cpython-312.pyc b/dmidas/__pycache__/dpt_depth.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a2899a4846b4ce81dd6af78f12e95ab8e35e8ee Binary files /dev/null and b/dmidas/__pycache__/dpt_depth.cpython-312.pyc differ diff --git a/dmidas/__pycache__/midas_net.cpython-310.pyc b/dmidas/__pycache__/midas_net.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5cb686e7e1c5edef3ea998135b0f7782f59c5fdd Binary files /dev/null and b/dmidas/__pycache__/midas_net.cpython-310.pyc differ diff --git a/dmidas/__pycache__/midas_net.cpython-311.pyc b/dmidas/__pycache__/midas_net.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df5a174864a29f8229bb67c2ca39e094e3f7dbeb Binary files /dev/null and b/dmidas/__pycache__/midas_net.cpython-311.pyc differ diff --git a/dmidas/__pycache__/midas_net.cpython-312.pyc b/dmidas/__pycache__/midas_net.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..89fb59f11b2e425ffce91a1988d0447babf85beb Binary files /dev/null and b/dmidas/__pycache__/midas_net.cpython-312.pyc differ diff --git a/dmidas/__pycache__/midas_net_custom.cpython-310.pyc b/dmidas/__pycache__/midas_net_custom.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..e11d0c11ceec6d7638852fcbf729350ba5ac328d Binary files /dev/null and b/dmidas/__pycache__/midas_net_custom.cpython-310.pyc differ diff --git a/dmidas/__pycache__/midas_net_custom.cpython-311.pyc b/dmidas/__pycache__/midas_net_custom.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..822532ec1897727ca15c1cb22d9347726c6f6e93 Binary files /dev/null and b/dmidas/__pycache__/midas_net_custom.cpython-311.pyc differ diff --git a/dmidas/__pycache__/midas_net_custom.cpython-312.pyc b/dmidas/__pycache__/midas_net_custom.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00176ad3505a8aa01b0535fd6170116b13a1f312 Binary files /dev/null and b/dmidas/__pycache__/midas_net_custom.cpython-312.pyc differ diff --git a/dmidas/__pycache__/transforms.cpython-310.pyc b/dmidas/__pycache__/transforms.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b59664f048b604384e0c8890ffe57a37d2bb723 Binary files /dev/null and b/dmidas/__pycache__/transforms.cpython-310.pyc differ diff --git a/dmidas/__pycache__/transforms.cpython-311.pyc b/dmidas/__pycache__/transforms.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..627f0e7fc52a054ce7374167ee6a6f5850d61c9b Binary files /dev/null and b/dmidas/__pycache__/transforms.cpython-311.pyc differ diff --git a/dmidas/__pycache__/transforms.cpython-312.pyc b/dmidas/__pycache__/transforms.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf44067ef79057b318be12b52569d681045cb058 Binary files /dev/null and b/dmidas/__pycache__/transforms.cpython-312.pyc differ diff --git a/dmidas/backbones/__pycache__/beit.cpython-310.pyc b/dmidas/backbones/__pycache__/beit.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c32b39e4dbaa58f63201d4e2220d3234012301ee Binary files /dev/null and 
b/dmidas/backbones/__pycache__/beit.cpython-310.pyc differ diff --git a/dmidas/backbones/__pycache__/beit.cpython-311.pyc b/dmidas/backbones/__pycache__/beit.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..29a576887d4d6a1b8bd0510fcd45407e8995a2ea Binary files /dev/null and b/dmidas/backbones/__pycache__/beit.cpython-311.pyc differ diff --git a/dmidas/backbones/__pycache__/beit.cpython-312.pyc b/dmidas/backbones/__pycache__/beit.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a54927af78c868a42eb29918ed45c3fb1e6125b7 Binary files /dev/null and b/dmidas/backbones/__pycache__/beit.cpython-312.pyc differ diff --git a/dmidas/backbones/__pycache__/levit.cpython-310.pyc b/dmidas/backbones/__pycache__/levit.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3714d18c70352bb2cb62de1df361314445774822 Binary files /dev/null and b/dmidas/backbones/__pycache__/levit.cpython-310.pyc differ diff --git a/dmidas/backbones/__pycache__/levit.cpython-311.pyc b/dmidas/backbones/__pycache__/levit.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..888304a6600fd0b0ec617becf26fd513177ce205 Binary files /dev/null and b/dmidas/backbones/__pycache__/levit.cpython-311.pyc differ diff --git a/dmidas/backbones/__pycache__/levit.cpython-312.pyc b/dmidas/backbones/__pycache__/levit.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..887bda0c6346f25152fa0ee3e9825bb902c3f66b Binary files /dev/null and b/dmidas/backbones/__pycache__/levit.cpython-312.pyc differ diff --git a/dmidas/backbones/__pycache__/next_vit.cpython-310.pyc b/dmidas/backbones/__pycache__/next_vit.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0fef7c92abcc7f3054b00dd9c2d9b1a3c7c48c0a Binary files /dev/null and b/dmidas/backbones/__pycache__/next_vit.cpython-310.pyc differ diff --git 
a/dmidas/backbones/__pycache__/next_vit.cpython-311.pyc b/dmidas/backbones/__pycache__/next_vit.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6501e5f78c681b167edc347e1dc9897b7b04d644 Binary files /dev/null and b/dmidas/backbones/__pycache__/next_vit.cpython-311.pyc differ diff --git a/dmidas/backbones/__pycache__/next_vit.cpython-312.pyc b/dmidas/backbones/__pycache__/next_vit.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..da0282677b67e70d09cadab2ce3bf608d474b2fb Binary files /dev/null and b/dmidas/backbones/__pycache__/next_vit.cpython-312.pyc differ diff --git a/dmidas/backbones/__pycache__/swin.cpython-310.pyc b/dmidas/backbones/__pycache__/swin.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b72ac6694547d4a8ad783f408a5fbbb7da8beaa9 Binary files /dev/null and b/dmidas/backbones/__pycache__/swin.cpython-310.pyc differ diff --git a/dmidas/backbones/__pycache__/swin.cpython-311.pyc b/dmidas/backbones/__pycache__/swin.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..94bb1b19f51054ff21607724f59eaed7b8e3f337 Binary files /dev/null and b/dmidas/backbones/__pycache__/swin.cpython-311.pyc differ diff --git a/dmidas/backbones/__pycache__/swin.cpython-312.pyc b/dmidas/backbones/__pycache__/swin.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ac851996589d66a1a90a17541dab225e6722ecf9 Binary files /dev/null and b/dmidas/backbones/__pycache__/swin.cpython-312.pyc differ diff --git a/dmidas/backbones/__pycache__/swin2.cpython-310.pyc b/dmidas/backbones/__pycache__/swin2.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cfd6c16baa8b9f1ddf370672d24bfd6202329a82 Binary files /dev/null and b/dmidas/backbones/__pycache__/swin2.cpython-310.pyc differ diff --git a/dmidas/backbones/__pycache__/swin2.cpython-311.pyc b/dmidas/backbones/__pycache__/swin2.cpython-311.pyc 
new file mode 100644 index 0000000000000000000000000000000000000000..9c7503cc8fddbb85ee63e9fd2f1cc6633a5c5cfd Binary files /dev/null and b/dmidas/backbones/__pycache__/swin2.cpython-311.pyc differ diff --git a/dmidas/backbones/__pycache__/swin2.cpython-312.pyc b/dmidas/backbones/__pycache__/swin2.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a3ca10c6069a787e7fc58dc0e1f1ee0bdd7dc6b9 Binary files /dev/null and b/dmidas/backbones/__pycache__/swin2.cpython-312.pyc differ diff --git a/dmidas/backbones/__pycache__/swin_common.cpython-310.pyc b/dmidas/backbones/__pycache__/swin_common.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..32d499cf72ec466c0acb14b9efa3ca1845b2da60 Binary files /dev/null and b/dmidas/backbones/__pycache__/swin_common.cpython-310.pyc differ diff --git a/dmidas/backbones/__pycache__/swin_common.cpython-311.pyc b/dmidas/backbones/__pycache__/swin_common.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a260f9ea68e207dfb5cd2f19c389445650ff7e2 Binary files /dev/null and b/dmidas/backbones/__pycache__/swin_common.cpython-311.pyc differ diff --git a/dmidas/backbones/__pycache__/swin_common.cpython-312.pyc b/dmidas/backbones/__pycache__/swin_common.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..18d0e68916e7e4c62d2275b8439f7e0ea63a9a17 Binary files /dev/null and b/dmidas/backbones/__pycache__/swin_common.cpython-312.pyc differ diff --git a/dmidas/backbones/__pycache__/utils.cpython-310.pyc b/dmidas/backbones/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af5ed22066e75be37bba8ab127900081dacfde91 Binary files /dev/null and b/dmidas/backbones/__pycache__/utils.cpython-310.pyc differ diff --git a/dmidas/backbones/__pycache__/utils.cpython-311.pyc b/dmidas/backbones/__pycache__/utils.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..50fae0d8e179fa0cb2b59c5e303ca01f17701b07 Binary files /dev/null and b/dmidas/backbones/__pycache__/utils.cpython-311.pyc differ diff --git a/dmidas/backbones/__pycache__/utils.cpython-312.pyc b/dmidas/backbones/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0ce2fa8b54736aafddb5819e946130769e2f8b47 Binary files /dev/null and b/dmidas/backbones/__pycache__/utils.cpython-312.pyc differ diff --git a/dmidas/backbones/__pycache__/vit.cpython-310.pyc b/dmidas/backbones/__pycache__/vit.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f82d0ecfc8bdfdb6e8e9317c84010c917867c908 Binary files /dev/null and b/dmidas/backbones/__pycache__/vit.cpython-310.pyc differ diff --git a/dmidas/backbones/__pycache__/vit.cpython-311.pyc b/dmidas/backbones/__pycache__/vit.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c61d89ce0b16b1df6247e747c8a2df3cf31a420 Binary files /dev/null and b/dmidas/backbones/__pycache__/vit.cpython-311.pyc differ diff --git a/dmidas/backbones/__pycache__/vit.cpython-312.pyc b/dmidas/backbones/__pycache__/vit.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ecfce21bfb9dad364062b9ae3b114564174a1d2b Binary files /dev/null and b/dmidas/backbones/__pycache__/vit.cpython-312.pyc differ diff --git a/dmidas/backbones/beit.py b/dmidas/backbones/beit.py new file mode 100644 index 0000000000000000000000000000000000000000..77b9e7ebd54506d579a9c0527f62efde1ed929b9 --- /dev/null +++ b/dmidas/backbones/beit.py @@ -0,0 +1,198 @@ +import timm +import torch +import types + +import numpy as np +import torch.nn.functional as F + +from .utils import forward_adapted_unflatten, make_backbone_default +from timm.models.beit import gen_relative_position_index +from torch.utils.checkpoint import checkpoint +from typing import Optional + + +def forward_beit(pretrained, x): + return 
forward_adapted_unflatten(pretrained, x, "forward_features") + + +def patch_embed_forward(self, x): + """ + Modification of timm.models.layers.patch_embed.py: PatchEmbed.forward to support arbitrary window sizes. + """ + x = self.proj(x) + if self.flatten: + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + return x + + +def _get_rel_pos_bias(self, window_size): + """ + Modification of timm.models.beit.py: Attention._get_rel_pos_bias to support arbitrary window sizes. + """ + old_height = 2 * self.window_size[0] - 1 + old_width = 2 * self.window_size[1] - 1 + + new_height = 2 * window_size[0] - 1 + new_width = 2 * window_size[1] - 1 + + old_relative_position_bias_table = self.relative_position_bias_table + + old_num_relative_distance = self.num_relative_distance + new_num_relative_distance = new_height * new_width + 3 + + old_sub_table = old_relative_position_bias_table[:old_num_relative_distance - 3] + + old_sub_table = old_sub_table.reshape(1, old_width, old_height, -1).permute(0, 3, 1, 2) + new_sub_table = F.interpolate(old_sub_table, size=(int(new_height),int(new_width)), mode="bilinear") + new_sub_table = new_sub_table.permute(0, 2, 3, 1).reshape(new_num_relative_distance - 3, -1) + + new_relative_position_bias_table = torch.cat( + [new_sub_table, old_relative_position_bias_table[old_num_relative_distance - 3:]]) + + key = str(window_size[1]) + "," + str(window_size[0]) + if key not in self.relative_position_indices.keys(): + self.relative_position_indices[key] = gen_relative_position_index(window_size) + + relative_position_bias = new_relative_position_bias_table[ + self.relative_position_indices[key].view(-1)].view( + window_size[0] * window_size[1] + 1, + window_size[0] * window_size[1] + 1, -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + return relative_position_bias.unsqueeze(0) + + +def attention_forward(self, x, resolution, shared_rel_pos_bias: Optional[torch.Tensor] = None): 
+ """ + Modification of timm.models.beit.py: Attention.forward to support arbitrary window sizes. + """ + B, N, C = x.shape + + qkv_bias = torch.cat((self.q_bias, self.k_bias, self.v_bias)) if self.q_bias is not None else None + qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias) + qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + q, k, v = qkv.unbind(0) # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + + if self.relative_position_bias_table is not None: + window_size = tuple(np.array(resolution) // 16) + attn = attn + self._get_rel_pos_bias(window_size) + if shared_rel_pos_bias is not None: + attn = attn + shared_rel_pos_bias + + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, -1) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +def block_forward(self, x, resolution, shared_rel_pos_bias: Optional[torch.Tensor] = None): + """ + Modification of timm.models.beit.py: Block.forward to support arbitrary window sizes. + """ + if hasattr(self, 'drop_path1') and not hasattr(self, 'drop_path'): + self.drop_path = self.drop_path1 + if self.gamma_1 is None: + x = x + self.drop_path(self.attn(self.norm1(x), resolution, shared_rel_pos_bias=shared_rel_pos_bias)) + x = x + self.drop_path(self.mlp(self.norm2(x))) + else: + x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x), resolution, + shared_rel_pos_bias=shared_rel_pos_bias)) + x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x))) + return x + + +def beit_forward_features(self, x): + """ + Modification of timm.models.beit.py: Beit.forward_features to support arbitrary window sizes. 
+ """ + resolution = x.shape[2:] + + x = self.patch_embed(x) + x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1) + if self.pos_embed is not None: + x = x + self.pos_embed + x = self.pos_drop(x) + + rel_pos_bias = self.rel_pos_bias() if self.rel_pos_bias is not None else None + for blk in self.blocks: + if self.grad_checkpointing and not torch.jit.is_scripting(): + x = checkpoint(blk, x, shared_rel_pos_bias=rel_pos_bias) + else: + x = blk(x, resolution, shared_rel_pos_bias=rel_pos_bias) + x = self.norm(x) + return x + + +def _make_beit_backbone( + model, + features=[96, 192, 384, 768], + size=[384, 384], + hooks=[0, 4, 8, 11], + vit_features=768, + use_readout="ignore", + start_index=1, + start_index_readout=1, +): + backbone = make_backbone_default(model, features, size, hooks, vit_features, use_readout, start_index, + start_index_readout) + + backbone.model.patch_embed.forward = types.MethodType(patch_embed_forward, backbone.model.patch_embed) + backbone.model.forward_features = types.MethodType(beit_forward_features, backbone.model) + + for block in backbone.model.blocks: + attn = block.attn + attn._get_rel_pos_bias = types.MethodType(_get_rel_pos_bias, attn) + attn.forward = types.MethodType(attention_forward, attn) + attn.relative_position_indices = {} + + block.forward = types.MethodType(block_forward, block) + + return backbone + + +def _make_pretrained_beitl16_512(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("beit_large_patch16_512", pretrained=pretrained) + + hooks = [5, 11, 17, 23] if hooks is None else hooks + + features = [256, 512, 1024, 1024] + + return _make_beit_backbone( + model, + features=features, + size=[512, 512], + hooks=hooks, + vit_features=1024, + use_readout=use_readout, + ) + + +def _make_pretrained_beitl16_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("beit_large_patch16_384", pretrained=pretrained) + + hooks = [5, 11, 17, 23] if hooks is None else hooks + 
return _make_beit_backbone( + model, + features=[256, 512, 1024, 1024], + hooks=hooks, + vit_features=1024, + use_readout=use_readout, + ) + + +def _make_pretrained_beitb16_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("beit_base_patch16_384", pretrained=pretrained) + + hooks = [2, 5, 8, 11] if hooks is None else hooks + return _make_beit_backbone( + model, + features=[96, 192, 384, 768], + hooks=hooks, + use_readout=use_readout, + ) diff --git a/dmidas/backbones/levit.py b/dmidas/backbones/levit.py new file mode 100644 index 0000000000000000000000000000000000000000..e6a7770fb76f085a0f3b6015902797c5805bba01 --- /dev/null +++ b/dmidas/backbones/levit.py @@ -0,0 +1,106 @@ +import timm +import torch +import torch.nn as nn +import numpy as np + +from .utils import activations, get_activation, Transpose + + +def forward_levit(pretrained, x): + pretrained.model.forward_features(x) + + layer_1 = pretrained.activations["1"] + layer_2 = pretrained.activations["2"] + layer_3 = pretrained.activations["3"] + + layer_1 = pretrained.act_postprocess1(layer_1) + layer_2 = pretrained.act_postprocess2(layer_2) + layer_3 = pretrained.act_postprocess3(layer_3) + + return layer_1, layer_2, layer_3 + + +def _make_levit_backbone( + model, + hooks=[3, 11, 21], + patch_grid=[14, 14] +): + pretrained = nn.Module() + + pretrained.model = model + pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1")) + pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2")) + pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3")) + + pretrained.activations = activations + + patch_grid_size = np.array(patch_grid, dtype=int) + + pretrained.act_postprocess1 = nn.Sequential( + Transpose(1, 2), + nn.Unflatten(2, torch.Size(patch_grid_size.tolist())) + ) + pretrained.act_postprocess2 = nn.Sequential( + Transpose(1, 2), + nn.Unflatten(2, torch.Size((np.ceil(patch_grid_size / 2).astype(int)).tolist())) + ) + 
pretrained.act_postprocess3 = nn.Sequential( + Transpose(1, 2), + nn.Unflatten(2, torch.Size((np.ceil(patch_grid_size / 4).astype(int)).tolist())) + ) + + return pretrained + + +class ConvTransposeNorm(nn.Sequential): + """ + Modification of + https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/levit.py: ConvNorm + such that ConvTranspose2d is used instead of Conv2d. + """ + + def __init__( + self, in_chs, out_chs, kernel_size=1, stride=1, pad=0, dilation=1, + groups=1, bn_weight_init=1): + super().__init__() + self.add_module('c', + nn.ConvTranspose2d(in_chs, out_chs, kernel_size, stride, pad, dilation, groups, bias=False)) + self.add_module('bn', nn.BatchNorm2d(out_chs)) + + nn.init.constant_(self.bn.weight, bn_weight_init) + + @torch.no_grad() + def fuse(self): + c, bn = self._modules.values() + w = bn.weight / (bn.running_var + bn.eps) ** 0.5 + w = c.weight * w[:, None, None, None] + b = bn.bias - bn.running_mean * bn.weight / (bn.running_var + bn.eps) ** 0.5 + m = nn.ConvTranspose2d( + w.size(1), w.size(0), w.shape[2:], stride=self.c.stride, + padding=self.c.padding, dilation=self.c.dilation, groups=self.c.groups) + m.weight.data.copy_(w) + m.bias.data.copy_(b) + return m + + +def stem_b4_transpose(in_chs, out_chs, activation): + """ + Modification of + https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/levit.py: stem_b16 + such that ConvTranspose2d is used instead of Conv2d and stem is also reduced to the half. 
+ """ + return nn.Sequential( + ConvTransposeNorm(in_chs, out_chs, 3, 2, 1), + activation(), + ConvTransposeNorm(out_chs, out_chs // 2, 3, 2, 1), + activation()) + + +def _make_pretrained_levit_384(pretrained, hooks=None): + model = timm.create_model("levit_384", pretrained=pretrained) + + hooks = [3, 11, 21] if hooks == None else hooks + return _make_levit_backbone( + model, + hooks=hooks + ) diff --git a/dmidas/backbones/next_vit.py b/dmidas/backbones/next_vit.py new file mode 100644 index 0000000000000000000000000000000000000000..add68bd82d5799abadcaa1cd731110b6ac54c623 --- /dev/null +++ b/dmidas/backbones/next_vit.py @@ -0,0 +1,483 @@ +import timm + +import torch.nn as nn + +from pathlib import Path +from .utils import activations, forward_default, get_activation + +# thygate: just dropped the file in place here together with the single function import merge_pre_bn from Next_ViT repo which is no longer required : + +#file = open( Path.joinpath(Path.cwd(), "/extensions/stable-diffusion-webui-depthmap-script/midas/externals/Next_ViT/classification/nextvit.py"), "r") +#source_code = file.read().replace(" utils", " externals.Next_ViT.classification.utils") +#exec(source_code) + +#start of file : Next_ViT/classification/nextvit.py : + +# Copyright (c) ByteDance Inc. All rights reserved. +from functools import partial + +import torch +import torch.utils.checkpoint as checkpoint +from einops import rearrange +from timm.models.layers import DropPath, trunc_normal_ +from timm.models.registry import register_model +from torch import nn + +# function from Next_ViT/classification/utils.py : merge_pre_bn +# copied here to get rid of Next_ViT repo dependancy +def merge_pre_bn(module, pre_bn_1, pre_bn_2=None): + """ Merge pre BN to reduce inference runtime. 
+ """ + weight = module.weight.data + if module.bias is None: + zeros = torch.zeros(module.out_channels, device=weight.device).type(weight.type()) + module.bias = nn.Parameter(zeros) + bias = module.bias.data + if pre_bn_2 is None: + assert pre_bn_1.track_running_stats is True, "Unsupport bn_module.track_running_stats is False" + assert pre_bn_1.affine is True, "Unsupport bn_module.affine is False" + + scale_invstd = pre_bn_1.running_var.add(pre_bn_1.eps).pow(-0.5) + extra_weight = scale_invstd * pre_bn_1.weight + extra_bias = pre_bn_1.bias - pre_bn_1.weight * pre_bn_1.running_mean * scale_invstd + else: + assert pre_bn_1.track_running_stats is True, "Unsupport bn_module.track_running_stats is False" + assert pre_bn_1.affine is True, "Unsupport bn_module.affine is False" + + assert pre_bn_2.track_running_stats is True, "Unsupport bn_module.track_running_stats is False" + assert pre_bn_2.affine is True, "Unsupport bn_module.affine is False" + + scale_invstd_1 = pre_bn_1.running_var.add(pre_bn_1.eps).pow(-0.5) + scale_invstd_2 = pre_bn_2.running_var.add(pre_bn_2.eps).pow(-0.5) + + extra_weight = scale_invstd_1 * pre_bn_1.weight * scale_invstd_2 * pre_bn_2.weight + extra_bias = scale_invstd_2 * pre_bn_2.weight *(pre_bn_1.bias - pre_bn_1.weight * pre_bn_1.running_mean * scale_invstd_1 - pre_bn_2.running_mean) + pre_bn_2.bias + + if isinstance(module, nn.Linear): + extra_bias = weight @ extra_bias + weight.mul_(extra_weight.view(1, weight.size(1)).expand_as(weight)) + elif isinstance(module, nn.Conv2d): + assert weight.shape[2] == 1 and weight.shape[3] == 1 + weight = weight.reshape(weight.shape[0], weight.shape[1]) + extra_bias = weight @ extra_bias + weight.mul_(extra_weight.view(1, weight.size(1)).expand_as(weight)) + weight = weight.reshape(weight.shape[0], weight.shape[1], 1, 1) + bias.add_(extra_bias) + + module.weight.data = weight + module.bias.data = bias + + + +NORM_EPS = 1e-5 + + +class ConvBNReLU(nn.Module): + def __init__( + self, + in_channels, + 
out_channels, + kernel_size, + stride, + groups=1): + super(ConvBNReLU, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, + padding=1, groups=groups, bias=False) + self.norm = nn.BatchNorm2d(out_channels, eps=NORM_EPS) + self.act = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.norm(x) + x = self.act(x) + return x + + +def _make_divisible(v, divisor, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class PatchEmbed(nn.Module): + def __init__(self, + in_channels, + out_channels, + stride=1): + super(PatchEmbed, self).__init__() + norm_layer = partial(nn.BatchNorm2d, eps=NORM_EPS) + if stride == 2: + self.avgpool = nn.AvgPool2d((2, 2), stride=2, ceil_mode=True, count_include_pad=False) + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False) + self.norm = norm_layer(out_channels) + elif in_channels != out_channels: + self.avgpool = nn.Identity() + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False) + self.norm = norm_layer(out_channels) + else: + self.avgpool = nn.Identity() + self.conv = nn.Identity() + self.norm = nn.Identity() + + def forward(self, x): + return self.norm(self.conv(self.avgpool(x))) + + +class MHCA(nn.Module): + """ + Multi-Head Convolutional Attention + """ + def __init__(self, out_channels, head_dim): + super(MHCA, self).__init__() + norm_layer = partial(nn.BatchNorm2d, eps=NORM_EPS) + self.group_conv3x3 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, + padding=1, groups=out_channels // head_dim, bias=False) + self.norm = norm_layer(out_channels) + self.act = nn.ReLU(inplace=True) + self.projection = nn.Conv2d(out_channels, out_channels, kernel_size=1, bias=False) + + def 
forward(self, x): + out = self.group_conv3x3(x) + out = self.norm(out) + out = self.act(out) + out = self.projection(out) + return out + + +class Mlp(nn.Module): + def __init__(self, in_features, out_features=None, mlp_ratio=None, drop=0., bias=True): + super().__init__() + out_features = out_features or in_features + hidden_dim = _make_divisible(in_features * mlp_ratio, 32) + self.conv1 = nn.Conv2d(in_features, hidden_dim, kernel_size=1, bias=bias) + self.act = nn.ReLU(inplace=True) + self.conv2 = nn.Conv2d(hidden_dim, out_features, kernel_size=1, bias=bias) + self.drop = nn.Dropout(drop) + + def merge_bn(self, pre_norm): + merge_pre_bn(self.conv1, pre_norm) + + def forward(self, x): + x = self.conv1(x) + x = self.act(x) + x = self.drop(x) + x = self.conv2(x) + x = self.drop(x) + return x + + +class NCB(nn.Module): + """ + Next Convolution Block + """ + def __init__(self, in_channels, out_channels, stride=1, path_dropout=0, + drop=0, head_dim=32, mlp_ratio=3): + super(NCB, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + norm_layer = partial(nn.BatchNorm2d, eps=NORM_EPS) + assert out_channels % head_dim == 0 + + self.patch_embed = PatchEmbed(in_channels, out_channels, stride) + self.mhca = MHCA(out_channels, head_dim) + self.attention_path_dropout = DropPath(path_dropout) + + self.norm = norm_layer(out_channels) + self.mlp = Mlp(out_channels, mlp_ratio=mlp_ratio, drop=drop, bias=True) + self.mlp_path_dropout = DropPath(path_dropout) + self.is_bn_merged = False + + def merge_bn(self): + if not self.is_bn_merged: + self.mlp.merge_bn(self.norm) + self.is_bn_merged = True + + def forward(self, x): + x = self.patch_embed(x) + x = x + self.attention_path_dropout(self.mhca(x)) + if not torch.onnx.is_in_onnx_export() and not self.is_bn_merged: + out = self.norm(x) + else: + out = x + x = x + self.mlp_path_dropout(self.mlp(out)) + return x + + +class E_MHSA(nn.Module): + """ + Efficient Multi-Head Self Attention + """ + def 
__init__(self, dim, out_dim=None, head_dim=32, qkv_bias=True, qk_scale=None, + attn_drop=0, proj_drop=0., sr_ratio=1): + super().__init__() + self.dim = dim + self.out_dim = out_dim if out_dim is not None else dim + self.num_heads = self.dim // head_dim + self.scale = qk_scale or head_dim ** -0.5 + self.q = nn.Linear(dim, self.dim, bias=qkv_bias) + self.k = nn.Linear(dim, self.dim, bias=qkv_bias) + self.v = nn.Linear(dim, self.dim, bias=qkv_bias) + self.proj = nn.Linear(self.dim, self.out_dim) + self.attn_drop = nn.Dropout(attn_drop) + self.proj_drop = nn.Dropout(proj_drop) + + self.sr_ratio = sr_ratio + self.N_ratio = sr_ratio ** 2 + if sr_ratio > 1: + self.sr = nn.AvgPool1d(kernel_size=self.N_ratio, stride=self.N_ratio) + self.norm = nn.BatchNorm1d(dim, eps=NORM_EPS) + self.is_bn_merged = False + + def merge_bn(self, pre_bn): + merge_pre_bn(self.q, pre_bn) + if self.sr_ratio > 1: + merge_pre_bn(self.k, pre_bn, self.norm) + merge_pre_bn(self.v, pre_bn, self.norm) + else: + merge_pre_bn(self.k, pre_bn) + merge_pre_bn(self.v, pre_bn) + self.is_bn_merged = True + + def forward(self, x): + B, N, C = x.shape + q = self.q(x) + q = q.reshape(B, N, self.num_heads, int(C // self.num_heads)).permute(0, 2, 1, 3) + + if self.sr_ratio > 1: + x_ = x.transpose(1, 2) + x_ = self.sr(x_) + if not torch.onnx.is_in_onnx_export() and not self.is_bn_merged: + x_ = self.norm(x_) + x_ = x_.transpose(1, 2) + k = self.k(x_) + k = k.reshape(B, -1, self.num_heads, int(C // self.num_heads)).permute(0, 2, 3, 1) + v = self.v(x_) + v = v.reshape(B, -1, self.num_heads, int(C // self.num_heads)).permute(0, 2, 1, 3) + else: + k = self.k(x) + k = k.reshape(B, -1, self.num_heads, int(C // self.num_heads)).permute(0, 2, 3, 1) + v = self.v(x) + v = v.reshape(B, -1, self.num_heads, int(C // self.num_heads)).permute(0, 2, 1, 3) + attn = (q @ k) * self.scale + + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = 
self.proj_drop(x) + return x + + +class NTB(nn.Module): + """ + Next Transformer Block + """ + def __init__( + self, in_channels, out_channels, path_dropout, stride=1, sr_ratio=1, + mlp_ratio=2, head_dim=32, mix_block_ratio=0.75, attn_drop=0, drop=0, + ): + super(NTB, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.mix_block_ratio = mix_block_ratio + norm_func = partial(nn.BatchNorm2d, eps=NORM_EPS) + + self.mhsa_out_channels = _make_divisible(int(out_channels * mix_block_ratio), 32) + self.mhca_out_channels = out_channels - self.mhsa_out_channels + + self.patch_embed = PatchEmbed(in_channels, self.mhsa_out_channels, stride) + self.norm1 = norm_func(self.mhsa_out_channels) + self.e_mhsa = E_MHSA(self.mhsa_out_channels, head_dim=head_dim, sr_ratio=sr_ratio, + attn_drop=attn_drop, proj_drop=drop) + self.mhsa_path_dropout = DropPath(path_dropout * mix_block_ratio) + + self.projection = PatchEmbed(self.mhsa_out_channels, self.mhca_out_channels, stride=1) + self.mhca = MHCA(self.mhca_out_channels, head_dim=head_dim) + self.mhca_path_dropout = DropPath(path_dropout * (1 - mix_block_ratio)) + + self.norm2 = norm_func(out_channels) + self.mlp = Mlp(out_channels, mlp_ratio=mlp_ratio, drop=drop) + self.mlp_path_dropout = DropPath(path_dropout) + + self.is_bn_merged = False + + def merge_bn(self): + if not self.is_bn_merged: + self.e_mhsa.merge_bn(self.norm1) + self.mlp.merge_bn(self.norm2) + self.is_bn_merged = True + + def forward(self, x): + x = self.patch_embed(x) + B, C, H, W = x.shape + if not torch.onnx.is_in_onnx_export() and not self.is_bn_merged: + out = self.norm1(x) + else: + out = x + out = rearrange(out, "b c h w -> b (h w) c") # b n c + out = self.mhsa_path_dropout(self.e_mhsa(out)) + x = x + rearrange(out, "b (h w) c -> b c h w", h=H) + + out = self.projection(x) + out = out + self.mhca_path_dropout(self.mhca(out)) + x = torch.cat([x, out], dim=1) + + if not torch.onnx.is_in_onnx_export() and not self.is_bn_merged: + 
out = self.norm2(x) + else: + out = x + x = x + self.mlp_path_dropout(self.mlp(out)) + return x + + +class NextViT(nn.Module): + def __init__(self, stem_chs, depths, path_dropout, attn_drop=0, drop=0, num_classes=1000, + strides=[1, 2, 2, 2], sr_ratios=[8, 4, 2, 1], head_dim=32, mix_block_ratio=0.75, + use_checkpoint=False): + super(NextViT, self).__init__() + self.use_checkpoint = use_checkpoint + + self.stage_out_channels = [[96] * (depths[0]), + [192] * (depths[1] - 1) + [256], + [384, 384, 384, 384, 512] * (depths[2] // 5), + [768] * (depths[3] - 1) + [1024]] + + # Next Hybrid Strategy + self.stage_block_types = [[NCB] * depths[0], + [NCB] * (depths[1] - 1) + [NTB], + [NCB, NCB, NCB, NCB, NTB] * (depths[2] // 5), + [NCB] * (depths[3] - 1) + [NTB]] + + self.stem = nn.Sequential( + ConvBNReLU(3, stem_chs[0], kernel_size=3, stride=2), + ConvBNReLU(stem_chs[0], stem_chs[1], kernel_size=3, stride=1), + ConvBNReLU(stem_chs[1], stem_chs[2], kernel_size=3, stride=1), + ConvBNReLU(stem_chs[2], stem_chs[2], kernel_size=3, stride=2), + ) + input_channel = stem_chs[-1] + features = [] + idx = 0 + dpr = [x.item() for x in torch.linspace(0, path_dropout, sum(depths))] # stochastic depth decay rule + for stage_id in range(len(depths)): + numrepeat = depths[stage_id] + output_channels = self.stage_out_channels[stage_id] + block_types = self.stage_block_types[stage_id] + for block_id in range(numrepeat): + if strides[stage_id] == 2 and block_id == 0: + stride = 2 + else: + stride = 1 + output_channel = output_channels[block_id] + block_type = block_types[block_id] + if block_type is NCB: + layer = NCB(input_channel, output_channel, stride=stride, path_dropout=dpr[idx + block_id], + drop=drop, head_dim=head_dim) + features.append(layer) + elif block_type is NTB: + layer = NTB(input_channel, output_channel, path_dropout=dpr[idx + block_id], stride=stride, + sr_ratio=sr_ratios[stage_id], head_dim=head_dim, mix_block_ratio=mix_block_ratio, + attn_drop=attn_drop, drop=drop) + 
features.append(layer) + input_channel = output_channel + idx += numrepeat + self.features = nn.Sequential(*features) + + self.norm = nn.BatchNorm2d(output_channel, eps=NORM_EPS) + + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.proj_head = nn.Sequential( + nn.Linear(output_channel, num_classes), + ) + + self.stage_out_idx = [sum(depths[:idx + 1]) - 1 for idx in range(len(depths))] + print('initialize_weights...') + self._initialize_weights() + + def merge_bn(self): + self.eval() + for idx, module in self.named_modules(): + if isinstance(module, NCB) or isinstance(module, NTB): + module.merge_bn() + + def _initialize_weights(self): + for n, m in self.named_modules(): + if isinstance(m, (nn.BatchNorm2d, nn.GroupNorm, nn.LayerNorm, nn.BatchNorm1d)): + nn.init.constant_(m.weight, 1.0) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if hasattr(m, 'bias') and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Conv2d): + trunc_normal_(m.weight, std=.02) + if hasattr(m, 'bias') and m.bias is not None: + nn.init.constant_(m.bias, 0) + + def forward(self, x): + x = self.stem(x) + for idx, layer in enumerate(self.features): + if self.use_checkpoint: + x = checkpoint.checkpoint(layer, x) + else: + x = layer(x) + x = self.norm(x) + x = self.avgpool(x) + x = torch.flatten(x, 1) + x = self.proj_head(x) + return x + + +@register_model +def nextvit_small(pretrained=False, pretrained_cfg=None, **kwargs): + model = NextViT(stem_chs=[64, 32, 64], depths=[3, 4, 10, 3], path_dropout=0.1, **kwargs) + return model + + +@register_model +def nextvit_base(pretrained=False, pretrained_cfg=None, **kwargs): + model = NextViT(stem_chs=[64, 32, 64], depths=[3, 4, 20, 3], path_dropout=0.2, **kwargs) + return model + + +@register_model +def nextvit_large(pretrained=False, pretrained_cfg=None, **kwargs): + model = NextViT(stem_chs=[64, 32, 64], depths=[3, 4, 30, 3], path_dropout=0.2, **kwargs) + return model + +# end 
of Next_ViT/classification/nextvit.py + + +def forward_next_vit(pretrained, x): + return forward_default(pretrained, x, "forward") + + +def _make_next_vit_backbone( + model, + hooks=[2, 6, 36, 39], +): + pretrained = nn.Module() + + pretrained.model = model + pretrained.model.features[hooks[0]].register_forward_hook(get_activation("1")) + pretrained.model.features[hooks[1]].register_forward_hook(get_activation("2")) + pretrained.model.features[hooks[2]].register_forward_hook(get_activation("3")) + pretrained.model.features[hooks[3]].register_forward_hook(get_activation("4")) + + pretrained.activations = activations + + return pretrained + + +def _make_pretrained_next_vit_large_6m(hooks=None): + model = timm.create_model("nextvit_large") + + hooks = [2, 6, 36, 39] if hooks == None else hooks + return _make_next_vit_backbone( + model, + hooks=hooks, + ) diff --git a/dmidas/backbones/swin.py b/dmidas/backbones/swin.py new file mode 100644 index 0000000000000000000000000000000000000000..7477c70cb0109c875ce58ba9b2dbd42e12b2cbe1 --- /dev/null +++ b/dmidas/backbones/swin.py @@ -0,0 +1,13 @@ +import timm + +from .swin_common import _make_swin_backbone + + +def _make_pretrained_swinl12_384(pretrained, hooks=None): + model = timm.create_model("swin_large_patch4_window12_384", pretrained=pretrained) + + hooks = [1, 1, 17, 1] if hooks == None else hooks + return _make_swin_backbone( + model, + hooks=hooks + ) diff --git a/dmidas/backbones/swin2.py b/dmidas/backbones/swin2.py new file mode 100644 index 0000000000000000000000000000000000000000..9b0ab3ccb244fc2d71cb01b96a2d6f0544a041ca --- /dev/null +++ b/dmidas/backbones/swin2.py @@ -0,0 +1,34 @@ +import timm + +from .swin_common import _make_swin_backbone + + +def _make_pretrained_swin2l24_384(pretrained, hooks=None): + model = timm.create_model("swinv2_large_window12to24_192to384_22kft1k", pretrained=pretrained) + + hooks = [1, 1, 17, 1] if hooks == None else hooks + return _make_swin_backbone( + model, + hooks=hooks + ) + + 
+def _make_pretrained_swin2b24_384(pretrained, hooks=None): + model = timm.create_model("swinv2_base_window12to24_192to384_22kft1k", pretrained=pretrained) + + hooks = [1, 1, 17, 1] if hooks == None else hooks + return _make_swin_backbone( + model, + hooks=hooks + ) + + +def _make_pretrained_swin2t16_256(pretrained, hooks=None): + model = timm.create_model("swinv2_tiny_window16_256", pretrained=pretrained) + + hooks = [1, 1, 5, 1] if hooks == None else hooks + return _make_swin_backbone( + model, + hooks=hooks, + patch_grid=[64, 64] + ) diff --git a/dmidas/backbones/swin_common.py b/dmidas/backbones/swin_common.py new file mode 100644 index 0000000000000000000000000000000000000000..e61c3633fafcd496505ce209f8152f0aeee0572c --- /dev/null +++ b/dmidas/backbones/swin_common.py @@ -0,0 +1,52 @@ +import torch + +import torch.nn as nn +import numpy as np + +from .utils import activations, forward_default, get_activation, Transpose + + +def forward_swin(pretrained, x): + return forward_default(pretrained, x) + + +def _make_swin_backbone( + model, + hooks=[1, 1, 17, 1], + patch_grid=[96, 96] +): + pretrained = nn.Module() + + pretrained.model = model + pretrained.model.layers[0].blocks[hooks[0]].register_forward_hook(get_activation("1")) + pretrained.model.layers[1].blocks[hooks[1]].register_forward_hook(get_activation("2")) + pretrained.model.layers[2].blocks[hooks[2]].register_forward_hook(get_activation("3")) + pretrained.model.layers[3].blocks[hooks[3]].register_forward_hook(get_activation("4")) + + pretrained.activations = activations + + if hasattr(model, "patch_grid"): + used_patch_grid = model.patch_grid + else: + used_patch_grid = patch_grid + + patch_grid_size = np.array(used_patch_grid, dtype=int) + + pretrained.act_postprocess1 = nn.Sequential( + Transpose(1, 2), + nn.Unflatten(2, torch.Size(patch_grid_size.tolist())) + ) + pretrained.act_postprocess2 = nn.Sequential( + Transpose(1, 2), + nn.Unflatten(2, torch.Size((patch_grid_size // 2).tolist())) + ) + 
pretrained.act_postprocess3 = nn.Sequential( + Transpose(1, 2), + nn.Unflatten(2, torch.Size((patch_grid_size // 4).tolist())) + ) + pretrained.act_postprocess4 = nn.Sequential( + Transpose(1, 2), + nn.Unflatten(2, torch.Size((patch_grid_size // 8).tolist())) + ) + + return pretrained diff --git a/dmidas/backbones/utils.py b/dmidas/backbones/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b334c3c55d5ee1da23d481de8b7555c5e9093754 --- /dev/null +++ b/dmidas/backbones/utils.py @@ -0,0 +1,249 @@ +import torch + +import torch.nn as nn + + +class Slice(nn.Module): + def __init__(self, start_index=1): + super(Slice, self).__init__() + self.start_index = start_index + + def forward(self, x): + return x[:, self.start_index:] + + +class AddReadout(nn.Module): + def __init__(self, start_index=1): + super(AddReadout, self).__init__() + self.start_index = start_index + + def forward(self, x): + if self.start_index == 2: + readout = (x[:, 0] + x[:, 1]) / 2 + else: + readout = x[:, 0] + return x[:, self.start_index:] + readout.unsqueeze(1) + + +class ProjectReadout(nn.Module): + def __init__(self, in_features, start_index=1): + super(ProjectReadout, self).__init__() + self.start_index = start_index + + self.project = nn.Sequential(nn.Linear(2 * in_features, in_features), nn.GELU()) + + def forward(self, x): + readout = x[:, 0].unsqueeze(1).expand_as(x[:, self.start_index:]) + features = torch.cat((x[:, self.start_index:], readout), -1) + + return self.project(features) + + +class Transpose(nn.Module): + def __init__(self, dim0, dim1): + super(Transpose, self).__init__() + self.dim0 = dim0 + self.dim1 = dim1 + + def forward(self, x): + x = x.transpose(self.dim0, self.dim1) + return x + + +activations = {} + + +def get_activation(name): + def hook(model, input, output): + activations[name] = output + + return hook + + +def forward_default(pretrained, x, function_name="forward_features"): + exec(f"pretrained.model.{function_name}(x)") + + layer_1 = 
pretrained.activations["1"] + layer_2 = pretrained.activations["2"] + layer_3 = pretrained.activations["3"] + layer_4 = pretrained.activations["4"] + + if hasattr(pretrained, "act_postprocess1"): + layer_1 = pretrained.act_postprocess1(layer_1) + if hasattr(pretrained, "act_postprocess2"): + layer_2 = pretrained.act_postprocess2(layer_2) + if hasattr(pretrained, "act_postprocess3"): + layer_3 = pretrained.act_postprocess3(layer_3) + if hasattr(pretrained, "act_postprocess4"): + layer_4 = pretrained.act_postprocess4(layer_4) + + return layer_1, layer_2, layer_3, layer_4 + + +def forward_adapted_unflatten(pretrained, x, function_name="forward_features"): + b, c, h, w = x.shape + + exec(f"glob = pretrained.model.{function_name}(x)") + + layer_1 = pretrained.activations["1"] + layer_2 = pretrained.activations["2"] + layer_3 = pretrained.activations["3"] + layer_4 = pretrained.activations["4"] + + layer_1 = pretrained.act_postprocess1[0:2](layer_1) + layer_2 = pretrained.act_postprocess2[0:2](layer_2) + layer_3 = pretrained.act_postprocess3[0:2](layer_3) + layer_4 = pretrained.act_postprocess4[0:2](layer_4) + + unflatten = nn.Sequential( + nn.Unflatten( + 2, + torch.Size( + [ + h // pretrained.model.patch_size[1], + w // pretrained.model.patch_size[0], + ] + ), + ) + ) + + if layer_1.ndim == 3: + layer_1 = unflatten(layer_1) + if layer_2.ndim == 3: + layer_2 = unflatten(layer_2) + if layer_3.ndim == 3: + layer_3 = unflatten(layer_3) + if layer_4.ndim == 3: + layer_4 = unflatten(layer_4) + + layer_1 = pretrained.act_postprocess1[3: len(pretrained.act_postprocess1)](layer_1) + layer_2 = pretrained.act_postprocess2[3: len(pretrained.act_postprocess2)](layer_2) + layer_3 = pretrained.act_postprocess3[3: len(pretrained.act_postprocess3)](layer_3) + layer_4 = pretrained.act_postprocess4[3: len(pretrained.act_postprocess4)](layer_4) + + return layer_1, layer_2, layer_3, layer_4 + + +def get_readout_oper(vit_features, features, use_readout, start_index=1): + if use_readout == 
"ignore": + readout_oper = [Slice(start_index)] * len(features) + elif use_readout == "add": + readout_oper = [AddReadout(start_index)] * len(features) + elif use_readout == "project": + readout_oper = [ + ProjectReadout(vit_features, start_index) for out_feat in features + ] + else: + assert ( + False + ), "wrong operation for readout token, use_readout can be 'ignore', 'add', or 'project'" + + return readout_oper + + +def make_backbone_default( + model, + features=[96, 192, 384, 768], + size=[384, 384], + hooks=[2, 5, 8, 11], + vit_features=768, + use_readout="ignore", + start_index=1, + start_index_readout=1, +): + pretrained = nn.Module() + + pretrained.model = model + pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1")) + pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2")) + pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3")) + pretrained.model.blocks[hooks[3]].register_forward_hook(get_activation("4")) + + pretrained.activations = activations + + readout_oper = get_readout_oper(vit_features, features, use_readout, start_index_readout) + + # 32, 48, 136, 384 + pretrained.act_postprocess1 = nn.Sequential( + readout_oper[0], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[0], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[0], + out_channels=features[0], + kernel_size=4, + stride=4, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + + pretrained.act_postprocess2 = nn.Sequential( + readout_oper[1], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[1], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[1], + out_channels=features[1], + kernel_size=2, + stride=2, + padding=0, + bias=True, + dilation=1, + 
groups=1, + ), + ) + + pretrained.act_postprocess3 = nn.Sequential( + readout_oper[2], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[2], + kernel_size=1, + stride=1, + padding=0, + ), + ) + + pretrained.act_postprocess4 = nn.Sequential( + readout_oper[3], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[3], + kernel_size=1, + stride=1, + padding=0, + ), + nn.Conv2d( + in_channels=features[3], + out_channels=features[3], + kernel_size=3, + stride=2, + padding=1, + ), + ) + + pretrained.model.start_index = start_index + pretrained.model.patch_size = [16, 16] + + return pretrained diff --git a/dmidas/backbones/vit.py b/dmidas/backbones/vit.py new file mode 100644 index 0000000000000000000000000000000000000000..27e103c6e383da4fa6ea10e2ec5e19b486be0193 --- /dev/null +++ b/dmidas/backbones/vit.py @@ -0,0 +1,221 @@ +import torch +import torch.nn as nn +import timm +import types +import math +import torch.nn.functional as F + +from .utils import (activations, forward_adapted_unflatten, get_activation, get_readout_oper, + make_backbone_default, Transpose) + + +def forward_vit(pretrained, x): + return forward_adapted_unflatten(pretrained, x, "forward_flex") + + +def _resize_pos_embed(self, posemb, gs_h, gs_w): + posemb_tok, posemb_grid = ( + posemb[:, : self.start_index], + posemb[0, self.start_index:], + ) + + gs_old = int(math.sqrt(len(posemb_grid))) + + posemb_grid = posemb_grid.reshape(1, gs_old, gs_old, -1).permute(0, 3, 1, 2) + posemb_grid = F.interpolate(posemb_grid, size=(gs_h, gs_w), mode="bilinear") + posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, gs_h * gs_w, -1) + + posemb = torch.cat([posemb_tok, posemb_grid], dim=1) + + return posemb + + +def forward_flex(self, x): + b, c, h, w = x.shape + + pos_embed = self._resize_pos_embed( + self.pos_embed, h // 
self.patch_size[1], w // self.patch_size[0] + ) + + B = x.shape[0] + + if hasattr(self.patch_embed, "backbone"): + x = self.patch_embed.backbone(x) + if isinstance(x, (list, tuple)): + x = x[-1] # last feature if backbone outputs list/tuple of features + + x = self.patch_embed.proj(x).flatten(2).transpose(1, 2) + + if getattr(self, "dist_token", None) is not None: + cls_tokens = self.cls_token.expand( + B, -1, -1 + ) # stole cls_tokens impl from Phil Wang, thanks + dist_token = self.dist_token.expand(B, -1, -1) + x = torch.cat((cls_tokens, dist_token, x), dim=1) + else: + if self.no_embed_class: + x = x + pos_embed + cls_tokens = self.cls_token.expand( + B, -1, -1 + ) # stole cls_tokens impl from Phil Wang, thanks + x = torch.cat((cls_tokens, x), dim=1) + + if not self.no_embed_class: + x = x + pos_embed + x = self.pos_drop(x) + + for blk in self.blocks: + x = blk(x) + + x = self.norm(x) + + return x + + +def _make_vit_b16_backbone( + model, + features=[96, 192, 384, 768], + size=[384, 384], + hooks=[2, 5, 8, 11], + vit_features=768, + use_readout="ignore", + start_index=1, + start_index_readout=1, +): + pretrained = make_backbone_default(model, features, size, hooks, vit_features, use_readout, start_index, + start_index_readout) + + # We inject this function into the VisionTransformer instances so that + # we can use it with interpolated position embeddings without modifying the library source. 
+ pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model) + pretrained.model._resize_pos_embed = types.MethodType( + _resize_pos_embed, pretrained.model + ) + + return pretrained + + +def _make_pretrained_vitl16_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("vit_large_patch16_384", pretrained=pretrained) + + hooks = [5, 11, 17, 23] if hooks == None else hooks + return _make_vit_b16_backbone( + model, + features=[256, 512, 1024, 1024], + hooks=hooks, + vit_features=1024, + use_readout=use_readout, + ) + + +def _make_pretrained_vitb16_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("vit_base_patch16_384", pretrained=pretrained) + + hooks = [2, 5, 8, 11] if hooks == None else hooks + return _make_vit_b16_backbone( + model, features=[96, 192, 384, 768], hooks=hooks, use_readout=use_readout + ) + + +def _make_vit_b_rn50_backbone( + model, + features=[256, 512, 768, 768], + size=[384, 384], + hooks=[0, 1, 8, 11], + vit_features=768, + patch_size=[16, 16], + number_stages=2, + use_vit_only=False, + use_readout="ignore", + start_index=1, +): + pretrained = nn.Module() + + pretrained.model = model + + used_number_stages = 0 if use_vit_only else number_stages + for s in range(used_number_stages): + pretrained.model.patch_embed.backbone.stages[s].register_forward_hook( + get_activation(str(s + 1)) + ) + for s in range(used_number_stages, 4): + pretrained.model.blocks[hooks[s]].register_forward_hook(get_activation(str(s + 1))) + + pretrained.activations = activations + + readout_oper = get_readout_oper(vit_features, features, use_readout, start_index) + + for s in range(used_number_stages): + value = nn.Sequential(nn.Identity(), nn.Identity(), nn.Identity()) + exec(f"pretrained.act_postprocess{s + 1}=value") + for s in range(used_number_stages, 4): + if s < number_stages: + final_layer = nn.ConvTranspose2d( + in_channels=features[s], + out_channels=features[s], + kernel_size=4 // (2 ** s), 
+ stride=4 // (2 ** s), + padding=0, + bias=True, + dilation=1, + groups=1, + ) + elif s > number_stages: + final_layer = nn.Conv2d( + in_channels=features[3], + out_channels=features[3], + kernel_size=3, + stride=2, + padding=1, + ) + else: + final_layer = None + + layers = [ + readout_oper[s], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[s], + kernel_size=1, + stride=1, + padding=0, + ), + ] + if final_layer is not None: + layers.append(final_layer) + + value = nn.Sequential(*layers) + exec(f"pretrained.act_postprocess{s + 1}=value") + + pretrained.model.start_index = start_index + pretrained.model.patch_size = patch_size + + # We inject this function into the VisionTransformer instances so that + # we can use it with interpolated position embeddings without modifying the library source. + pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model) + + # We inject this function into the VisionTransformer instances so that + # we can use it with interpolated position embeddings without modifying the library source. 
+ pretrained.model._resize_pos_embed = types.MethodType( + _resize_pos_embed, pretrained.model + ) + + return pretrained + + +def _make_pretrained_vitb_rn50_384( + pretrained, use_readout="ignore", hooks=None, use_vit_only=False +): + model = timm.create_model("vit_base_resnet50_384", pretrained=pretrained) + + hooks = [0, 1, 8, 11] if hooks == None else hooks + return _make_vit_b_rn50_backbone( + model, + features=[256, 512, 768, 768], + size=[384, 384], + hooks=hooks, + use_vit_only=use_vit_only, + use_readout=use_readout, + ) diff --git a/dmidas/base_model.py b/dmidas/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..27fe93ec5bfcfae3f8e78392ca617cdb6adc9b11 --- /dev/null +++ b/dmidas/base_model.py @@ -0,0 +1,16 @@ +import torch + + +class BaseModel(torch.nn.Module): + def load(self, path): + """Load model from file. + + Args: + path (str): file path + """ + parameters = torch.load(path, map_location=torch.device('cpu')) + + if "optimizer" in parameters: + parameters = parameters["model"] + + self.load_state_dict(parameters) diff --git a/dmidas/blocks.py b/dmidas/blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..81af207479decb5c0f50dceb4fa49dc6b02136b0 --- /dev/null +++ b/dmidas/blocks.py @@ -0,0 +1,442 @@ +import torch +import torch.nn as nn + +from .backbones.beit import ( + _make_pretrained_beitl16_512, + _make_pretrained_beitl16_384, + _make_pretrained_beitb16_384, + forward_beit, +) +from .backbones.swin_common import ( + forward_swin, +) +from .backbones.swin2 import ( + _make_pretrained_swin2l24_384, + _make_pretrained_swin2b24_384, + _make_pretrained_swin2t16_256, +) +from .backbones.swin import ( + _make_pretrained_swinl12_384, +) +from .backbones.next_vit import ( + _make_pretrained_next_vit_large_6m, + forward_next_vit, +) +from .backbones.levit import ( + _make_pretrained_levit_384, + forward_levit, +) +from .backbones.vit import ( + _make_pretrained_vitb_rn50_384, + 
_make_pretrained_vitl16_384, + _make_pretrained_vitb16_384, + forward_vit, +) + +def _make_encoder(backbone, features, use_pretrained, groups=1, expand=False, exportable=True, hooks=None, + use_vit_only=False, use_readout="ignore", in_features=[96, 256, 512, 1024]): + if backbone == "beitl16_512": + pretrained = _make_pretrained_beitl16_512( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [256, 512, 1024, 1024], features, groups=groups, expand=expand + ) # BEiT_512-L (backbone) + elif backbone == "beitl16_384": + pretrained = _make_pretrained_beitl16_384( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [256, 512, 1024, 1024], features, groups=groups, expand=expand + ) # BEiT_384-L (backbone) + elif backbone == "beitb16_384": + pretrained = _make_pretrained_beitb16_384( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [96, 192, 384, 768], features, groups=groups, expand=expand + ) # BEiT_384-B (backbone) + elif backbone == "swin2l24_384": + pretrained = _make_pretrained_swin2l24_384( + use_pretrained, hooks=hooks + ) + scratch = _make_scratch( + [192, 384, 768, 1536], features, groups=groups, expand=expand + ) # Swin2-L/12to24 (backbone) + elif backbone == "swin2b24_384": + pretrained = _make_pretrained_swin2b24_384( + use_pretrained, hooks=hooks + ) + scratch = _make_scratch( + [128, 256, 512, 1024], features, groups=groups, expand=expand + ) # Swin2-B/12to24 (backbone) + elif backbone == "swin2t16_256": + pretrained = _make_pretrained_swin2t16_256( + use_pretrained, hooks=hooks + ) + scratch = _make_scratch( + [96, 192, 384, 768], features, groups=groups, expand=expand + ) # Swin2-T/16 (backbone) + elif backbone == "swinl12_384": + pretrained = _make_pretrained_swinl12_384( + use_pretrained, hooks=hooks + ) + scratch = _make_scratch( + [192, 384, 768, 1536], features, groups=groups, expand=expand + ) # Swin-L/12 (backbone) + elif backbone == 
"next_vit_large_6m": + pretrained = _make_pretrained_next_vit_large_6m(hooks=hooks) + scratch = _make_scratch( + in_features, features, groups=groups, expand=expand + ) # Next-ViT-L on ImageNet-1K-6M (backbone) + elif backbone == "levit_384": + pretrained = _make_pretrained_levit_384( + use_pretrained, hooks=hooks + ) + scratch = _make_scratch( + [384, 512, 768], features, groups=groups, expand=expand + ) # LeViT 384 (backbone) + elif backbone == "vitl16_384": + pretrained = _make_pretrained_vitl16_384( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [256, 512, 1024, 1024], features, groups=groups, expand=expand + ) # ViT-L/16 - 85.0% Top1 (backbone) + elif backbone == "vitb_rn50_384": + pretrained = _make_pretrained_vitb_rn50_384( + use_pretrained, + hooks=hooks, + use_vit_only=use_vit_only, + use_readout=use_readout, + ) + scratch = _make_scratch( + [256, 512, 768, 768], features, groups=groups, expand=expand + ) # ViT-H/16 - 85.0% Top1 (backbone) + elif backbone == "vitb16_384": + pretrained = _make_pretrained_vitb16_384( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [96, 192, 384, 768], features, groups=groups, expand=expand + ) # ViT-B/16 - 84.6% Top1 (backbone) + elif backbone == "resnext101_wsl": + pretrained = _make_pretrained_resnext101_wsl(use_pretrained) + scratch = _make_scratch([256, 512, 1024, 2048], features, groups=groups, expand=expand) # efficientnet_lite3 + elif backbone == "efficientnet_lite3": + pretrained = _make_pretrained_efficientnet_lite3(use_pretrained, exportable=exportable) + scratch = _make_scratch([32, 48, 136, 384], features, groups=groups, expand=expand) # efficientnet_lite3 + else: + print(f"Backbone '{backbone}' not implemented") + assert False + + return pretrained, scratch + + +def _make_scratch(in_shape, out_shape, groups=1, expand=False): + scratch = nn.Module() + + out_shape1 = out_shape + out_shape2 = out_shape + out_shape3 = out_shape + if 
len(in_shape) >= 4: + out_shape4 = out_shape + + if expand: + out_shape1 = out_shape + out_shape2 = out_shape*2 + out_shape3 = out_shape*4 + if len(in_shape) >= 4: + out_shape4 = out_shape*8 + + scratch.layer1_rn = nn.Conv2d( + in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + scratch.layer2_rn = nn.Conv2d( + in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + scratch.layer3_rn = nn.Conv2d( + in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + if len(in_shape) >= 4: + scratch.layer4_rn = nn.Conv2d( + in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + + return scratch + + +def _make_pretrained_efficientnet_lite3(use_pretrained, exportable=False): + efficientnet = torch.hub.load( + "rwightman/gen-efficientnet-pytorch", + "tf_efficientnet_lite3", + pretrained=use_pretrained, + exportable=exportable + ) + return _make_efficientnet_backbone(efficientnet) + + +def _make_efficientnet_backbone(effnet): + pretrained = nn.Module() + + pretrained.layer1 = nn.Sequential( + effnet.conv_stem, effnet.bn1, effnet.act1, *effnet.blocks[0:2] + ) + pretrained.layer2 = nn.Sequential(*effnet.blocks[2:3]) + pretrained.layer3 = nn.Sequential(*effnet.blocks[3:5]) + pretrained.layer4 = nn.Sequential(*effnet.blocks[5:9]) + + return pretrained + + +def _make_resnet_backbone(resnet): + pretrained = nn.Module() + pretrained.layer1 = nn.Sequential( + resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1 + ) + + pretrained.layer2 = resnet.layer2 + pretrained.layer3 = resnet.layer3 + pretrained.layer4 = resnet.layer4 + + return pretrained + + +def _make_pretrained_resnext101_wsl(use_pretrained): + resnet = torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl") + return _make_resnet_backbone(resnet) + + + +class Interpolate(nn.Module): + """Interpolation module. 
+ """ + + def __init__(self, scale_factor, mode, align_corners=False): + """Init. + + Args: + scale_factor (float): scaling + mode (str): interpolation mode + """ + super(Interpolate, self).__init__() + + self.interp = nn.functional.interpolate + self.scale_factor = scale_factor + self.mode = mode + self.align_corners = align_corners + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: interpolated data + """ + + x = self.interp( + x, scale_factor=self.scale_factor, mode=self.mode, align_corners=self.align_corners + ) + + return x + + +class ResidualConvUnit(nn.Module): + """Residual convolution module. + """ + + def __init__(self, features): + """Init. + + Args: + features (int): number of features + """ + super().__init__() + + self.conv1 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True + ) + + self.conv2 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True + ) + + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: output + """ + out = self.relu(x) + out = self.conv1(out) + out = self.relu(out) + out = self.conv2(out) + + return out + x + + +class FeatureFusionBlock(nn.Module): + """Feature fusion block. + """ + + def __init__(self, features): + """Init. + + Args: + features (int): number of features + """ + super(FeatureFusionBlock, self).__init__() + + self.resConfUnit1 = ResidualConvUnit(features) + self.resConfUnit2 = ResidualConvUnit(features) + + def forward(self, *xs): + """Forward pass. + + Returns: + tensor: output + """ + output = xs[0] + + if len(xs) == 2: + output += self.resConfUnit1(xs[1]) + + output = self.resConfUnit2(output) + + output = nn.functional.interpolate( + output, scale_factor=2, mode="bilinear", align_corners=True + ) + + return output + + + + +class ResidualConvUnit_custom(nn.Module): + """Residual convolution module. 
+ """ + + def __init__(self, features, activation, bn): + """Init. + + Args: + features (int): number of features + """ + super().__init__() + + self.bn = bn + + self.groups=1 + + self.conv1 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups + ) + + self.conv2 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups + ) + + if self.bn==True: + self.bn1 = nn.BatchNorm2d(features) + self.bn2 = nn.BatchNorm2d(features) + + self.activation = activation + + self.skip_add = nn.quantized.FloatFunctional() + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: output + """ + + out = self.activation(x) + out = self.conv1(out) + if self.bn==True: + out = self.bn1(out) + + out = self.activation(out) + out = self.conv2(out) + if self.bn==True: + out = self.bn2(out) + + if self.groups > 1: + out = self.conv_merge(out) + + return self.skip_add.add(out, x) + + # return out + x + + +class FeatureFusionBlock_custom(nn.Module): + """Feature fusion block. + """ + + def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True, size=None): + """Init. + + Args: + features (int): number of features + """ + super(FeatureFusionBlock_custom, self).__init__() + + self.deconv = deconv + self.align_corners = align_corners + + self.groups=1 + + self.expand = expand + out_features = features + if self.expand==True: + out_features = features//2 + + self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1) + + self.resConfUnit1 = ResidualConvUnit_custom(features, activation, bn) + self.resConfUnit2 = ResidualConvUnit_custom(features, activation, bn) + + self.skip_add = nn.quantized.FloatFunctional() + + self.size=size + + def forward(self, *xs, size=None): + """Forward pass. 
+ + Returns: + tensor: output + """ + output = xs[0] + + if len(xs) == 2: + res = self.resConfUnit1(xs[1]) + output = self.skip_add.add(output, res) + # output += res + + output = self.resConfUnit2(output) + + if (size is None) and (self.size is None): + modifier = {"scale_factor": 2} + elif size is None: + modifier = {"size": self.size} + else: + modifier = {"size": size} + + output = nn.functional.interpolate( + output, **modifier, mode="bilinear", align_corners=self.align_corners + ) + + output = self.out_conv(output) + + return output + diff --git a/dmidas/dpt_depth.py b/dmidas/dpt_depth.py new file mode 100644 index 0000000000000000000000000000000000000000..6af466f7794ec1d0aa9753e0a97b87b173151e66 --- /dev/null +++ b/dmidas/dpt_depth.py @@ -0,0 +1,166 @@ +import torch +import torch.nn as nn + +from .base_model import BaseModel +from .blocks import ( + FeatureFusionBlock_custom, + Interpolate, + _make_encoder, + forward_beit, + forward_swin, + forward_next_vit, + forward_levit, + forward_vit, +) +from .backbones.levit import stem_b4_transpose +from timm.models.layers import get_act_layer + + +def _make_fusion_block(features, use_bn, size = None): + return FeatureFusionBlock_custom( + features, + nn.ReLU(False), + deconv=False, + bn=use_bn, + expand=False, + align_corners=True, + size=size, + ) + + +class DPT(BaseModel): + def __init__( + self, + head, + features=256, + backbone="vitb_rn50_384", + readout="project", + channels_last=False, + use_bn=False, + **kwargs + ): + + super(DPT, self).__init__() + + self.channels_last = channels_last + + # For the Swin, Swin 2, LeViT and Next-ViT Transformers, the hierarchical architectures prevent setting the + # hooks freely. Instead, the hooks have to be chosen according to the ranges specified in the comments. 
+ hooks = { + "beitl16_512": [5, 11, 17, 23], + "beitl16_384": [5, 11, 17, 23], + "beitb16_384": [2, 5, 8, 11], + "swin2l24_384": [1, 1, 17, 1], # Allowed ranges: [0, 1], [0, 1], [ 0, 17], [ 0, 1] + "swin2b24_384": [1, 1, 17, 1], # [0, 1], [0, 1], [ 0, 17], [ 0, 1] + "swin2t16_256": [1, 1, 5, 1], # [0, 1], [0, 1], [ 0, 5], [ 0, 1] + "swinl12_384": [1, 1, 17, 1], # [0, 1], [0, 1], [ 0, 17], [ 0, 1] + "next_vit_large_6m": [2, 6, 36, 39], # [0, 2], [3, 6], [ 7, 36], [37, 39] + "levit_384": [3, 11, 21], # [0, 3], [6, 11], [14, 21] + "vitb_rn50_384": [0, 1, 8, 11], + "vitb16_384": [2, 5, 8, 11], + "vitl16_384": [5, 11, 17, 23], + }[backbone] + + if "next_vit" in backbone: + in_features = { + "next_vit_large_6m": [96, 256, 512, 1024], + }[backbone] + else: + in_features = None + + # Instantiate backbone and reassemble blocks + self.pretrained, self.scratch = _make_encoder( + backbone, + features, + False, # Set to true of you want to train from scratch, uses ImageNet weights + groups=1, + expand=False, + exportable=False, + hooks=hooks, + use_readout=readout, + in_features=in_features, + ) + + self.number_layers = len(hooks) if hooks is not None else 4 + size_refinenet3 = None + self.scratch.stem_transpose = None + + if "beit" in backbone: + self.forward_transformer = forward_beit + elif "swin" in backbone: + self.forward_transformer = forward_swin + elif "next_vit" in backbone: + self.forward_transformer = forward_next_vit + elif "levit" in backbone: + self.forward_transformer = forward_levit + size_refinenet3 = 7 + self.scratch.stem_transpose = stem_b4_transpose(256, 128, get_act_layer("hard_swish")) + else: + self.forward_transformer = forward_vit + + self.scratch.refinenet1 = _make_fusion_block(features, use_bn) + self.scratch.refinenet2 = _make_fusion_block(features, use_bn) + self.scratch.refinenet3 = _make_fusion_block(features, use_bn, size_refinenet3) + if self.number_layers >= 4: + self.scratch.refinenet4 = _make_fusion_block(features, use_bn) + + 
self.scratch.output_conv = head + + + def forward(self, x): + if self.channels_last == True: + x.contiguous(memory_format=torch.channels_last) + + layers = self.forward_transformer(self.pretrained, x) + if self.number_layers == 3: + layer_1, layer_2, layer_3 = layers + else: + layer_1, layer_2, layer_3, layer_4 = layers + + layer_1_rn = self.scratch.layer1_rn(layer_1) + layer_2_rn = self.scratch.layer2_rn(layer_2) + layer_3_rn = self.scratch.layer3_rn(layer_3) + if self.number_layers >= 4: + layer_4_rn = self.scratch.layer4_rn(layer_4) + + if self.number_layers == 3: + path_3 = self.scratch.refinenet3(layer_3_rn, size=layer_2_rn.shape[2:]) + else: + path_4 = self.scratch.refinenet4(layer_4_rn, size=layer_3_rn.shape[2:]) + path_3 = self.scratch.refinenet3(path_4, layer_3_rn, size=layer_2_rn.shape[2:]) + path_2 = self.scratch.refinenet2(path_3, layer_2_rn, size=layer_1_rn.shape[2:]) + path_1 = self.scratch.refinenet1(path_2, layer_1_rn) + + if self.scratch.stem_transpose is not None: + path_1 = self.scratch.stem_transpose(path_1) + + out = self.scratch.output_conv(path_1) + + return out + + +class DPTDepthModel(DPT): + def __init__(self, path=None, non_negative=True, **kwargs): + features = kwargs["features"] if "features" in kwargs else 256 + head_features_1 = kwargs["head_features_1"] if "head_features_1" in kwargs else features + head_features_2 = kwargs["head_features_2"] if "head_features_2" in kwargs else 32 + kwargs.pop("head_features_1", None) + kwargs.pop("head_features_2", None) + + head = nn.Sequential( + nn.Conv2d(head_features_1, head_features_1 // 2, kernel_size=3, stride=1, padding=1), + Interpolate(scale_factor=2, mode="bilinear", align_corners=True), + nn.Conv2d(head_features_1 // 2, head_features_2, kernel_size=3, stride=1, padding=1), + nn.ReLU(True), + nn.Conv2d(head_features_2, 1, kernel_size=1, stride=1, padding=0), + nn.ReLU(True) if non_negative else nn.Identity(), + nn.Identity(), + ) + + super().__init__(head, **kwargs) + + if path is not 
None: + self.load(path) + + def forward(self, x): + return super().forward(x).squeeze(dim=1) diff --git a/dmidas/midas_net.py b/dmidas/midas_net.py new file mode 100644 index 0000000000000000000000000000000000000000..e10487c8fd4d3b50a4d9cf9bb72ce0a4734894c2 --- /dev/null +++ b/dmidas/midas_net.py @@ -0,0 +1,76 @@ +"""MidasNet: Network for monocular depth estimation trained by mixing several datasets. +This file contains code that is adapted from +https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py +""" +import torch +import torch.nn as nn + +from .base_model import BaseModel +from .blocks import FeatureFusionBlock, Interpolate, _make_encoder + + +class MidasNet(BaseModel): + """Network for monocular depth estimation. + """ + + def __init__(self, path=None, features=256, non_negative=True): + """Init. + + Args: + path (str, optional): Path to saved model. Defaults to None. + features (int, optional): Number of features. Defaults to 256. + non_negative (bool, optional): End the output head with a ReLU instead of an identity. Defaults to True. + """ + print("Loading weights: ", path) + + super(MidasNet, self).__init__() + + use_pretrained = False if path is None else True + + self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained) + + self.scratch.refinenet4 = FeatureFusionBlock(features) + self.scratch.refinenet3 = FeatureFusionBlock(features) + self.scratch.refinenet2 = FeatureFusionBlock(features) + self.scratch.refinenet1 = FeatureFusionBlock(features) + + self.scratch.output_conv = nn.Sequential( + nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1), + Interpolate(scale_factor=2, mode="bilinear"), + nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1), + nn.ReLU(True), + nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), + nn.ReLU(True) if non_negative else nn.Identity(), + ) + + if path: + self.load(path) + + def forward(self, x): + """Forward pass.
+ + Args: + x (tensor): input data (image) + + Returns: + tensor: depth + """ + + layer_1 = self.pretrained.layer1(x) + layer_2 = self.pretrained.layer2(layer_1) + layer_3 = self.pretrained.layer3(layer_2) + layer_4 = self.pretrained.layer4(layer_3) + + layer_1_rn = self.scratch.layer1_rn(layer_1) + layer_2_rn = self.scratch.layer2_rn(layer_2) + layer_3_rn = self.scratch.layer3_rn(layer_3) + layer_4_rn = self.scratch.layer4_rn(layer_4) + + path_4 = self.scratch.refinenet4(layer_4_rn) + path_3 = self.scratch.refinenet3(path_4, layer_3_rn) + path_2 = self.scratch.refinenet2(path_3, layer_2_rn) + path_1 = self.scratch.refinenet1(path_2, layer_1_rn) + + out = self.scratch.output_conv(path_1) + + return torch.squeeze(out, dim=1) diff --git a/dmidas/midas_net_custom.py b/dmidas/midas_net_custom.py new file mode 100644 index 0000000000000000000000000000000000000000..3a8df446c55f057deb6db828db276ae9c0e2308d --- /dev/null +++ b/dmidas/midas_net_custom.py @@ -0,0 +1,128 @@ +"""MidasNet: Network for monocular depth estimation trained by mixing several datasets. +This file contains code that is adapted from +https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py +""" +import torch +import torch.nn as nn + +from .base_model import BaseModel +from .blocks import FeatureFusionBlock, FeatureFusionBlock_custom, Interpolate, _make_encoder + + +class MidasNet_small(BaseModel): + """Network for monocular depth estimation. + """ + + def __init__(self, path=None, features=64, backbone="efficientnet_lite3", non_negative=True, exportable=True, channels_last=False, align_corners=True, + blocks={'expand': True}): + """Init. + + Args: + path (str, optional): Path to saved model. Defaults to None. + features (int, optional): Number of features. Defaults to 64. + backbone (str, optional): Backbone network for encoder.
Defaults to efficientnet_lite3 + """ + print("Loading weights: ", path) + + super(MidasNet_small, self).__init__() + + # Pretrained encoder weights are requested only when no checkpoint path is given. + use_pretrained = False if path else True + + self.channels_last = channels_last + self.blocks = blocks + self.backbone = backbone + + self.groups = 1 + + features1=features + features2=features + features3=features + features4=features + self.expand = False + # With 'expand' enabled, the deeper fusion stages use 2x/4x/8x the base feature width. + if "expand" in self.blocks and self.blocks['expand'] == True: + self.expand = True + features1=features + features2=features*2 + features3=features*4 + features4=features*8 + + self.pretrained, self.scratch = _make_encoder(self.backbone, features, use_pretrained, groups=self.groups, expand=self.expand, exportable=exportable) + + self.scratch.activation = nn.ReLU(False) + + self.scratch.refinenet4 = FeatureFusionBlock_custom(features4, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners) + self.scratch.refinenet3 = FeatureFusionBlock_custom(features3, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners) + self.scratch.refinenet2 = FeatureFusionBlock_custom(features2, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners) + self.scratch.refinenet1 = FeatureFusionBlock_custom(features1, self.scratch.activation, deconv=False, bn=False, align_corners=align_corners) + + + self.scratch.output_conv = nn.Sequential( + nn.Conv2d(features, features//2, kernel_size=3, stride=1, padding=1, groups=self.groups), + Interpolate(scale_factor=2, mode="bilinear"), + nn.Conv2d(features//2, 32, kernel_size=3, stride=1, padding=1), + self.scratch.activation, + nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), + nn.ReLU(True) if non_negative else nn.Identity(), + nn.Identity(), + ) + + if path: + self.load(path) + + + def forward(self, x): + """Forward pass.
+ + Args: + x (tensor): input data (image) + + Returns: + tensor: depth + """ + if self.channels_last==True: + print("self.channels_last = ", self.channels_last) + x.contiguous(memory_format=torch.channels_last) + + + layer_1 = self.pretrained.layer1(x) + layer_2 = self.pretrained.layer2(layer_1) + layer_3 = self.pretrained.layer3(layer_2) + layer_4 = self.pretrained.layer4(layer_3) + + layer_1_rn = self.scratch.layer1_rn(layer_1) + layer_2_rn = self.scratch.layer2_rn(layer_2) + layer_3_rn = self.scratch.layer3_rn(layer_3) + layer_4_rn = self.scratch.layer4_rn(layer_4) + + + path_4 = self.scratch.refinenet4(layer_4_rn) + path_3 = self.scratch.refinenet3(path_4, layer_3_rn) + path_2 = self.scratch.refinenet2(path_3, layer_2_rn) + path_1 = self.scratch.refinenet1(path_2, layer_1_rn) + + out = self.scratch.output_conv(path_1) + + return torch.squeeze(out, dim=1) + + + +def fuse_model(m): + prev_previous_type = nn.Identity() + prev_previous_name = '' + previous_type = nn.Identity() + previous_name = '' + for name, module in m.named_modules(): + if prev_previous_type == nn.Conv2d and previous_type == nn.BatchNorm2d and type(module) == nn.ReLU: + # print("FUSED ", prev_previous_name, previous_name, name) + torch.quantization.fuse_modules(m, [prev_previous_name, previous_name, name], inplace=True) + elif prev_previous_type == nn.Conv2d and previous_type == nn.BatchNorm2d: + # print("FUSED ", prev_previous_name, previous_name) + torch.quantization.fuse_modules(m, [prev_previous_name, previous_name], inplace=True) + # elif previous_type == nn.Conv2d and type(module) == nn.ReLU: + # print("FUSED ", previous_name, name) + # torch.quantization.fuse_modules(m, [previous_name, name], inplace=True) + + prev_previous_type = previous_type + prev_previous_name = previous_name + previous_type = type(module) + previous_name = name \ No newline at end of file diff --git a/dmidas/model_loader.py b/dmidas/model_loader.py new file mode 100644 index 
0000000000000000000000000000000000000000..6c6db0d7d421cebba21da140eeb6eb09b8906196 --- /dev/null +++ b/dmidas/model_loader.py @@ -0,0 +1,242 @@ +import cv2 +import torch + +from midas.dpt_depth import DPTDepthModel +from midas.midas_net import MidasNet +from midas.midas_net_custom import MidasNet_small +from midas.transforms import Resize, NormalizeImage, PrepareForNet + +from torchvision.transforms import Compose + +default_models = { + "dpt_beit_large_512": "weights/dpt_beit_large_512.pt", + "dpt_beit_large_384": "weights/dpt_beit_large_384.pt", + "dpt_beit_base_384": "weights/dpt_beit_base_384.pt", + "dpt_swin2_large_384": "weights/dpt_swin2_large_384.pt", + "dpt_swin2_base_384": "weights/dpt_swin2_base_384.pt", + "dpt_swin2_tiny_256": "weights/dpt_swin2_tiny_256.pt", + "dpt_swin_large_384": "weights/dpt_swin_large_384.pt", + "dpt_next_vit_large_384": "weights/dpt_next_vit_large_384.pt", + "dpt_levit_224": "weights/dpt_levit_224.pt", + "dpt_large_384": "weights/dpt_large_384.pt", + "dpt_hybrid_384": "weights/dpt_hybrid_384.pt", + "midas_v21_384": "weights/midas_v21_384.pt", + "midas_v21_small_256": "weights/midas_v21_small_256.pt", + "openvino_midas_v21_small_256": "weights/openvino_midas_v21_small_256.xml", +} + + +def load_model(device, model_path, model_type="dpt_large_384", optimize=True, height=None, square=False): + """Load the specified network. + + Args: + device (device): the torch device used + model_path (str): path to saved model + model_type (str): the type of the model to be loaded + optimize (bool): optimize the model to half-integer on CUDA? + height (int): inference encoder image height + square (bool): resize to a square resolution? 
+ + Returns: + The loaded network, the transform which prepares images as input to the network and the dimensions of the + network input + """ + if "openvino" in model_type: + from openvino.runtime import Core + + keep_aspect_ratio = not square + + if model_type == "dpt_beit_large_512": + model = DPTDepthModel( + path=model_path, + backbone="beitl16_512", + non_negative=True, + ) + net_w, net_h = 512, 512 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_beit_large_384": + model = DPTDepthModel( + path=model_path, + backbone="beitl16_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_beit_base_384": + model = DPTDepthModel( + path=model_path, + backbone="beitb16_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_swin2_large_384": + model = DPTDepthModel( + path=model_path, + backbone="swin2l24_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + keep_aspect_ratio = False + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_swin2_base_384": + model = DPTDepthModel( + path=model_path, + backbone="swin2b24_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + keep_aspect_ratio = False + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_swin2_tiny_256": + model = DPTDepthModel( + path=model_path, + backbone="swin2t16_256", + non_negative=True, + ) + net_w, net_h = 256, 256 + keep_aspect_ratio = False + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_swin_large_384": + model = 
DPTDepthModel( + path=model_path, + backbone="swinl12_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + keep_aspect_ratio = False + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_next_vit_large_384": + model = DPTDepthModel( + path=model_path, + backbone="next_vit_large_6m", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + # We change the notation from dpt_levit_224 (MiDaS notation) to levit_384 (timm notation) here, where the 224 refers + # to the resolution 224x224 used by LeViT and 384 is the first entry of the embed_dim, see _cfg and model_cfgs of + # https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/levit.py + # (commit id: 927f031293a30afb940fff0bee34b85d9c059b0e) + elif model_type == "dpt_levit_224": + model = DPTDepthModel( + path=model_path, + backbone="levit_384", + non_negative=True, + head_features_1=64, + head_features_2=8, + ) + net_w, net_h = 224, 224 + keep_aspect_ratio = False + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_large_384": + model = DPTDepthModel( + path=model_path, + backbone="vitl16_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_hybrid_384": + model = DPTDepthModel( + path=model_path, + backbone="vitb_rn50_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "midas_v21_384": + model = MidasNet(model_path, non_negative=True) + net_w, net_h = 384, 384 + resize_mode = "upper_bound" + normalization = NormalizeImage( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ) + 
+ elif model_type == "midas_v21_small_256": + model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3", exportable=True, + non_negative=True, blocks={'expand': True}) + net_w, net_h = 256, 256 + resize_mode = "upper_bound" + normalization = NormalizeImage( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ) + + elif model_type == "openvino_midas_v21_small_256": + ie = Core() + uncompiled_model = ie.read_model(model=model_path) + model = ie.compile_model(uncompiled_model, "CPU") + net_w, net_h = 256, 256 + resize_mode = "upper_bound" + normalization = NormalizeImage( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ) + + else: + print(f"model_type '{model_type}' not implemented, use: --model_type large") + assert False + + if not "openvino" in model_type: + print("Model loaded, number of parameters = {:.0f}M".format(sum(p.numel() for p in model.parameters()) / 1e6)) + else: + print("Model loaded, optimized with OpenVINO") + + if "openvino" in model_type: + keep_aspect_ratio = False + + if height is not None: + net_w, net_h = height, height + + transform = Compose( + [ + Resize( + net_w, + net_h, + resize_target=None, + keep_aspect_ratio=keep_aspect_ratio, + ensure_multiple_of=32, + resize_method=resize_mode, + image_interpolation_method=cv2.INTER_CUBIC, + ), + normalization, + PrepareForNet(), + ] + ) + + if not "openvino" in model_type: + model.eval() + + if optimize and (device == torch.device("cuda")): + if not "openvino" in model_type: + model = model.to(memory_format=torch.channels_last) + model = model.half() + else: + print("Error: OpenVINO models are already optimized. 
No optimization to half-float possible.") + exit() + + if not "openvino" in model_type: + model.to(device) + + return model, transform, net_w, net_h diff --git a/dmidas/transforms.py b/dmidas/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..37d68afcebf67defadd7dfb2ff5494f257876575 --- /dev/null +++ b/dmidas/transforms.py @@ -0,0 +1,234 @@ +import numpy as np +import cv2 +import math + + +def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA): + """Resize the sample to ensure the given size. Keeps aspect ratio. + + Args: + sample (dict): sample holding "image", "disparity" and "mask" arrays; modified in place + size (tuple): minimum size, compared element-wise with sample["disparity"].shape + + Returns: + tuple: new shape after upscaling (NOTE: the unchanged sample dict is returned instead when it is already large enough) + """ + shape = list(sample["disparity"].shape) + + if shape[0] >= size[0] and shape[1] >= size[1]: + return sample + + scale = [0, 0] + scale[0] = size[0] / shape[0] + scale[1] = size[1] / shape[1] + + scale = max(scale) + + shape[0] = math.ceil(scale * shape[0]) + shape[1] = math.ceil(scale * shape[1]) + + # resize + sample["image"] = cv2.resize( + sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method + ) + + sample["disparity"] = cv2.resize( + sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST + ) + sample["mask"] = cv2.resize( + sample["mask"].astype(np.float32), + tuple(shape[::-1]), + interpolation=cv2.INTER_NEAREST, + ) + sample["mask"] = sample["mask"].astype(bool) + + return tuple(shape) + + +class Resize(object): + """Resize sample to given size (width, height). + """ + + def __init__( + self, + width, + height, + resize_target=True, + keep_aspect_ratio=False, + ensure_multiple_of=1, + resize_method="lower_bound", + image_interpolation_method=cv2.INTER_AREA, + ): + """Init. + + Args: + width (int): desired output width + height (int): desired output height + resize_target (bool, optional): + True: Resize the full sample (image, mask, target). + False: Resize image only. + Defaults to True.
+ keep_aspect_ratio (bool, optional): + True: Keep the aspect ratio of the input sample. + Output sample might not have the given width and height, and + resize behaviour depends on the parameter 'resize_method'. + Defaults to False. + ensure_multiple_of (int, optional): + Output width and height is constrained to be multiple of this parameter. + Defaults to 1. + resize_method (str, optional): + "lower_bound": Output will be at least as large as the given size. + "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.) + "minimal": Scale as least as possible. (Output size might be smaller than given size.) + Defaults to "lower_bound". + """ + self.__width = width + self.__height = height + + self.__resize_target = resize_target + self.__keep_aspect_ratio = keep_aspect_ratio + self.__multiple_of = ensure_multiple_of + self.__resize_method = resize_method + self.__image_interpolation_method = image_interpolation_method + + def constrain_to_multiple_of(self, x, min_val=0, max_val=None): + y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if max_val is not None and y > max_val: + y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if y < min_val: + y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int) + + return y + + def get_size(self, width, height): + # determine new height and width + scale_height = self.__height / height + scale_width = self.__width / width + + if self.__keep_aspect_ratio: + if self.__resize_method == "lower_bound": + # scale such that output size is lower bound + if scale_width > scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "upper_bound": + # scale such that output size is upper bound + if scale_width < scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif 
self.__resize_method == "minimal": + # scale as little as possible + if abs(1 - scale_width) < abs(1 - scale_height): + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + else: + raise ValueError( + f"resize_method {self.__resize_method} not implemented" + ) + + if self.__resize_method == "lower_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, min_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, min_val=self.__width + ) + elif self.__resize_method == "upper_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, max_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, max_val=self.__width + ) + elif self.__resize_method == "minimal": + new_height = self.constrain_to_multiple_of(scale_height * height) + new_width = self.constrain_to_multiple_of(scale_width * width) + else: + raise ValueError(f"resize_method {self.__resize_method} not implemented") + + return (new_width, new_height) + + def __call__(self, sample): + width, height = self.get_size( + sample["image"].shape[1], sample["image"].shape[0] + ) + + # resize sample + sample["image"] = cv2.resize( + sample["image"], + (width, height), + interpolation=self.__image_interpolation_method, + ) + + if self.__resize_target: + if "disparity" in sample: + sample["disparity"] = cv2.resize( + sample["disparity"], + (width, height), + interpolation=cv2.INTER_NEAREST, + ) + + if "depth" in sample: + sample["depth"] = cv2.resize( + sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST + ) + + # NOTE(review): "mask" is read without a membership check, unlike "disparity" and "depth" above. + sample["mask"] = cv2.resize( + sample["mask"].astype(np.float32), + (width, height), + interpolation=cv2.INTER_NEAREST, + ) + sample["mask"] = sample["mask"].astype(bool) + + return sample + + +class NormalizeImage(object): + """Normalize image by given mean and std.
+ """ + + def __init__(self, mean, std): + self.__mean = mean + self.__std = std + + def __call__(self, sample): + sample["image"] = (sample["image"] - self.__mean) / self.__std + + return sample + + +class PrepareForNet(object): + """Prepare sample for usage as network input. + """ + + def __init__(self): + pass + + def __call__(self, sample): + image = np.transpose(sample["image"], (2, 0, 1)) + sample["image"] = np.ascontiguousarray(image).astype(np.float32) + + if "mask" in sample: + sample["mask"] = sample["mask"].astype(np.float32) + sample["mask"] = np.ascontiguousarray(sample["mask"]) + + if "disparity" in sample: + disparity = sample["disparity"].astype(np.float32) + sample["disparity"] = np.ascontiguousarray(disparity) + + if "depth" in sample: + depth = sample["depth"].astype(np.float32) + sample["depth"] = np.ascontiguousarray(depth) + + return sample diff --git a/dzoedepth/LICENSE b/dzoedepth/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..dd574602c8ef4225bd99433e7570a7e2dc573443 --- /dev/null +++ b/dzoedepth/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Intelligent Systems Lab Org + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/dzoedepth/__init__.py b/dzoedepth/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/dzoedepth/__pycache__/__init__.cpython-310.pyc b/dzoedepth/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..131769143443b8d8b722072a372a1333c0983427 Binary files /dev/null and b/dzoedepth/__pycache__/__init__.cpython-310.pyc differ diff --git a/dzoedepth/__pycache__/__init__.cpython-311.pyc b/dzoedepth/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..04929eb1d7256f38822f77d7b73ee90c5c47fb6c Binary files /dev/null and b/dzoedepth/__pycache__/__init__.cpython-311.pyc differ diff --git a/dzoedepth/__pycache__/__init__.cpython-312.pyc b/dzoedepth/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..97372f7af62859c4e47c4c981880d1a926cb520d Binary files /dev/null and b/dzoedepth/__pycache__/__init__.cpython-312.pyc differ diff --git a/dzoedepth/data/__init__.py b/dzoedepth/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1ae1a1e4e86d9a5b14586cd006ed43d2bbc9b4a6 --- /dev/null +++ b/dzoedepth/data/__init__.py @@ -0,0 +1,24 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and 
to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + diff --git a/dzoedepth/data/data_mono.py b/dzoedepth/data/data_mono.py new file mode 100644 index 0000000000000000000000000000000000000000..f58c46ba36d9c73c1bffe468ef0721d172fb38cb --- /dev/null +++ b/dzoedepth/data/data_mono.py @@ -0,0 +1,573 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +# This file is partly inspired from BTS (https://github.com/cleinc/bts/blob/master/pytorch/bts_dataloader.py); author: Jin Han Lee + +import itertools +import os +import random + +import numpy as np +import cv2 +import torch +import torch.nn as nn +import torch.utils.data.distributed +from zoedepth.utils.easydict import EasyDict as edict +from PIL import Image, ImageOps +from torch.utils.data import DataLoader, Dataset +from torchvision import transforms + +from zoedepth.utils.config import change_dataset + +from .ddad import get_ddad_loader +from .diml_indoor_test import get_diml_indoor_loader +from .diml_outdoor_test import get_diml_outdoor_loader +from .diode import get_diode_loader +from .hypersim import get_hypersim_loader +from .ibims import get_ibims_loader +from .sun_rgbd_loader import get_sunrgbd_loader +from .vkitti import get_vkitti_loader +from .vkitti2 import get_vkitti2_loader + +from .preprocess import CropParams, get_white_border, get_black_border + + +def _is_pil_image(img): + return isinstance(img, Image.Image) + + +def _is_numpy_image(img): + return isinstance(img, np.ndarray) and (img.ndim in {2, 3}) + + +def preprocessing_transforms(mode, **kwargs): + return transforms.Compose([ + ToTensor(mode=mode, **kwargs) + ]) + + +class DepthDataLoader(object): + def __init__(self, config, mode, device='cpu', transform=None, **kwargs): + """ + Data loader for depth datasets + + Args: + config (dict): Config dictionary. Refer to utils/config.py + mode (str): "train" or "online_eval" + device (str, optional): Device to load the data on. Defaults to 'cpu'. + transform (torchvision.transforms, optional): Transform to apply to the data. Defaults to None. 
+ """ + + self.config = config + + if config.dataset == 'ibims': + self.data = get_ibims_loader(config, batch_size=1, num_workers=1) + return + + if config.dataset == 'sunrgbd': + self.data = get_sunrgbd_loader( + data_dir_root=config.sunrgbd_root, batch_size=1, num_workers=1) + return + + if config.dataset == 'diml_indoor': + self.data = get_diml_indoor_loader( + data_dir_root=config.diml_indoor_root, batch_size=1, num_workers=1) + return + + if config.dataset == 'diml_outdoor': + self.data = get_diml_outdoor_loader( + data_dir_root=config.diml_outdoor_root, batch_size=1, num_workers=1) + return + + if "diode" in config.dataset: + self.data = get_diode_loader( + config[config.dataset+"_root"], batch_size=1, num_workers=1) + return + + if config.dataset == 'hypersim_test': + self.data = get_hypersim_loader( + config.hypersim_test_root, batch_size=1, num_workers=1) + return + + if config.dataset == 'vkitti': + self.data = get_vkitti_loader( + config.vkitti_root, batch_size=1, num_workers=1) + return + + if config.dataset == 'vkitti2': + self.data = get_vkitti2_loader( + config.vkitti2_root, batch_size=1, num_workers=1) + return + + if config.dataset == 'ddad': + self.data = get_ddad_loader(config.ddad_root, resize_shape=( + 352, 1216), batch_size=1, num_workers=1) + return + + img_size = self.config.get("img_size", None) + img_size = img_size if self.config.get( + "do_input_resize", False) else None + + if transform is None: + transform = preprocessing_transforms(mode, size=img_size) + + if mode == 'train': + + Dataset = DataLoadPreprocess + self.training_samples = Dataset( + config, mode, transform=transform, device=device) + + if config.distributed: + self.train_sampler = torch.utils.data.distributed.DistributedSampler( + self.training_samples) + else: + self.train_sampler = None + + self.data = DataLoader(self.training_samples, + batch_size=config.batch_size, + shuffle=(self.train_sampler is None), + num_workers=config.workers, + pin_memory=True, + 
persistent_workers=True, + # prefetch_factor=2, + sampler=self.train_sampler) + + elif mode == 'online_eval': + self.testing_samples = DataLoadPreprocess( + config, mode, transform=transform) + if config.distributed: # redundant. here only for readability and to be more explicit + # Give whole test set to all processes (and report evaluation only on one) regardless + self.eval_sampler = None + else: + self.eval_sampler = None + self.data = DataLoader(self.testing_samples, 1, + shuffle=kwargs.get("shuffle_test", False), + num_workers=1, + pin_memory=False, + sampler=self.eval_sampler) + + elif mode == 'test': + self.testing_samples = DataLoadPreprocess( + config, mode, transform=transform) + self.data = DataLoader(self.testing_samples, + 1, shuffle=False, num_workers=1) + + else: + print( + 'mode should be one of \'train, test, online_eval\'. Got {}'.format(mode)) + + +def repetitive_roundrobin(*iterables): + """ + cycles through iterables but sample wise + first yield first sample from first iterable then first sample from second iterable and so on + then second sample from first iterable then second sample from second iterable and so on + + If one iterable is shorter than the others, it is repeated until all iterables are exhausted + repetitive_roundrobin('ABC', 'D', 'EF') --> A D E B D F C D E + """ + # Repetitive roundrobin + iterables_ = [iter(it) for it in iterables] + exhausted = [False] * len(iterables) + while not all(exhausted): + for i, it in enumerate(iterables_): + try: + yield next(it) + except StopIteration: + exhausted[i] = True + iterables_[i] = itertools.cycle(iterables[i]) + # First elements may get repeated if one iterable is shorter than the others + yield next(iterables_[i]) + + +class RepetitiveRoundRobinDataLoader(object): + def __init__(self, *dataloaders): + self.dataloaders = dataloaders + + def __iter__(self): + return repetitive_roundrobin(*self.dataloaders) + + def __len__(self): + # First samples get repeated, thats why the plus one + 
return len(self.dataloaders) * (max(len(dl) for dl in self.dataloaders) + 1) + + +class MixedNYUKITTI(object): + def __init__(self, config, mode, device='cpu', **kwargs): + config = edict(config) + config.workers = config.workers // 2 + self.config = config + nyu_conf = change_dataset(edict(config), 'nyu') + kitti_conf = change_dataset(edict(config), 'kitti') + + # make nyu default for testing + self.config = config = nyu_conf + img_size = self.config.get("img_size", None) + img_size = img_size if self.config.get( + "do_input_resize", False) else None + if mode == 'train': + nyu_loader = DepthDataLoader( + nyu_conf, mode, device=device, transform=preprocessing_transforms(mode, size=img_size)).data + kitti_loader = DepthDataLoader( + kitti_conf, mode, device=device, transform=preprocessing_transforms(mode, size=img_size)).data + # It has been changed to repetitive roundrobin + self.data = RepetitiveRoundRobinDataLoader( + nyu_loader, kitti_loader) + else: + self.data = DepthDataLoader(nyu_conf, mode, device=device).data + + +def remove_leading_slash(s): + if s[0] == '/' or s[0] == '\\': + return s[1:] + return s + + +class CachedReader: + def __init__(self, shared_dict=None): + if shared_dict: + self._cache = shared_dict + else: + self._cache = {} + + def open(self, fpath): + im = self._cache.get(fpath, None) + if im is None: + im = self._cache[fpath] = Image.open(fpath) + return im + + +class ImReader: + def __init__(self): + pass + + # @cache + def open(self, fpath): + return Image.open(fpath) + + +class DataLoadPreprocess(Dataset): + def __init__(self, config, mode, transform=None, is_for_online_eval=False, **kwargs): + self.config = config + if mode == 'online_eval': + with open(config.filenames_file_eval, 'r') as f: + self.filenames = f.readlines() + else: + with open(config.filenames_file, 'r') as f: + self.filenames = f.readlines() + + self.mode = mode + self.transform = transform + self.to_tensor = ToTensor(mode) + self.is_for_online_eval = 
is_for_online_eval + if config.use_shared_dict: + self.reader = CachedReader(config.shared_dict) + else: + self.reader = ImReader() + + def postprocess(self, sample): + return sample + + def __getitem__(self, idx): + sample_path = self.filenames[idx] + focal = float(sample_path.split()[2]) + sample = {} + + if self.mode == 'train': + if self.config.dataset == 'kitti' and self.config.use_right and random.random() > 0.5: + image_path = os.path.join( + self.config.data_path, remove_leading_slash(sample_path.split()[3])) + depth_path = os.path.join( + self.config.gt_path, remove_leading_slash(sample_path.split()[4])) + else: + image_path = os.path.join( + self.config.data_path, remove_leading_slash(sample_path.split()[0])) + depth_path = os.path.join( + self.config.gt_path, remove_leading_slash(sample_path.split()[1])) + + image = self.reader.open(image_path) + depth_gt = self.reader.open(depth_path) + w, h = image.size + + if self.config.do_kb_crop: + height = image.height + width = image.width + top_margin = int(height - 352) + left_margin = int((width - 1216) / 2) + depth_gt = depth_gt.crop( + (left_margin, top_margin, left_margin + 1216, top_margin + 352)) + image = image.crop( + (left_margin, top_margin, left_margin + 1216, top_margin + 352)) + + # Avoid blank boundaries due to pixel registration? + # Train images have white border. Test images have black border. 
+ if self.config.dataset == 'nyu' and self.config.avoid_boundary: + # print("Avoiding Blank Boundaries!") + # We just crop and pad again with reflect padding to original size + # original_size = image.size + crop_params = get_white_border(np.array(image, dtype=np.uint8)) + image = image.crop((crop_params.left, crop_params.top, crop_params.right, crop_params.bottom)) + depth_gt = depth_gt.crop((crop_params.left, crop_params.top, crop_params.right, crop_params.bottom)) + + # Use reflect padding to fill the blank + image = np.array(image) + image = np.pad(image, ((crop_params.top, h - crop_params.bottom), (crop_params.left, w - crop_params.right), (0, 0)), mode='reflect') + image = Image.fromarray(image) + + depth_gt = np.array(depth_gt) + depth_gt = np.pad(depth_gt, ((crop_params.top, h - crop_params.bottom), (crop_params.left, w - crop_params.right)), 'constant', constant_values=0) + depth_gt = Image.fromarray(depth_gt) + + + if self.config.do_random_rotate and (self.config.aug): + random_angle = (random.random() - 0.5) * 2 * self.config.degree + image = self.rotate_image(image, random_angle) + depth_gt = self.rotate_image( + depth_gt, random_angle, flag=Image.NEAREST) + + image = np.asarray(image, dtype=np.float32) / 255.0 + depth_gt = np.asarray(depth_gt, dtype=np.float32) + depth_gt = np.expand_dims(depth_gt, axis=2) + + if self.config.dataset == 'nyu': + depth_gt = depth_gt / 1000.0 + else: + depth_gt = depth_gt / 256.0 + + if self.config.aug and (self.config.random_crop): + image, depth_gt = self.random_crop( + image, depth_gt, self.config.input_height, self.config.input_width) + + if self.config.aug and self.config.random_translate: + # print("Random Translation!") + image, depth_gt = self.random_translate(image, depth_gt, self.config.max_translation) + + image, depth_gt = self.train_preprocess(image, depth_gt) + mask = np.logical_and(depth_gt > self.config.min_depth, + depth_gt < self.config.max_depth).squeeze()[None, ...] 
+ sample = {'image': image, 'depth': depth_gt, 'focal': focal, + 'mask': mask, **sample} + + else: + if self.mode == 'online_eval': + data_path = self.config.data_path_eval + else: + data_path = self.config.data_path + + image_path = os.path.join( + data_path, remove_leading_slash(sample_path.split()[0])) + image = np.asarray(self.reader.open(image_path), + dtype=np.float32) / 255.0 + + if self.mode == 'online_eval': + gt_path = self.config.gt_path_eval + depth_path = os.path.join( + gt_path, remove_leading_slash(sample_path.split()[1])) + has_valid_depth = False + try: + depth_gt = self.reader.open(depth_path) + has_valid_depth = True + except IOError: + depth_gt = False + # print('Missing gt for {}'.format(image_path)) + + if has_valid_depth: + depth_gt = np.asarray(depth_gt, dtype=np.float32) + depth_gt = np.expand_dims(depth_gt, axis=2) + if self.config.dataset == 'nyu': + depth_gt = depth_gt / 1000.0 + else: + depth_gt = depth_gt / 256.0 + + mask = np.logical_and( + depth_gt >= self.config.min_depth, depth_gt <= self.config.max_depth).squeeze()[None, ...] + else: + mask = False + + if self.config.do_kb_crop: + height = image.shape[0] + width = image.shape[1] + top_margin = int(height - 352) + left_margin = int((width - 1216) / 2) + image = image[top_margin:top_margin + 352, + left_margin:left_margin + 1216, :] + if self.mode == 'online_eval' and has_valid_depth: + depth_gt = depth_gt[top_margin:top_margin + + 352, left_margin:left_margin + 1216, :] + + if self.mode == 'online_eval': + sample = {'image': image, 'depth': depth_gt, 'focal': focal, 'has_valid_depth': has_valid_depth, + 'image_path': sample_path.split()[0], 'depth_path': sample_path.split()[1], + 'mask': mask} + else: + sample = {'image': image, 'focal': focal} + + if (self.mode == 'train') or ('has_valid_depth' in sample and sample['has_valid_depth']): + mask = np.logical_and(depth_gt > self.config.min_depth, + depth_gt < self.config.max_depth).squeeze()[None, ...] 
+ sample['mask'] = mask + + if self.transform: + sample = self.transform(sample) + + sample = self.postprocess(sample) + sample['dataset'] = self.config.dataset + sample = {**sample, 'image_path': sample_path.split()[0], 'depth_path': sample_path.split()[1]} + + return sample + + def rotate_image(self, image, angle, flag=Image.BILINEAR): + result = image.rotate(angle, resample=flag) + return result + + def random_crop(self, img, depth, height, width): + assert img.shape[0] >= height + assert img.shape[1] >= width + assert img.shape[0] == depth.shape[0] + assert img.shape[1] == depth.shape[1] + x = random.randint(0, img.shape[1] - width) + y = random.randint(0, img.shape[0] - height) + img = img[y:y + height, x:x + width, :] + depth = depth[y:y + height, x:x + width, :] + + return img, depth + + def random_translate(self, img, depth, max_t=20): + assert img.shape[0] == depth.shape[0] + assert img.shape[1] == depth.shape[1] + p = self.config.translate_prob + do_translate = random.random() + if do_translate > p: + return img, depth + x = random.randint(-max_t, max_t) + y = random.randint(-max_t, max_t) + M = np.float32([[1, 0, x], [0, 1, y]]) + # print(img.shape, depth.shape) + img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0])) + depth = cv2.warpAffine(depth, M, (depth.shape[1], depth.shape[0])) + depth = depth.squeeze()[..., None] # add channel dim back. 
Affine warp removes it + # print("after", img.shape, depth.shape) + return img, depth + + def train_preprocess(self, image, depth_gt): + if self.config.aug: + # Random flipping + do_flip = random.random() + if do_flip > 0.5: + image = (image[:, ::-1, :]).copy() + depth_gt = (depth_gt[:, ::-1, :]).copy() + + # Random gamma, brightness, color augmentation + do_augment = random.random() + if do_augment > 0.5: + image = self.augment_image(image) + + return image, depth_gt + + def augment_image(self, image): + # gamma augmentation + gamma = random.uniform(0.9, 1.1) + image_aug = image ** gamma + + # brightness augmentation + if self.config.dataset == 'nyu': + brightness = random.uniform(0.75, 1.25) + else: + brightness = random.uniform(0.9, 1.1) + image_aug = image_aug * brightness + + # color augmentation + colors = np.random.uniform(0.9, 1.1, size=3) + white = np.ones((image.shape[0], image.shape[1])) + color_image = np.stack([white * colors[i] for i in range(3)], axis=2) + image_aug *= color_image + image_aug = np.clip(image_aug, 0, 1) + + return image_aug + + def __len__(self): + return len(self.filenames) + + +class ToTensor(object): + def __init__(self, mode, do_normalize=False, size=None): + self.mode = mode + self.normalize = transforms.Normalize( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) if do_normalize else nn.Identity() + self.size = size + if size is not None: + self.resize = transforms.Resize(size=size) + else: + self.resize = nn.Identity() + + def __call__(self, sample): + image, focal = sample['image'], sample['focal'] + image = self.to_tensor(image) + image = self.normalize(image) + image = self.resize(image) + + if self.mode == 'test': + return {'image': image, 'focal': focal} + + depth = sample['depth'] + if self.mode == 'train': + depth = self.to_tensor(depth) + return {**sample, 'image': image, 'depth': depth, 'focal': focal} + else: + has_valid_depth = sample['has_valid_depth'] + image = self.resize(image) + return {**sample, 'image': 
image, 'depth': depth, 'focal': focal, 'has_valid_depth': has_valid_depth, + 'image_path': sample['image_path'], 'depth_path': sample['depth_path']} + + def to_tensor(self, pic): + if not (_is_pil_image(pic) or _is_numpy_image(pic)): + raise TypeError( + 'pic should be PIL Image or ndarray. Got {}'.format(type(pic))) + + if isinstance(pic, np.ndarray): + img = torch.from_numpy(pic.transpose((2, 0, 1))) + return img + + # handle PIL Image + if pic.mode == 'I': + img = torch.from_numpy(np.array(pic, np.int32, copy=False)) + elif pic.mode == 'I;16': + img = torch.from_numpy(np.array(pic, np.int16, copy=False)) + else: + img = torch.ByteTensor( + torch.ByteStorage.from_buffer(pic.tobytes())) + # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK + if pic.mode == 'YCbCr': + nchannel = 3 + elif pic.mode == 'I;16': + nchannel = 1 + else: + nchannel = len(pic.mode) + img = img.view(pic.size[1], pic.size[0], nchannel) + + img = img.transpose(0, 1).transpose(0, 2).contiguous() + if isinstance(img, torch.ByteTensor): + return img.float() + else: + return img diff --git a/dzoedepth/data/ddad.py b/dzoedepth/data/ddad.py new file mode 100644 index 0000000000000000000000000000000000000000..99e7b2a3137f80c5e1f19ef66ef4e6985334854c --- /dev/null +++ b/dzoedepth/data/ddad.py @@ -0,0 +1,117 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import os + +import numpy as np +import torch +from PIL import Image +from torch.utils.data import DataLoader, Dataset +from torchvision import transforms + + +class ToTensor(object): + def __init__(self, resize_shape): + # self.normalize = transforms.Normalize( + # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + self.normalize = lambda x : x + self.resize = transforms.Resize(resize_shape) + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + image = self.to_tensor(image) + image = self.normalize(image) + depth = self.to_tensor(depth) + + image = self.resize(image) + + return {'image': image, 'depth': depth, 'dataset': "ddad"} + + def to_tensor(self, pic): + + if isinstance(pic, np.ndarray): + img = torch.from_numpy(pic.transpose((2, 0, 1))) + return img + + # # handle PIL Image + if pic.mode == 'I': + img = torch.from_numpy(np.array(pic, np.int32, copy=False)) + elif pic.mode == 'I;16': + img = torch.from_numpy(np.array(pic, np.int16, copy=False)) + else: + img = torch.ByteTensor( + torch.ByteStorage.from_buffer(pic.tobytes())) + # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK + if pic.mode == 'YCbCr': + nchannel = 3 + elif pic.mode == 'I;16': + nchannel = 1 + else: + nchannel = len(pic.mode) + img = img.view(pic.size[1], pic.size[0], nchannel) + + img = img.transpose(0, 1).transpose(0, 2).contiguous() + + if isinstance(img, torch.ByteTensor): + return img.float() + else: + return img + + +class 
DDAD(Dataset): + def __init__(self, data_dir_root, resize_shape): + import glob + + # image paths are of the form /{outleft, depthmap}/*.png + self.image_files = glob.glob(os.path.join(data_dir_root, '*.png')) + self.depth_files = [r.replace("_rgb.png", "_depth.npy") + for r in self.image_files] + self.transform = ToTensor(resize_shape) + + def __getitem__(self, idx): + + image_path = self.image_files[idx] + depth_path = self.depth_files[idx] + + image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 + depth = np.load(depth_path) # meters + + # depth[depth > 8] = -1 + depth = depth[..., None] + + sample = dict(image=image, depth=depth) + sample = self.transform(sample) + + if idx == 0: + print(sample["image"].shape) + + return sample + + def __len__(self): + return len(self.image_files) + + +def get_ddad_loader(data_dir_root, resize_shape, batch_size=1, **kwargs): + dataset = DDAD(data_dir_root, resize_shape) + return DataLoader(dataset, batch_size, **kwargs) diff --git a/dzoedepth/data/diml_indoor_test.py b/dzoedepth/data/diml_indoor_test.py new file mode 100644 index 0000000000000000000000000000000000000000..1653a947da1cbdc9fc714d095776ee8ed9dd7c11 --- /dev/null +++ b/dzoedepth/data/diml_indoor_test.py @@ -0,0 +1,125 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import os + +import numpy as np +import torch +from PIL import Image +from torch.utils.data import DataLoader, Dataset +from torchvision import transforms + + +class ToTensor(object): + def __init__(self): + # self.normalize = transforms.Normalize( + # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + self.normalize = lambda x : x + self.resize = transforms.Resize((480, 640)) + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + image = self.to_tensor(image) + image = self.normalize(image) + depth = self.to_tensor(depth) + + image = self.resize(image) + + return {'image': image, 'depth': depth, 'dataset': "diml_indoor"} + + def to_tensor(self, pic): + + if isinstance(pic, np.ndarray): + img = torch.from_numpy(pic.transpose((2, 0, 1))) + return img + + # # handle PIL Image + if pic.mode == 'I': + img = torch.from_numpy(np.array(pic, np.int32, copy=False)) + elif pic.mode == 'I;16': + img = torch.from_numpy(np.array(pic, np.int16, copy=False)) + else: + img = torch.ByteTensor( + torch.ByteStorage.from_buffer(pic.tobytes())) + # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK + if pic.mode == 'YCbCr': + nchannel = 3 + elif pic.mode == 'I;16': + nchannel = 1 + else: + nchannel = len(pic.mode) + img = img.view(pic.size[1], pic.size[0], nchannel) + + img = img.transpose(0, 1).transpose(0, 2).contiguous() + if isinstance(img, torch.ByteTensor): + return img.float() + else: + return img + + +class 
DIML_Indoor(Dataset): + def __init__(self, data_dir_root): + import glob + + # image paths are of the form /{HR, LR}//{color, depth_filled}/*.png + self.image_files = glob.glob(os.path.join( + data_dir_root, "LR", '*', 'color', '*.png')) + self.depth_files = [r.replace("color", "depth_filled").replace( + "_c.png", "_depth_filled.png") for r in self.image_files] + self.transform = ToTensor() + + def __getitem__(self, idx): + image_path = self.image_files[idx] + depth_path = self.depth_files[idx] + + image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 + depth = np.asarray(Image.open(depth_path), + dtype='uint16') / 1000.0 # mm to meters + + # print(np.shape(image)) + # print(np.shape(depth)) + + # depth[depth > 8] = -1 + depth = depth[..., None] + + sample = dict(image=image, depth=depth) + + # return sample + sample = self.transform(sample) + + if idx == 0: + print(sample["image"].shape) + + return sample + + def __len__(self): + return len(self.image_files) + + +def get_diml_indoor_loader(data_dir_root, batch_size=1, **kwargs): + dataset = DIML_Indoor(data_dir_root) + return DataLoader(dataset, batch_size, **kwargs) + +# get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/HR") +# get_diml_indoor_loader(data_dir_root="datasets/diml/indoor/test/LR") diff --git a/dzoedepth/data/diml_outdoor_test.py b/dzoedepth/data/diml_outdoor_test.py new file mode 100644 index 0000000000000000000000000000000000000000..9d29b72ceab85f337d4e29c185d606fb9b084867 --- /dev/null +++ b/dzoedepth/data/diml_outdoor_test.py @@ -0,0 +1,114 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit 
persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import os + +import numpy as np +import torch +from PIL import Image +from torch.utils.data import DataLoader, Dataset +from torchvision import transforms + + +class ToTensor(object): + def __init__(self): + # self.normalize = transforms.Normalize( + # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + self.normalize = lambda x : x + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + image = self.to_tensor(image) + image = self.normalize(image) + depth = self.to_tensor(depth) + + return {'image': image, 'depth': depth, 'dataset': "diml_outdoor"} + + def to_tensor(self, pic): + + if isinstance(pic, np.ndarray): + img = torch.from_numpy(pic.transpose((2, 0, 1))) + return img + + # # handle PIL Image + if pic.mode == 'I': + img = torch.from_numpy(np.array(pic, np.int32, copy=False)) + elif pic.mode == 'I;16': + img = torch.from_numpy(np.array(pic, np.int16, copy=False)) + else: + img = torch.ByteTensor( + torch.ByteStorage.from_buffer(pic.tobytes())) + # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK + if pic.mode == 'YCbCr': + nchannel = 3 + elif pic.mode == 'I;16': + nchannel = 1 + else: + nchannel = len(pic.mode) + img = img.view(pic.size[1], pic.size[0], nchannel) + + img = 
img.transpose(0, 1).transpose(0, 2).contiguous() + if isinstance(img, torch.ByteTensor): + return img.float() + else: + return img + + +class DIML_Outdoor(Dataset): + def __init__(self, data_dir_root): + import glob + + # image paths are of the form /{outleft, depthmap}/*.png + self.image_files = glob.glob(os.path.join( + data_dir_root, "*", 'outleft', '*.png')) + self.depth_files = [r.replace("outleft", "depthmap") + for r in self.image_files] + self.transform = ToTensor() + + def __getitem__(self, idx): + image_path = self.image_files[idx] + depth_path = self.depth_files[idx] + + image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 + depth = np.asarray(Image.open(depth_path), + dtype='uint16') / 1000.0 # mm to meters + + # depth[depth > 8] = -1 + depth = depth[..., None] + + sample = dict(image=image, depth=depth, dataset="diml_outdoor") + + # return sample + return self.transform(sample) + + def __len__(self): + return len(self.image_files) + + +def get_diml_outdoor_loader(data_dir_root, batch_size=1, **kwargs): + dataset = DIML_Outdoor(data_dir_root) + return DataLoader(dataset, batch_size, **kwargs) + +# get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/HR") +# get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/LR") diff --git a/dzoedepth/data/diode.py b/dzoedepth/data/diode.py new file mode 100644 index 0000000000000000000000000000000000000000..5f5d0f49562cc001b4f936f73773240d8b83cfac --- /dev/null +++ b/dzoedepth/data/diode.py @@ -0,0 +1,125 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to 
do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import os + +import numpy as np +import torch +from PIL import Image +from torch.utils.data import DataLoader, Dataset +from torchvision import transforms + + +class ToTensor(object): + def __init__(self): + # self.normalize = transforms.Normalize( + # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + self.normalize = lambda x : x + self.resize = transforms.Resize(480) + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + image = self.to_tensor(image) + image = self.normalize(image) + depth = self.to_tensor(depth) + + image = self.resize(image) + + return {'image': image, 'depth': depth, 'dataset': "diode"} + + def to_tensor(self, pic): + + if isinstance(pic, np.ndarray): + img = torch.from_numpy(pic.transpose((2, 0, 1))) + return img + + # # handle PIL Image + if pic.mode == 'I': + img = torch.from_numpy(np.array(pic, np.int32, copy=False)) + elif pic.mode == 'I;16': + img = torch.from_numpy(np.array(pic, np.int16, copy=False)) + else: + img = torch.ByteTensor( + torch.ByteStorage.from_buffer(pic.tobytes())) + # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK + if pic.mode == 'YCbCr': + nchannel = 3 + elif pic.mode == 'I;16': + nchannel = 1 + else: + nchannel = len(pic.mode) + img = img.view(pic.size[1], pic.size[0], nchannel) + + 
img = img.transpose(0, 1).transpose(0, 2).contiguous() + + if isinstance(img, torch.ByteTensor): + return img.float() + else: + return img + + +class DIODE(Dataset): + def __init__(self, data_dir_root): + import glob + + # image paths are of the form /scene_#/scan_#/*.png + self.image_files = glob.glob( + os.path.join(data_dir_root, '*', '*', '*.png')) + self.depth_files = [r.replace(".png", "_depth.npy") + for r in self.image_files] + self.depth_mask_files = [ + r.replace(".png", "_depth_mask.npy") for r in self.image_files] + self.transform = ToTensor() + + def __getitem__(self, idx): + image_path = self.image_files[idx] + depth_path = self.depth_files[idx] + depth_mask_path = self.depth_mask_files[idx] + + image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 + depth = np.load(depth_path) # in meters + valid = np.load(depth_mask_path) # binary + + # depth[depth > 8] = -1 + # depth = depth[..., None] + + sample = dict(image=image, depth=depth, valid=valid) + + # return sample + sample = self.transform(sample) + + if idx == 0: + print(sample["image"].shape) + + return sample + + def __len__(self): + return len(self.image_files) + + +def get_diode_loader(data_dir_root, batch_size=1, **kwargs): + dataset = DIODE(data_dir_root) + return DataLoader(dataset, batch_size, **kwargs) + +# get_diode_loader(data_dir_root="datasets/diode/val/outdoor") diff --git a/dzoedepth/data/hypersim.py b/dzoedepth/data/hypersim.py new file mode 100644 index 0000000000000000000000000000000000000000..55393293b31e89007fbbfbf33bafd23cd4b1c7dd --- /dev/null +++ b/dzoedepth/data/hypersim.py @@ -0,0 +1,138 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, 
def hypersim_distance_to_depth(npyDistance, width=1024, height=768, focal=886.81):
    """Convert per-pixel Euclidean ray distance into planar depth.

    Every pixel is placed on an image plane at distance ``focal`` from the
    camera centre; the distance along the ray is scaled by
    ``focal / |ray|`` to obtain the depth along the optical axis.

    Args:
        npyDistance: Array of distances, broadcastable against
            ``(height, width)``.
        width: Image width in pixels. Defaults to 1024.
        height: Image height in pixels. Defaults to 768.
        focal: Focal length in pixels. Defaults to 886.81.

    Returns:
        Array of planar depth values with the broadcast shape.
    """
    xs = np.linspace((-0.5 * width) + 0.5, (0.5 * width) - 0.5, width)
    ys = np.linspace((-0.5 * height) + 0.5, (0.5 * height) - 0.5, height)

    plane_x = xs.reshape(1, width).repeat(height, 0).astype(np.float32)[:, :, None]
    plane_y = ys.reshape(height, 1).repeat(width, 1).astype(np.float32)[:, :, None]
    plane_z = np.full([height, width, 1], focal, np.float32)
    image_plane = np.concatenate([plane_x, plane_y, plane_z], 2)

    # Vector 2-norm over the last axis = length of each pixel's ray.
    return npyDistance / np.linalg.norm(image_plane, 2, 2) * focal


class ToTensor(object):
    """Turn a HyperSim sample dict into tensors.

    ``__call__`` reads ``sample['image']`` and ``sample['depth']`` and returns
    a new dict with the keys ``image``, ``depth`` and ``dataset`` (always
    ``"hypersim"``).
    """

    def __init__(self):
        # Mean/std normalisation is disabled for this loader, so the
        # "normalize" step is an identity function.
        self.normalize = lambda x: x
        self.resize = transforms.Resize((480, 640))

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']
        image = self.normalize(self.to_tensor(image))
        depth = self.to_tensor(depth)

        # Only the image is resized; the depth map is returned as converted.
        image = self.resize(image)

        return {'image': image, 'depth': depth, 'dataset': "hypersim"}

    def to_tensor(self, pic):
        """Convert an ``np.ndarray`` or a PIL-style image to a tensor.

        Args:
            pic: Either an array indexed (H, W, C), or an object exposing
                ``mode``, ``size`` and ``tobytes()`` like a PIL image.

        Returns:
            A tensor with the channel axis moved to the front. Arrays keep
            their dtype; byte images are converted to float.
        """
        if isinstance(pic, np.ndarray):
            return torch.from_numpy(pic.transpose((2, 0, 1)))

        # PIL image path.
        # np.asarray copies only when required; np.array(..., copy=False)
        # raises on NumPy >= 2 whenever a copy cannot be avoided.
        if pic.mode == 'I':
            img = torch.from_numpy(np.asarray(pic, np.int32))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.asarray(pic, np.int16))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))

        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        # (H, W, C) -> (C, H, W)
        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        return img


class HyperSim(Dataset):
    """Dataset over HyperSim tonemapped previews and their depth files.

    Images are globbed as
    ``<data_dir_root>/*/images/scene_cam_*_final_preview/*.tonemap.jpg``;
    the depth file of an image is found by replacing ``_final_preview`` with
    ``_geometry_hdf5`` and ``.tonemap.jpg`` with ``.depth_meters.hdf5``.
    """

    def __init__(self, data_dir_root):
        # Sorted so that an index always maps to the same file.
        self.image_files = sorted(glob.glob(os.path.join(
            data_dir_root, '*', 'images', 'scene_cam_*_final_preview', '*.tonemap.jpg')))
        self.depth_files = [r.replace("_final_preview", "_geometry_hdf5").replace(
            ".tonemap.jpg", ".depth_meters.hdf5") for r in self.image_files]
        self.transform = ToTensor()

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0

        # Read the distance map and close the HDF5 handle right away; leaving
        # it open leaks one file descriptor per fetched item.
        with h5py.File(depth_path, "r") as depth_fd:
            # in meters (Euclidean distance)
            distance_meters = np.array(depth_fd['dataset'])

        # in meters (planar depth)
        depth = hypersim_distance_to_depth(distance_meters)
        depth = depth[..., None]  # add a trailing channel axis

        return self.transform(dict(image=image, depth=depth))

    def __len__(self):
        return len(self.image_files)


def get_hypersim_loader(data_dir_root, batch_size=1, **kwargs):
    """Build a ``DataLoader`` over :class:`HyperSim`.

    Extra keyword arguments are forwarded to ``DataLoader``.
    """
    dataset = HyperSim(data_dir_root)
    return DataLoader(dataset, batch_size, **kwargs)
class iBims(Dataset):
    """Dataset built from the names listed in ``<ibims_root>/imagelist.txt``.

    For every listed name the RGB image, depth map, invalid-pixel mask and
    transparency mask are expected as ``<name>.png`` in the ``rgb``,
    ``depth``, ``mask_invalid`` and ``mask_transp`` sub-folders.
    """

    def __init__(self, config):
        root = config.ibims_root
        with open(os.path.join(root, "imagelist.txt"), 'r') as list_file:
            names = list_file.read().split()

        def png_in(folder, name):
            return os.path.join(root, folder, name + ".png")

        # One (rgb, depth, mask_invalid, mask_transp) path tuple per name.
        self.samples = [
            (
                png_in('rgb', name),
                png_in('depth', name),
                png_in('mask_invalid', name),
                png_in('mask_transp', name),
            )
            for name in names
        ]
        # Mean/std normalisation is disabled; keep an identity placeholder.
        self.normalize = lambda x: x

    def __getitem__(self, idx):
        rgb_file, depth_file, invalid_file, transp_file = self.samples[idx]

        rgb = np.asarray(Image.open(rgb_file), dtype=np.float32) / 255.0

        # Raw 16-bit values are rescaled to the range [0, 50]
        # (presumably metres; confirm against the dataset description).
        raw_depth = np.asarray(Image.open(depth_file), dtype=np.uint16)
        depth = raw_depth.astype('float') * 50.0 / 65535

        valid = np.asarray(Image.open(invalid_file))
        non_transparent = np.asarray(Image.open(transp_file))

        # Pixels rejected by either mask are marked with -1.
        depth = np.where(valid * non_transparent, depth, -1)

        image = self.normalize(torch.from_numpy(rgb).permute(2, 0, 1))
        depth = torch.from_numpy(depth).unsqueeze(0)
        return dict(image=image, depth=depth, image_path=rgb_file, depth_path=depth_file, dataset='ibims')

    def __len__(self):
        return len(self.samples)


def get_ibims_loader(config, batch_size=1, **kwargs):
    """Return a ``DataLoader`` over :class:`iBims`; kwargs go to ``DataLoader``."""
    return DataLoader(iBims(config), batch_size=batch_size, **kwargs)
0000000000000000000000000000000000000000..fde0bc861eb600223b455203b1aecf66e7abe185 --- /dev/null +++ b/dzoedepth/data/preprocess.py @@ -0,0 +1,154 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
# dataclass to store the crop parameters
@dataclass
class CropParams:
    # Used by crop_image as image[top:bottom, left:right].
    top: int
    bottom: int
    left: int
    right: int


def get_border_params(rgb_image, tolerance=0.1, cut_off=20, value=0, level_diff_threshold=5, channel_axis=-1, min_border=5) -> CropParams:
    """Find how far a uniformly coloured border reaches into an image.

    The image is averaged over ``channel_axis``. Starting ``min_border``
    pixels in from each side, rows/columns are skipped while more than
    ``tolerance`` of their pixels lie within ``level_diff_threshold`` of
    ``value``. Each side stops once it has moved past ``cut_off`` pixels.

    Args:
        rgb_image: Image array; must be 2-D after averaging ``channel_axis``.
        tolerance: Fraction of border-coloured pixels above which a
            row/column still counts as border.
        cut_off: Maximum border width searched on each side.
        value: Border colour level.
        level_diff_threshold: Allowed absolute deviation from ``value``.
        channel_axis: Axis averaged to obtain the grey image.
        min_border: Number of pixels always removed from each side.
            ``0`` is supported.

    Returns:
        Crop parameters usable with :func:`crop_image`.
    """
    gray_image = np.mean(rgb_image, axis=channel_axis)
    h, w = gray_image.shape

    def is_border(line):
        matching = np.sum(np.abs(line - value) < level_diff_threshold)
        return (matching / line.size) > tolerance

    # Crop top border until number of value pixels become below tolerance
    top = min_border
    while top < h - 1 and is_border(gray_image[top, :]):
        top += 1
        if top > cut_off:
            break

    # Crop bottom border. `bottom` is an exclusive slice end, so with
    # min_border == 0 it equals h; clamp the probed row to stay in range.
    bottom = h - min_border
    while bottom > 0 and is_border(gray_image[min(bottom, h - 1), :]):
        bottom -= 1
        if h - bottom > cut_off:
            break

    # Crop left border until number of value pixels become below tolerance
    left = min_border
    while left < w - 1 and is_border(gray_image[:, left]):
        left += 1
        if left > cut_off:
            break

    # Crop right border; same clamping as for the bottom edge.
    right = w - min_border
    while right > 0 and is_border(gray_image[:, min(right, w - 1)]):
        right -= 1
        if w - right > cut_off:
            break

    return CropParams(top, bottom, left, right)


def get_white_border(rgb_image, value=255, **kwargs) -> CropParams:
    """Compute crop parameters that remove a white border.

    Args:
        rgb_image: RGB image, shape (H, W, 3).
        value: Level of "white"; 255 and 1 trigger a range check of the image.
        **kwargs: Forwarded to :func:`get_border_params`.

    Returns:
        Crop parameters.
    """
    if value == 255:
        # assert range of values in rgb image is [0, 255]
        assert np.max(rgb_image) <= 255 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 255]."
        assert rgb_image.max() > 1, "RGB image values are not in range [0, 255]."
    elif value == 1:
        # assert range of values in rgb image is [0, 1]
        assert np.max(rgb_image) <= 1 and np.min(rgb_image) >= 0, "RGB image values are not in range [0, 1]."

    return get_border_params(rgb_image, value=value, **kwargs)


def get_black_border(rgb_image, **kwargs) -> CropParams:
    """Compute crop parameters that remove a black border.

    Args:
        rgb_image: RGB image, shape (H, W, 3).
        **kwargs: Forwarded to :func:`get_border_params`.

    Returns:
        Crop parameters.
    """
    return get_border_params(rgb_image, value=0, **kwargs)


def crop_image(image: np.ndarray, crop_params: CropParams) -> np.ndarray:
    """Crop the image according to the crop parameters.

    Args:
        image: RGB or depth image, shape (H, W, 3) or (H, W).
        crop_params: Crop parameters.

    Returns:
        Cropped image (a view of the input).
    """
    return image[crop_params.top:crop_params.bottom, crop_params.left:crop_params.right]


def crop_images(*images: np.ndarray, crop_params: CropParams) -> Tuple[np.ndarray, ...]:
    """Crop several images with the same crop parameters.

    Args:
        images: RGB or depth images, shape (H, W, 3) or (H, W).
        crop_params: Crop parameters.

    Returns:
        Cropped images, in input order.
    """
    return tuple(crop_image(image, crop_params) for image in images)


def crop_black_or_white_border(rgb_image, *other_images: np.ndarray, tolerance=0.1, cut_off=20, level_diff_threshold=5) -> Tuple[np.ndarray, ...]:
    """Crop the black border, then the white border, of a set of images.

    Args:
        rgb_image: RGB image, shape (H, W, 3). This image is used to determine the border.
        other_images: The other images to crop according to the border of the RGB image.
        tolerance: See :func:`get_border_params`.
        cut_off: See :func:`get_border_params`.
        level_diff_threshold: See :func:`get_border_params`.

    Returns:
        Cropped RGB image followed by the cropped other images.
    """
    # crop black border
    crop_params = get_black_border(rgb_image, tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold)
    cropped_images = crop_images(rgb_image, *other_images, crop_params=crop_params)

    # crop white border (measured on the already cropped RGB image)
    crop_params = get_white_border(cropped_images[0], tolerance=tolerance, cut_off=cut_off, level_diff_threshold=level_diff_threshold)
    cropped_images = crop_images(*cropped_images, crop_params=crop_params)

    return cropped_images
class ToTensor(object):
    """Turn a SUN RGB-D sample dict into tensors.

    ``__call__`` reads ``sample['image']`` and ``sample['depth']`` and returns
    a new dict with the keys ``image``, ``depth`` and ``dataset`` (always
    ``"sunrgbd"``). No resizing is applied.
    """

    def __init__(self):
        # Mean/std normalisation is disabled for this loader, so the
        # "normalize" step is an identity function.
        self.normalize = lambda x: x

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']
        image = self.normalize(self.to_tensor(image))
        depth = self.to_tensor(depth)

        return {'image': image, 'depth': depth, 'dataset': "sunrgbd"}

    def to_tensor(self, pic):
        """Convert an ``np.ndarray`` or a PIL-style image to a tensor.

        Args:
            pic: Either an array indexed (H, W, C), or an object exposing
                ``mode``, ``size`` and ``tobytes()`` like a PIL image.

        Returns:
            A tensor with the channel axis moved to the front. Arrays keep
            their dtype; byte images are converted to float.
        """
        if isinstance(pic, np.ndarray):
            return torch.from_numpy(pic.transpose((2, 0, 1)))

        # PIL image path.
        # np.asarray copies only when required; np.array(..., copy=False)
        # raises on NumPy >= 2 whenever a copy cannot be avoided.
        if pic.mode == 'I':
            img = torch.from_numpy(np.asarray(pic, np.int32))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.asarray(pic, np.int16))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))

        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        # (H, W, C) -> (C, H, W)
        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        return img


class SunRGBD(Dataset):
    """Dataset over ``<data_dir_root>/rgb/rgb/*`` with depth in ``gt/gt``.

    The depth file of an image has the same file name, with a ``.jpg``
    extension replaced by ``.png``.
    """

    def __init__(self, data_dir_root):
        import glob

        # Sorted so that an index always maps to the same file.
        self.image_files = sorted(glob.glob(
            os.path.join(data_dir_root, 'rgb', 'rgb', '*')))

        # Build the depth path from its parts instead of string-replacing
        # "rgb/rgb" and "jpg" in the full path: that breaks with a non-"/"
        # separator and rewrites unrelated parts of the path.
        depth_dir = os.path.join(data_dir_root, 'gt', 'gt')
        self.depth_files = [
            os.path.join(depth_dir, self._depth_name(os.path.basename(p)))
            for p in self.image_files]
        self.transform = ToTensor()

    @staticmethod
    def _depth_name(image_name):
        """Return the depth file name belonging to an RGB file name."""
        stem, ext = os.path.splitext(image_name)
        return stem + ".png" if ext == ".jpg" else image_name

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
        # Stored values are divided by 1000 (presumably millimetres to
        # metres; confirm against the dataset description).
        depth = np.asarray(Image.open(depth_path), dtype='uint16') / 1000.0
        depth[depth > 8] = -1  # values above 8 are flagged with -1
        depth = depth[..., None]  # add a trailing channel axis
        return self.transform(dict(image=image, depth=depth))

    def __len__(self):
        return len(self.image_files)


def get_sunrgbd_loader(data_dir_root, batch_size=1, **kwargs):
    """Build a ``DataLoader`` over :class:`SunRGBD`.

    Extra keyword arguments are forwarded to ``DataLoader``.
    """
    dataset = SunRGBD(data_dir_root)
    return DataLoader(dataset, batch_size, **kwargs)
class RandomFliplr(object):
    """Horizontal flip of the sample with given probability.
    """

    def __init__(self, probability=0.5):
        """Init.

        Args:
            probability (float, optional): Flip probability. Defaults to 0.5.
        """
        self.__probability = probability

    def __call__(self, sample):
        if random.random() < self.__probability:
            for k, v in sample.items():
                # Flip array entries only; entries without a shape (for
                # example a name string) are left untouched.
                if hasattr(v, "shape") and len(v.shape) >= 2:
                    sample[k] = np.fliplr(v).copy()

        return sample


def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
    """Resize the sample to ensure the given size. Keeps aspect ratio.

    ``sample["image"]``, ``sample["disparity"]`` and ``sample["mask"]`` are
    replaced in place when upscaling is required.

    Args:
        sample (dict): sample
        size (tuple): minimum size as (height, width)

    Returns:
        tuple: shape of ``sample["disparity"]`` after the call
    """
    shape = list(sample["disparity"].shape)

    if shape[0] >= size[0] and shape[1] >= size[1]:
        # Already large enough; return the size like the resizing path does.
        return tuple(shape)

    # One common factor keeps the aspect ratio.
    scale = max(size[0] / shape[0], size[1] / shape[1])

    shape[0] = math.ceil(scale * shape[0])
    shape[1] = math.ceil(scale * shape[1])

    # cv2.resize takes (width, height)
    target = tuple(shape[::-1])

    sample["image"] = cv2.resize(
        sample["image"], target, interpolation=image_interpolation_method
    )

    sample["disparity"] = cv2.resize(
        sample["disparity"], target, interpolation=cv2.INTER_NEAREST
    )
    sample["mask"] = cv2.resize(
        sample["mask"].astype(np.float32),
        target,
        interpolation=cv2.INTER_NEAREST,
    )
    sample["mask"] = sample["mask"].astype(bool)

    return tuple(shape)


class RandomCrop(object):
    """Get a random crop of the sample with the given size (width, height).
    """

    def __init__(
        self,
        width,
        height,
        resize_if_needed=False,
        image_interpolation_method=cv2.INTER_AREA,
    ):
        """Init.

        Args:
            width (int): output width
            height (int): output height
            resize_if_needed (bool, optional): If True, sample might be upsampled to ensure
                that a crop of size (width, height) is possible. Defaults to False.
        """
        self.__size = (height, width)
        self.__resize_if_needed = resize_if_needed
        self.__image_interpolation_method = image_interpolation_method

    def __call__(self, sample):
        shape = sample["disparity"].shape

        if self.__size[0] > shape[0] or self.__size[1] > shape[1]:
            if not self.__resize_if_needed:
                raise Exception(
                    "Output size {} bigger than input size {}.".format(
                        self.__size, shape
                    )
                )
            shape = apply_min_size(
                sample, self.__size, self.__image_interpolation_method
            )

        offset = (
            np.random.randint(shape[0] - self.__size[0] + 1),
            np.random.randint(shape[1] - self.__size[1] + 1),
        )

        for k, v in sample.items():
            if k == "code" or k == "basis":
                continue

            # Crop array entries only; entries without a shape are kept.
            if hasattr(v, "shape") and len(v.shape) >= 2:
                sample[k] = v[
                    offset[0]: offset[0] + self.__size[0],
                    offset[1]: offset[1] + self.__size[1],
                ]

        return sample


class Resize(object):
    """Resize sample to given size (width, height).
    """

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
        image_interpolation_method=cv2.INTER_AREA,
        letter_box=False,
    ):
        """Init.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
                "minimal": Scale as little as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
            image_interpolation_method (int, optional): cv2 interpolation flag
                used for the image. Defaults to cv2.INTER_AREA.
            letter_box (bool, optional): If True, pad the resized arrays with
                zeros up to (height, width). Defaults to False.
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method
        self.__letter_box = letter_box

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        """Round ``x`` to a multiple of ``ensure_multiple_of`` within bounds.

        Rounds to the nearest multiple, falls back to rounding down when that
        exceeds ``max_val`` and to rounding up when the result is below
        ``min_val``.
        """
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of)
                 * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of)
                 * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        """Return the output ``(width, height)`` for an input of the given size."""
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as little as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(
                    f"resize_method {self.__resize_method} not implemented"
                )

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, min_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, min_val=self.__width
            )
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, max_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, max_val=self.__width
            )
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(
                f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def make_letter_box(self, sample):
        """Zero-pad ``sample`` so that it measures (height, width).

        The padding is split between both sides; when it is odd, the extra
        pixel goes to the bottom/right so the target size is reached exactly.
        """
        pad_h = self.__height - sample.shape[0]
        pad_w = self.__width - sample.shape[1]
        top = pad_h // 2
        bottom = pad_h - top
        left = pad_w // 2
        right = pad_w - left
        sample = cv2.copyMakeBorder(
            sample, top, bottom, left, right, cv2.BORDER_CONSTANT, None, 0)
        return sample

    def __call__(self, sample):
        width, height = self.get_size(
            sample["image"].shape[1], sample["image"].shape[0]
        )

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__letter_box:
            sample["image"] = self.make_letter_box(sample["image"])

        if self.__resize_target:
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

                if self.__letter_box:
                    sample["disparity"] = self.make_letter_box(
                        sample["disparity"])

            if "depth" in sample:
                sample["depth"] = cv2.resize(
                    sample["depth"], (width, height),
                    interpolation=cv2.INTER_NEAREST
                )

                if self.__letter_box:
                    sample["depth"] = self.make_letter_box(sample["depth"])

            # The mask is optional, like disparity and depth above.
            if "mask" in sample:
                sample["mask"] = cv2.resize(
                    sample["mask"].astype(np.float32),
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

                if self.__letter_box:
                    sample["mask"] = self.make_letter_box(sample["mask"])

                sample["mask"] = sample["mask"].astype(bool)

        return sample


class ResizeFixed(object):
    """Resize image, disparity and mask to a fixed ``size``.

    ``size`` is reversed before being handed to ``cv2.resize``, which expects
    (width, height); so ``size`` is given as (height, width).
    """

    def __init__(self, size):
        self.__size = size

    def __call__(self, sample):
        sample["image"] = cv2.resize(
            sample["image"], self.__size[::-1], interpolation=cv2.INTER_LINEAR
        )

        sample["disparity"] = cv2.resize(
            sample["disparity"], self.__size[::-1],
            interpolation=cv2.INTER_NEAREST
        )

        sample["mask"] = cv2.resize(
            sample["mask"].astype(np.float32),
            self.__size[::-1],
            interpolation=cv2.INTER_NEAREST,
        )
        sample["mask"] = sample["mask"].astype(bool)

        return sample


class Rescale(object):
    """Rescale target values to the interval [0, max_val].
    If input is constant, values are set to max_val / 2.
    """

    def __init__(self, max_val=1.0, use_mask=True):
        """Init.

        Args:
            max_val (float, optional): Max output value. Defaults to 1.0.
            use_mask (bool, optional): Only operate on valid pixels (mask == True). Defaults to True.
        """
        self.__max_val = max_val
        self.__use_mask = use_mask

    def __call__(self, sample):
        disp = sample["disparity"]

        if self.__use_mask:
            mask = sample["mask"]
        else:
            mask = np.ones_like(disp, dtype=bool)

        # Nothing to rescale when no pixel is selected.
        if np.sum(mask) == 0:
            return sample

        min_val = np.min(disp[mask])
        max_val = np.max(disp[mask])

        if max_val > min_val:
            sample["disparity"][mask] = (
                (disp[mask] - min_val) / (max_val - min_val) * self.__max_val
            )
        else:
            sample["disparity"][mask] = np.ones_like(
                disp[mask]) * self.__max_val / 2.0

        return sample


# mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
class NormalizeImage(object):
    """Normalize image by given mean and std.
    """

    def __init__(self, mean, std):
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        sample["image"] = (sample["image"] - self.__mean) / self.__std

        return sample


class DepthToDisparity(object):
    """Convert depth to disparity. Removes depth from sample.

    Pixels with depth below ``eps`` get disparity 0 and are cleared in
    ``sample["mask"]``.
    """

    def __init__(self, eps=1e-4):
        self.__eps = eps

    def __call__(self, sample):
        assert "depth" in sample

        sample["mask"][sample["depth"] < self.__eps] = False

        sample["disparity"] = np.zeros_like(sample["depth"])
        sample["disparity"][sample["depth"] >= self.__eps] = (
            1.0 / sample["depth"][sample["depth"] >= self.__eps]
        )

        del sample["depth"]

        return sample


class DisparityToDepth(object):
    """Convert disparity to depth. Removes disparity from sample.

    The absolute disparity is used; pixels below ``eps`` get depth 0 and are
    cleared in ``sample["mask"]``.
    """

    def __init__(self, eps=1e-4):
        self.__eps = eps

    def __call__(self, sample):
        assert "disparity" in sample

        disp = np.abs(sample["disparity"])
        sample["mask"][disp < self.__eps] = False

        sample["depth"] = np.zeros_like(disp)
        sample["depth"][disp >= self.__eps] = (
            1.0 / disp[disp >= self.__eps]
        )

        del sample["disparity"]

        return sample


class PrepareForNet(object):
    """Prepare sample for usage as network input.

    The image is transposed from (H, W, C) to (C, H, W); image, mask,
    disparity and depth (when present) become contiguous float32 arrays.
    """

    def __init__(self):
        pass

    def __call__(self, sample):
        image = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(image).astype(np.float32)

        if "mask" in sample:
            sample["mask"] = sample["mask"].astype(np.float32)
            sample["mask"] = np.ascontiguousarray(sample["mask"])

        if "disparity" in sample:
            disparity = sample["disparity"].astype(np.float32)
            sample["disparity"] = np.ascontiguousarray(disparity)

        if "depth" in sample:
            depth = sample["depth"].astype(np.float32)
            sample["depth"] = np.ascontiguousarray(depth)

        return sample
class ToTensor(object):
    """Turn a Virtual KITTI sample dict into tensors.

    ``__call__`` reads ``sample['image']`` and ``sample['depth']`` and returns
    a new dict with the keys ``image``, ``depth`` and ``dataset`` (always
    ``"vkitti"``). The image is normalised with a fixed mean/std; no resizing
    is applied.
    """

    def __init__(self):
        self.normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    def __call__(self, sample):
        image, depth = sample['image'], sample['depth']

        image = self.normalize(self.to_tensor(image))
        depth = self.to_tensor(depth)

        return {'image': image, 'depth': depth, 'dataset': "vkitti"}

    def to_tensor(self, pic):
        """Convert an ``np.ndarray`` or a PIL-style image to a tensor.

        Args:
            pic: Either an array indexed (H, W, C), or an object exposing
                ``mode``, ``size`` and ``tobytes()`` like a PIL image.

        Returns:
            A tensor with the channel axis moved to the front. Arrays keep
            their dtype; byte images are converted to float.
        """
        if isinstance(pic, np.ndarray):
            return torch.from_numpy(pic.transpose((2, 0, 1)))

        # PIL image path.
        # np.asarray copies only when required; np.array(..., copy=False)
        # raises on NumPy >= 2 whenever a copy cannot be avoided.
        if pic.mode == 'I':
            img = torch.from_numpy(np.asarray(pic, np.int32))
        elif pic.mode == 'I;16':
            img = torch.from_numpy(np.asarray(pic, np.int16))
        else:
            img = torch.ByteTensor(
                torch.ByteStorage.from_buffer(pic.tobytes()))

        # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
        if pic.mode == 'YCbCr':
            nchannel = 3
        elif pic.mode == 'I;16':
            nchannel = 1
        else:
            nchannel = len(pic.mode)
        img = img.view(pic.size[1], pic.size[0], nchannel)

        # (H, W, C) -> (C, H, W)
        img = img.transpose(0, 1).transpose(0, 2).contiguous()
        if isinstance(img, torch.ByteTensor):
            return img.float()
        return img


class VKITTI(Dataset):
    """Dataset over ``<data_dir_root>/test_color/*.png``.

    The depth file of an image is the same path with ``test_color`` replaced
    by ``test_depth``.
    """

    def __init__(self, data_dir_root, do_kb_crop=True):
        import glob

        # Sorted so that an index always maps to the same file.
        self.image_files = sorted(glob.glob(os.path.join(
            data_dir_root, "test_color", '*.png')))
        self.depth_files = [r.replace("test_color", "test_depth")
                            for r in self.image_files]
        # Store what the caller asked for (the argument used to be ignored).
        # NOTE: the crop itself was switched off in __getitem__ before this
        # change and stays off, so the flag does not alter the returned data.
        self.do_kb_crop = do_kb_crop
        self.transform = ToTensor()

    def __getitem__(self, idx):
        image_path = self.image_files[idx]
        depth_path = self.depth_files[idx]

        image = Image.open(image_path)
        # Depth is read with OpenCV only; keep the file's own bit depth.
        depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR |
                           cv2.IMREAD_ANYDEPTH)
        if depth is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise FileNotFoundError(
                "Could not read depth map: {}".format(depth_path))

        image = np.asarray(image, dtype=np.float32) / 255.0
        depth = depth[..., None]  # add a trailing channel axis

        return self.transform(dict(image=image, depth=depth))

    def __len__(self):
        return len(self.image_files)


def get_vkitti_loader(data_dir_root, batch_size=1, **kwargs):
    """Build a ``DataLoader`` over :class:`VKITTI`.

    Extra keyword arguments are forwarded to ``DataLoader``.
    """
    dataset = VKITTI(data_dir_root)
    return DataLoader(dataset, batch_size, **kwargs)


if __name__ == "__main__":
    # Manual smoke test: print shapes and depth range of the first batches.
    loader = get_vkitti_loader(
        data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti_test")
    print("Total files", len(loader.dataset))
    for i, sample in enumerate(loader):
        print(sample["image"].shape)
        print(sample["depth"].shape)
        print(sample["dataset"])
        print(sample['depth'].min(), sample['depth'].max())
        if i > 5:
            break
"Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import os + +import cv2 +import numpy as np +import torch +from PIL import Image +from torch.utils.data import DataLoader, Dataset +from torchvision import transforms + + +class ToTensor(object): + def __init__(self): + # self.normalize = transforms.Normalize( + # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + self.normalize = lambda x: x + # self.resize = transforms.Resize((375, 1242)) + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + + image = self.to_tensor(image) + image = self.normalize(image) + depth = self.to_tensor(depth) + + # image = self.resize(image) + + return {'image': image, 'depth': depth, 'dataset': "vkitti"} + + def to_tensor(self, pic): + + if isinstance(pic, np.ndarray): + img = torch.from_numpy(pic.transpose((2, 0, 1))) + return img + + # # handle PIL Image + if pic.mode == 'I': + img = torch.from_numpy(np.array(pic, np.int32, copy=False)) + elif pic.mode == 'I;16': + img = torch.from_numpy(np.array(pic, np.int16, copy=False)) + else: + img = 
torch.ByteTensor( + torch.ByteStorage.from_buffer(pic.tobytes())) + # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK + if pic.mode == 'YCbCr': + nchannel = 3 + elif pic.mode == 'I;16': + nchannel = 1 + else: + nchannel = len(pic.mode) + img = img.view(pic.size[1], pic.size[0], nchannel) + + img = img.transpose(0, 1).transpose(0, 2).contiguous() + if isinstance(img, torch.ByteTensor): + return img.float() + else: + return img + + +class VKITTI2(Dataset): + def __init__(self, data_dir_root, do_kb_crop=True, split="test"): + import glob + + # image paths are of the form /rgb///frames//Camera<0,1>/rgb_{}.jpg + self.image_files = glob.glob(os.path.join( + data_dir_root, "rgb", "**", "frames", "rgb", "Camera_0", '*.jpg'), recursive=True) + self.depth_files = [r.replace("/rgb/", "/depth/").replace( + "rgb_", "depth_").replace(".jpg", ".png") for r in self.image_files] + self.do_kb_crop = True + self.transform = ToTensor() + + # If train test split is not created, then create one. + # Split is such that 8% of the frames from each scene are used for testing. 
+ if not os.path.exists(os.path.join(data_dir_root, "train.txt")): + import random + scenes = set([os.path.basename(os.path.dirname( + os.path.dirname(os.path.dirname(f)))) for f in self.image_files]) + train_files = [] + test_files = [] + for scene in scenes: + scene_files = [f for f in self.image_files if os.path.basename( + os.path.dirname(os.path.dirname(os.path.dirname(f)))) == scene] + random.shuffle(scene_files) + train_files.extend(scene_files[:int(len(scene_files) * 0.92)]) + test_files.extend(scene_files[int(len(scene_files) * 0.92):]) + with open(os.path.join(data_dir_root, "train.txt"), "w") as f: + f.write("\n".join(train_files)) + with open(os.path.join(data_dir_root, "test.txt"), "w") as f: + f.write("\n".join(test_files)) + + if split == "train": + with open(os.path.join(data_dir_root, "train.txt"), "r") as f: + self.image_files = f.read().splitlines() + self.depth_files = [r.replace("/rgb/", "/depth/").replace( + "rgb_", "depth_").replace(".jpg", ".png") for r in self.image_files] + elif split == "test": + with open(os.path.join(data_dir_root, "test.txt"), "r") as f: + self.image_files = f.read().splitlines() + self.depth_files = [r.replace("/rgb/", "/depth/").replace( + "rgb_", "depth_").replace(".jpg", ".png") for r in self.image_files] + + def __getitem__(self, idx): + image_path = self.image_files[idx] + depth_path = self.depth_files[idx] + + image = Image.open(image_path) + # depth = Image.open(depth_path) + depth = cv2.imread(depth_path, cv2.IMREAD_ANYCOLOR | + cv2.IMREAD_ANYDEPTH) / 100.0 # cm to m + depth = Image.fromarray(depth) + # print("dpeth min max", depth.min(), depth.max()) + + # print(np.shape(image)) + # print(np.shape(depth)) + + if self.do_kb_crop: + if idx == 0: + print("Using KB input crop") + height = image.height + width = image.width + top_margin = int(height - 352) + left_margin = int((width - 1216) / 2) + depth = depth.crop( + (left_margin, top_margin, left_margin + 1216, top_margin + 352)) + image = image.crop( + 
(left_margin, top_margin, left_margin + 1216, top_margin + 352)) + # uv = uv[:, top_margin:top_margin + 352, left_margin:left_margin + 1216] + + image = np.asarray(image, dtype=np.float32) / 255.0 + # depth = np.asarray(depth, dtype=np.uint16) /1. + depth = np.asarray(depth, dtype=np.float32) / 1. + depth[depth > 80] = -1 + + depth = depth[..., None] + sample = dict(image=image, depth=depth) + + # return sample + sample = self.transform(sample) + + if idx == 0: + print(sample["image"].shape) + + return sample + + def __len__(self): + return len(self.image_files) + + +def get_vkitti2_loader(data_dir_root, batch_size=1, **kwargs): + dataset = VKITTI2(data_dir_root) + return DataLoader(dataset, batch_size, **kwargs) + + +if __name__ == "__main__": + loader = get_vkitti2_loader( + data_dir_root="/home/bhatsf/shortcuts/datasets/vkitti2") + print("Total files", len(loader.dataset)) + for i, sample in enumerate(loader): + print(sample["image"].shape) + print(sample["depth"].shape) + print(sample["dataset"]) + print(sample['depth'].min(), sample['depth'].max()) + if i > 5: + break diff --git a/dzoedepth/models/__init__.py b/dzoedepth/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1ae1a1e4e86d9a5b14586cd006ed43d2bbc9b4a6 --- /dev/null +++ b/dzoedepth/models/__init__.py @@ -0,0 +1,24 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + diff --git a/dzoedepth/models/__pycache__/__init__.cpython-310.pyc b/dzoedepth/models/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c5a6fd80482823de2523fedc38925c3cf748ba74 Binary files /dev/null and b/dzoedepth/models/__pycache__/__init__.cpython-310.pyc differ diff --git a/dzoedepth/models/__pycache__/__init__.cpython-311.pyc b/dzoedepth/models/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e883546e7cbdfaa468dbf630eba602366f7773e2 Binary files /dev/null and b/dzoedepth/models/__pycache__/__init__.cpython-311.pyc differ diff --git a/dzoedepth/models/__pycache__/__init__.cpython-312.pyc b/dzoedepth/models/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5cc63bad83f8b87b2bb5221c3a923c1a1eb6149 Binary files /dev/null and b/dzoedepth/models/__pycache__/__init__.cpython-312.pyc differ diff --git a/dzoedepth/models/__pycache__/builder.cpython-310.pyc b/dzoedepth/models/__pycache__/builder.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5f72d3edd4203c95cb54ff9057a7154138b3ff6 Binary files /dev/null and b/dzoedepth/models/__pycache__/builder.cpython-310.pyc differ diff --git a/dzoedepth/models/__pycache__/builder.cpython-311.pyc b/dzoedepth/models/__pycache__/builder.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..b48beaa9702452e4fc1df63435436624ab7b0320 Binary files /dev/null and b/dzoedepth/models/__pycache__/builder.cpython-311.pyc differ diff --git a/dzoedepth/models/__pycache__/builder.cpython-312.pyc b/dzoedepth/models/__pycache__/builder.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ba2951403d4d799a74de2b5db3e13076e7af67c Binary files /dev/null and b/dzoedepth/models/__pycache__/builder.cpython-312.pyc differ diff --git a/dzoedepth/models/__pycache__/depth_model.cpython-310.pyc b/dzoedepth/models/__pycache__/depth_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..798fd2542ef51796497a14e624ed32e058e7409d Binary files /dev/null and b/dzoedepth/models/__pycache__/depth_model.cpython-310.pyc differ diff --git a/dzoedepth/models/__pycache__/depth_model.cpython-311.pyc b/dzoedepth/models/__pycache__/depth_model.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9eebe1dff7b855b240616bb07e5b2c803e341046 Binary files /dev/null and b/dzoedepth/models/__pycache__/depth_model.cpython-311.pyc differ diff --git a/dzoedepth/models/__pycache__/depth_model.cpython-312.pyc b/dzoedepth/models/__pycache__/depth_model.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3a6ff88e98b3aae33c8c83edc55ca7a98407a7a9 Binary files /dev/null and b/dzoedepth/models/__pycache__/depth_model.cpython-312.pyc differ diff --git a/dzoedepth/models/base_models/__init__.py b/dzoedepth/models/base_models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1ae1a1e4e86d9a5b14586cd006ed43d2bbc9b4a6 --- /dev/null +++ b/dzoedepth/models/base_models/__init__.py @@ -0,0 +1,24 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to 
deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + diff --git a/dzoedepth/models/base_models/midas.py b/dzoedepth/models/base_models/midas.py new file mode 100644 index 0000000000000000000000000000000000000000..5f0f92ae473231f3fcc561762c733d636819688c --- /dev/null +++ b/dzoedepth/models/base_models/midas.py @@ -0,0 +1,379 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import torch +import torch.nn as nn +import numpy as np +from torchvision.transforms import Normalize + + +def denormalize(x): + """Reverses the imagenet normalization applied to the input. + + Args: + x (torch.Tensor - shape(N,3,H,W)): input tensor + + Returns: + torch.Tensor - shape(N,3,H,W): Denormalized input + """ + mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(x.device) + std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(x.device) + return x * std + mean + +def get_activation(name, bank): + def hook(model, input, output): + bank[name] = output + return hook + + +class Resize(object): + """Resize sample to given size (width, height). + """ + + def __init__( + self, + width, + height, + resize_target=True, + keep_aspect_ratio=False, + ensure_multiple_of=1, + resize_method="lower_bound", + ): + """Init. + Args: + width (int): desired output width + height (int): desired output height + resize_target (bool, optional): + True: Resize the full sample (image, mask, target). + False: Resize image only. + Defaults to True. + keep_aspect_ratio (bool, optional): + True: Keep the aspect ratio of the input sample. + Output sample might not have the given width and height, and + resize behaviour depends on the parameter 'resize_method'. + Defaults to False. + ensure_multiple_of (int, optional): + Output width and height is constrained to be multiple of this parameter. + Defaults to 1. 
+ resize_method (str, optional): + "lower_bound": Output will be at least as large as the given size. + "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.) + "minimal": Scale as least as possible. (Output size might be smaller than given size.) + Defaults to "lower_bound". + """ + print("Params passed to Resize transform:") + print("\twidth: ", width) + print("\theight: ", height) + print("\tresize_target: ", resize_target) + print("\tkeep_aspect_ratio: ", keep_aspect_ratio) + print("\tensure_multiple_of: ", ensure_multiple_of) + print("\tresize_method: ", resize_method) + + self.__width = width + self.__height = height + + self.__keep_aspect_ratio = keep_aspect_ratio + self.__multiple_of = ensure_multiple_of + self.__resize_method = resize_method + + def constrain_to_multiple_of(self, x, min_val=0, max_val=None): + y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if max_val is not None and y > max_val: + y = (np.floor(x / self.__multiple_of) + * self.__multiple_of).astype(int) + + if y < min_val: + y = (np.ceil(x / self.__multiple_of) + * self.__multiple_of).astype(int) + + return y + + def get_size(self, width, height): + # determine new height and width + scale_height = self.__height / height + scale_width = self.__width / width + + if self.__keep_aspect_ratio: + if self.__resize_method == "lower_bound": + # scale such that output size is lower bound + if scale_width > scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "upper_bound": + # scale such that output size is upper bound + if scale_width < scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "minimal": + # scale as least as possbile + if abs(1 - scale_width) < abs(1 - scale_height): + # fit width + scale_height = scale_width + else: + # fit 
height + scale_width = scale_height + else: + raise ValueError( + f"resize_method {self.__resize_method} not implemented" + ) + + if self.__resize_method == "lower_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, min_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, min_val=self.__width + ) + elif self.__resize_method == "upper_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, max_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, max_val=self.__width + ) + elif self.__resize_method == "minimal": + new_height = self.constrain_to_multiple_of(scale_height * height) + new_width = self.constrain_to_multiple_of(scale_width * width) + else: + raise ValueError( + f"resize_method {self.__resize_method} not implemented") + + return (new_width, new_height) + + def __call__(self, x): + width, height = self.get_size(*x.shape[-2:][::-1]) + return nn.functional.interpolate(x, (int(height), int(width)), mode='bilinear', align_corners=True) + +class PrepForMidas(object): + def __init__(self, resize_mode="minimal", keep_aspect_ratio=True, img_size=384, do_resize=True): + if isinstance(img_size, int): + img_size = (img_size, img_size) + net_h, net_w = img_size + self.normalization = Normalize( + mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + self.resizer = Resize(net_w, net_h, keep_aspect_ratio=keep_aspect_ratio, ensure_multiple_of=32, resize_method=resize_mode) \ + if do_resize else nn.Identity() + + def __call__(self, x): + return self.normalization(self.resizer(x)) + + +class MidasCore(nn.Module): + def __init__(self, midas, trainable=False, fetch_features=True, layer_names=('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1'), freeze_bn=False, keep_aspect_ratio=True, + img_size=384, **kwargs): + """Midas Base model used for multi-scale feature extraction. + + Args: + midas (torch.nn.Module): Midas model. + trainable (bool, optional): Train midas model. 
Defaults to False. + fetch_features (bool, optional): Extract multi-scale features. Defaults to True. + layer_names (tuple, optional): Layers used for feature extraction. Order = (head output features, last layer features, ...decoder features). Defaults to ('out_conv', 'l4_rn', 'r4', 'r3', 'r2', 'r1'). + freeze_bn (bool, optional): Freeze BatchNorm. Generally results in better finetuning performance. Defaults to False. + keep_aspect_ratio (bool, optional): Keep the aspect ratio of input images while resizing. Defaults to True. + img_size (int, tuple, optional): Input resolution. Defaults to 384. + """ + super().__init__() + self.core = midas + self.output_channels = None + self.core_out = {} + self.trainable = trainable + self.fetch_features = fetch_features + # midas.scratch.output_conv = nn.Identity() + self.handles = [] + # self.layer_names = ['out_conv','l4_rn', 'r4', 'r3', 'r2', 'r1'] + self.layer_names = layer_names + + self.set_trainable(trainable) + self.set_fetch_features(fetch_features) + + self.prep = PrepForMidas(keep_aspect_ratio=keep_aspect_ratio, + img_size=img_size, do_resize=kwargs.get('do_resize', True)) + + if freeze_bn: + self.freeze_bn() + + def set_trainable(self, trainable): + self.trainable = trainable + if trainable: + self.unfreeze() + else: + self.freeze() + return self + + def set_fetch_features(self, fetch_features): + self.fetch_features = fetch_features + if fetch_features: + if len(self.handles) == 0: + self.attach_hooks(self.core) + else: + self.remove_hooks() + return self + + def freeze(self): + for p in self.parameters(): + p.requires_grad = False + self.trainable = False + return self + + def unfreeze(self): + for p in self.parameters(): + p.requires_grad = True + self.trainable = True + return self + + def freeze_bn(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + return self + + def forward(self, x, denorm=False, return_rel_depth=False): + with torch.no_grad(): + if denorm: + x = 
denormalize(x) + x = self.prep(x) + # print("Shape after prep: ", x.shape) + + with torch.set_grad_enabled(self.trainable): + + # print("Input size to Midascore", x.shape) + rel_depth = self.core(x) + # print("Output from midas shape", rel_depth.shape) + if not self.fetch_features: + return rel_depth + out = [self.core_out[k] for k in self.layer_names] + + if return_rel_depth: + return rel_depth, out + return out + + def get_rel_pos_params(self): + for name, p in self.core.pretrained.named_parameters(): + if "relative_position" in name: + yield p + + def get_enc_params_except_rel_pos(self): + for name, p in self.core.pretrained.named_parameters(): + if "relative_position" not in name: + yield p + + def freeze_encoder(self, freeze_rel_pos=False): + if freeze_rel_pos: + for p in self.core.pretrained.parameters(): + p.requires_grad = False + else: + for p in self.get_enc_params_except_rel_pos(): + p.requires_grad = False + return self + + def attach_hooks(self, midas): + if len(self.handles) > 0: + self.remove_hooks() + if "out_conv" in self.layer_names: + self.handles.append(list(midas.scratch.output_conv.children())[ + 3].register_forward_hook(get_activation("out_conv", self.core_out))) + if "r4" in self.layer_names: + self.handles.append(midas.scratch.refinenet4.register_forward_hook( + get_activation("r4", self.core_out))) + if "r3" in self.layer_names: + self.handles.append(midas.scratch.refinenet3.register_forward_hook( + get_activation("r3", self.core_out))) + if "r2" in self.layer_names: + self.handles.append(midas.scratch.refinenet2.register_forward_hook( + get_activation("r2", self.core_out))) + if "r1" in self.layer_names: + self.handles.append(midas.scratch.refinenet1.register_forward_hook( + get_activation("r1", self.core_out))) + if "l4_rn" in self.layer_names: + self.handles.append(midas.scratch.layer4_rn.register_forward_hook( + get_activation("l4_rn", self.core_out))) + + return self + + def remove_hooks(self): + for h in self.handles: + h.remove() + 
return self + + def __del__(self): + self.remove_hooks() + + def set_output_channels(self, model_type): + self.output_channels = MIDAS_SETTINGS[model_type] + + @staticmethod + def build(midas_model_type="DPT_BEiT_L_384", train_midas=False, use_pretrained_midas=True, fetch_features=False, freeze_bn=True, force_keep_ar=False, force_reload=False, **kwargs): + if midas_model_type not in MIDAS_SETTINGS: + raise ValueError( + f"Invalid model type: {midas_model_type}. Must be one of {list(MIDAS_SETTINGS.keys())}") + if "img_size" in kwargs: + kwargs = MidasCore.parse_img_size(kwargs) + img_size = kwargs.pop("img_size", [384, 384]) + print("img_size", img_size) + # TODO: use locally-bundled midas + # The repo should be changed back to isl-org/MiDaS once this MR lands + midas = torch.hub.load("semjon00/MiDaS", midas_model_type, + pretrained=use_pretrained_midas, force_reload=force_reload) + kwargs.update({'keep_aspect_ratio': force_keep_ar}) + midas_core = MidasCore(midas, trainable=train_midas, fetch_features=fetch_features, + freeze_bn=freeze_bn, img_size=img_size, **kwargs) + midas_core.set_output_channels(midas_model_type) + return midas_core + + @staticmethod + def build_from_config(config): + return MidasCore.build(**config) + + @staticmethod + def parse_img_size(config): + assert 'img_size' in config + if isinstance(config['img_size'], str): + assert "," in config['img_size'], "img_size should be a string with comma separated img_size=H,W" + config['img_size'] = list(map(int, config['img_size'].split(","))) + assert len( + config['img_size']) == 2, "img_size should be a string with comma separated img_size=H,W" + elif isinstance(config['img_size'], int): + config['img_size'] = [config['img_size'], config['img_size']] + else: + assert isinstance(config['img_size'], list) and len( + config['img_size']) == 2, "img_size should be a list of H,W" + return config + + +nchannels2models = { + tuple([256]*5): ["DPT_BEiT_L_384", "DPT_BEiT_L_512", "DPT_BEiT_B_384", 
"DPT_SwinV2_L_384", "DPT_SwinV2_B_384", "DPT_SwinV2_T_256", "DPT_Large", "DPT_Hybrid"], + (512, 256, 128, 64, 64): ["MiDaS_small"] +} + +# Model name to number of output channels +MIDAS_SETTINGS = {m: k for k, v in nchannels2models.items() + for m in v + } diff --git a/dzoedepth/models/builder.py b/dzoedepth/models/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..4bd273e019bdcead1e1043d8ca283b6f16f60c0d --- /dev/null +++ b/dzoedepth/models/builder.py @@ -0,0 +1,51 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +from importlib import import_module +from dzoedepth.models.depth_model import DepthModel + +def build_model(config) -> DepthModel: + """Builds a model from a config. The model is specified by the model name and version in the config. 
The model is then constructed using the build_from_config function of the model interface. + This function should be used to construct models for training and evaluation. + + Args: + config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder. + + Returns: + torch.nn.Module: Model corresponding to name and version as specified in config + """ + module_name = f"dzoedepth.models.{config.model}" + try: + module = import_module(module_name) + except ModuleNotFoundError as e: + # print the original error message + print(e) + raise ValueError( + f"Model {config.model} not found. Refer above error for details.") from e + try: + get_version = getattr(module, "get_version") + except AttributeError as e: + raise ValueError( + f"Model {config.model} has no get_version function.") from e + return get_version(config.version_name).build_from_config(config) diff --git a/dzoedepth/models/depth_model.py b/dzoedepth/models/depth_model.py new file mode 100644 index 0000000000000000000000000000000000000000..19d57094d8919801963851fa7ed1befe0c842640 --- /dev/null +++ b/dzoedepth/models/depth_model.py @@ -0,0 +1,153 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+ +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchvision import transforms +import PIL.Image +from PIL import Image +from typing import Union + + +class DepthModel(nn.Module): + def __init__(self): + super().__init__() + self.device = 'cpu' + + def to(self, device) -> nn.Module: + self.device = device + return super().to(device) + + def forward(self, x, *args, **kwargs): + raise NotImplementedError + + def _infer(self, x: torch.Tensor): + """ + Inference interface for the model + Args: + x (torch.Tensor): input tensor of shape (b, c, h, w) + Returns: + torch.Tensor: output tensor of shape (b, 1, h, w) + """ + return self(x)['metric_depth'] + + def _infer_with_pad_aug(self, x: torch.Tensor, pad_input: bool=True, fh: float=3, fw: float=3, upsampling_mode: str='bicubic', padding_mode="reflect", **kwargs) -> torch.Tensor: + """ + Inference interface for the model with padding augmentation + Padding augmentation fixes the boundary artifacts in the output depth map. + Boundary artifacts are sometimes caused by the fact that the model is trained on NYU raw dataset which has a black or white border around the image. + This augmentation pads the input image and crops the prediction back to the original size / view. + + Note: This augmentation is not required for the models trained with 'avoid_boundary'=True. 
+ Args: + x (torch.Tensor): input tensor of shape (b, c, h, w) + pad_input (bool, optional): whether to pad the input or not. Defaults to True. + fh (float, optional): height padding factor. The padding is calculated as sqrt(h/2) * fh. Defaults to 3. + fw (float, optional): width padding factor. The padding is calculated as sqrt(w/2) * fw. Defaults to 3. + upsampling_mode (str, optional): upsampling mode. Defaults to 'bicubic'. + padding_mode (str, optional): padding mode. Defaults to "reflect". + Returns: + torch.Tensor: output tensor of shape (b, 1, h, w) + """ + # assert x is nchw and c = 3 + assert x.dim() == 4, "x must be 4 dimensional, got {}".format(x.dim()) + assert x.shape[1] == 3, "x must have 3 channels, got {}".format(x.shape[1]) + + if pad_input: + assert fh > 0 or fw > 0, "atlease one of fh and fw must be greater than 0" + pad_h = int(np.sqrt(x.shape[2]/2) * fh) + pad_w = int(np.sqrt(x.shape[3]/2) * fw) + padding = [pad_w, pad_w] + if pad_h > 0: + padding += [pad_h, pad_h] + + x = F.pad(x, padding, mode=padding_mode, **kwargs) + out = self._infer(x) + if out.shape[-2:] != x.shape[-2:]: + out = F.interpolate(out, size=(x.shape[2], x.shape[3]), mode=upsampling_mode, align_corners=False) + if pad_input: + # crop to the original size, handling the case where pad_h and pad_w is 0 + if pad_h > 0: + out = out[:, :, pad_h:-pad_h,:] + if pad_w > 0: + out = out[:, :, :, pad_w:-pad_w] + return out + + def infer_with_flip_aug(self, x, pad_input: bool=True, **kwargs) -> torch.Tensor: + """ + Inference interface for the model with horizontal flip augmentation + Horizontal flip augmentation improves the accuracy of the model by averaging the output of the model with and without horizontal flip. + Args: + x (torch.Tensor): input tensor of shape (b, c, h, w) + pad_input (bool, optional): whether to use padding augmentation. Defaults to True. 
+ Returns: + torch.Tensor: output tensor of shape (b, 1, h, w) + """ + # infer with horizontal flip and average + out = self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs) + out_flip = self._infer_with_pad_aug(torch.flip(x, dims=[3]), pad_input=pad_input, **kwargs) + out = (out + torch.flip(out_flip, dims=[3])) / 2 + return out + + def infer(self, x, pad_input: bool=True, with_flip_aug: bool=True, **kwargs) -> torch.Tensor: + """ + Inference interface for the model + Args: + x (torch.Tensor): input tensor of shape (b, c, h, w) + pad_input (bool, optional): whether to use padding augmentation. Defaults to True. + with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True. + Returns: + torch.Tensor: output tensor of shape (b, 1, h, w) + """ + if with_flip_aug: + return self.infer_with_flip_aug(x, pad_input=pad_input, **kwargs) + else: + return self._infer_with_pad_aug(x, pad_input=pad_input, **kwargs) + + @torch.no_grad() + def infer_pil(self, pil_img, pad_input: bool=True, with_flip_aug: bool=True, output_type: str="numpy", **kwargs) -> Union[np.ndarray, PIL.Image.Image, torch.Tensor]: + """ + Inference interface for the model for PIL image + Args: + pil_img (PIL.Image.Image): input PIL image + pad_input (bool, optional): whether to use padding augmentation. Defaults to True. + with_flip_aug (bool, optional): whether to use horizontal flip augmentation. Defaults to True. + output_type (str, optional): output type. Supported values are 'numpy', 'pil' and 'tensor'. Defaults to "numpy". 
+ """ + # dtype IS ADDED, NOT PRESENT IN THE MAINLINE + x = transforms.ToTensor()(pil_img).unsqueeze(0).to(device=self.device, dtype=next(self.parameters()).dtype) + out_tensor = self.infer(x, pad_input=pad_input, with_flip_aug=with_flip_aug, **kwargs) + if output_type == "numpy": + return out_tensor.squeeze().cpu().numpy() + elif output_type == "pil": + # uint16 is required for depth pil image + out_16bit_numpy = (out_tensor.squeeze().cpu().numpy()*256).astype(np.uint16) + return Image.fromarray(out_16bit_numpy) + elif output_type == "tensor": + return out_tensor.squeeze().cpu() + else: + raise ValueError(f"output_type {output_type} not supported. Supported values are 'numpy', 'pil' and 'tensor'") + \ No newline at end of file diff --git a/dzoedepth/models/layers/__init__.py b/dzoedepth/models/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/dzoedepth/models/layers/attractor.py b/dzoedepth/models/layers/attractor.py new file mode 100644 index 0000000000000000000000000000000000000000..b5e3473ca6e2271dc28666314cf8f92f52f7e3c6 --- /dev/null +++ b/dzoedepth/models/layers/attractor.py @@ -0,0 +1,208 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
import torch
import torch.nn as nn


@torch.jit.script
def exp_attractor(dx, alpha: float = 300, gamma: int = 2):
    """Exponential attractor: dc = exp(-alpha*|dx|^gamma) * dx.

    Args:
        dx (torch.Tensor): difference between an attractor point and a bin center.
        alpha (float, optional): multiplies |dx|^gamma inside the exponent. Defaults to 300.
        gamma (int, optional): exponent applied to |dx|. Defaults to 2.

    Returns:
        torch.Tensor: shift dc, same shape as ``dx``.
    """
    decay = torch.exp(-alpha * torch.abs(dx).pow(gamma))
    return decay * dx


@torch.jit.script
def inv_attractor(dx, alpha: float = 300, gamma: int = 2):
    """Inverse attractor: dc = dx / (1 + alpha*dx^gamma).

    Args:
        dx (torch.Tensor): difference between an attractor point and a bin center.
        alpha (float, optional): multiplies dx^gamma in the denominator. Defaults to 300.
        gamma (int, optional): exponent applied to dx. Defaults to 2.

    Returns:
        torch.Tensor: shift dc, same shape as ``dx``.
    """
    return dx / (1 + alpha * dx.pow(gamma))


def _total_shift(dist, attractors, b_centers, kind, n_attractors, memory_efficient):
    """Combine the per-attractor shifts of every bin center.

    ``attractors`` is (n, n_attractors, h, w) and ``b_centers`` is
    (n, nbins, h, w); the result has the shape of ``b_centers``.
    """
    if memory_efficient:
        # One attractor at a time, so the (n, a, nbins, h, w) tensor is never built.
        delta_c = torch.zeros_like(b_centers, device=b_centers.device)
        for attractor in attractors.unbind(dim=1):
            delta_c += dist(attractor.unsqueeze(1) - b_centers)
        if kind == 'mean':
            delta_c = delta_c / n_attractors
        return delta_c

    # Only 'mean' and 'sum' are valid on this path; anything else is a KeyError.
    reduce_fn = {'mean': torch.mean, 'sum': torch.sum}[kind]
    pairwise = dist(attractors.unsqueeze(2) - b_centers.unsqueeze(1))
    return reduce_fn(pairwise, dim=1)


class AttractorLayer(nn.Module):
    def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10,
                 alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False):
        """
        Attractor layer whose scaled bin centers are sorted and clipped to [min_depth, max_depth].
        """
        super().__init__()

        self.n_attractors = n_attractors
        self.n_bins = n_bins
        self.min_depth = min_depth
        self.max_depth = max_depth
        # NOTE(review): alpha and gamma are stored but forward() never passes
        # them on, so the attractor functions run with their own defaults.
        self.alpha = alpha
        self.gamma = gamma
        self.kind = kind
        self.attractor_type = attractor_type
        self.memory_efficient = memory_efficient

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_attractors*2, 1, 1, 0),  # two channels per attractor
            nn.ReLU(inplace=True)
        )

    def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
        """
        Args:
            x (torch.Tensor) : feature block; shape - n, c, h, w
            b_prev (torch.Tensor) : previous bin centers; shape - n, prev_nbins, h', w' (resized to h, w)
            prev_b_embedding (torch.Tensor, optional) : added to ``x`` when given
            interpolate (bool) : resize ``prev_b_embedding`` to the spatial size of ``x`` first
            is_for_query : not used by this method

        Returns:
            tuple(torch.Tensor, torch.Tensor) : shifted centers, and the same centers
            scaled to depth, sorted along dim 1 and clipped
        """
        if prev_b_embedding is not None:
            if interpolate:
                prev_b_embedding = nn.functional.interpolate(
                    prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
            x = x + prev_b_embedding

        eps = 1e-3
        raw = self._net(x) + eps
        n, _, h, w = raw.shape
        raw = raw.view(n, self.n_attractors, 2, h, w)
        A_normed = raw / raw.sum(dim=2, keepdim=True)  # n, a, 2, h, w
        # NOTE(review): the normalised tensor above is discarded; channel 0 of
        # the un-normalised activations is what gets used. Kept as-is because
        # changing it alters the layer's outputs - confirm before "fixing".
        A_normed = raw[:, :, 0, ...]  # n, na, h, w

        b_centers = nn.functional.interpolate(
            b_prev, (h, w), mode='bilinear', align_corners=True)

        dist = exp_attractor if self.attractor_type == 'exp' else inv_attractor
        delta_c = _total_shift(dist, A_normed, b_centers, self.kind,
                               self.n_attractors, self.memory_efficient)

        b_new_centers = b_centers + delta_c
        depth_range = self.max_depth - self.min_depth
        scaled = depth_range * b_new_centers + self.min_depth
        ordered, _ = torch.sort(scaled, dim=1)
        return b_new_centers, torch.clip(ordered, self.min_depth, self.max_depth)


class AttractorLayerUnnormed(nn.Module):
    def __init__(self, in_features, n_bins, n_attractors=16, mlp_dim=128, min_depth=1e-3, max_depth=10,
                 alpha=300, gamma=2, kind='sum', attractor_type='exp', memory_efficient=False):
        """
        Attractor layer whose bin centers are returned without scaling, sorting or clipping.
        """
        super().__init__()

        self.n_attractors = n_attractors
        self.n_bins = n_bins
        self.min_depth = min_depth
        self.max_depth = max_depth
        # NOTE(review): as in AttractorLayer, alpha/gamma are not forwarded.
        self.alpha = alpha
        self.gamma = gamma
        self.kind = kind
        self.attractor_type = attractor_type
        self.memory_efficient = memory_efficient

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_attractors, 1, 1, 0),
            nn.Softplus()
        )

    def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
        """
        Args:
            x (torch.Tensor) : feature block; shape - n, c, h, w
            b_prev (torch.Tensor) : previous bin centers; shape - n, prev_nbins, h', w' (resized to h, w)
            prev_b_embedding (torch.Tensor, optional) : added to ``x`` when given
            interpolate (bool) : resize ``prev_b_embedding`` to the spatial size of ``x`` first
            is_for_query : not used by this method

        Returns:
            tuple(torch.Tensor, torch.Tensor) : the new bin centers, twice (same tensor),
            so the return shape matches AttractorLayer
        """
        if prev_b_embedding is not None:
            if interpolate:
                prev_b_embedding = nn.functional.interpolate(
                    prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
            x = x + prev_b_embedding

        attractors = self._net(x)
        h, w = attractors.shape[-2:]

        b_centers = nn.functional.interpolate(
            b_prev, (h, w), mode='bilinear', align_corners=True)

        dist = exp_attractor if self.attractor_type == 'exp' else inv_attractor
        delta_c = _total_shift(dist, attractors, b_centers, self.kind,
                               self.n_attractors, self.memory_efficient)

        b_new_centers = b_centers + delta_c
        return b_new_centers, b_new_centers
import torch
import torch.nn as nn


def log_binom(n, k, eps=1e-7):
    """Approximate log(nCk) as n*log(n) - k*log(k) - (n-k)*log(n-k).

    ``eps`` is added to ``n``, to ``k`` and inside the last logarithm so that
    no log is evaluated at zero.
    """
    n_shifted = n + eps
    k_shifted = k + eps
    remainder = n_shifted - k_shifted
    return (n_shifted * torch.log(n_shifted)
            - k_shifted * torch.log(k_shifted)
            - remainder * torch.log(remainder + eps))


class LogBinomial(nn.Module):
    def __init__(self, n_classes=256, act=torch.softmax):
        """Log-binomial distribution over ``n_classes`` classes.

        Args:
            n_classes (int, optional): number of output classes. Defaults to 256.
            act (callable, optional): applied as ``act(y / t, dim=1)`` to the
                log-binomial scores. Defaults to torch.softmax.
        """
        super().__init__()
        self.K = n_classes
        self.act = act
        # Class indices 0..K-1 and the constant K-1, both shaped to broadcast
        # along the channel axis of an NCHW tensor.
        class_index = torch.arange(0, n_classes).view(1, -1, 1, 1)
        self.register_buffer('k_idx', class_index)
        last_index = torch.Tensor([self.K-1]).view(1, -1, 1, 1)
        self.register_buffer('K_minus_1', last_index)

    def forward(self, x, t=1., eps=1e-4):
        """Turn per-pixel probabilities into a distribution over the classes.

        Args:
            x (torch.Tensor): probabilities, N1HW or NHW (a channel axis is added to NHW input).
            t (float or torch.Tensor, optional): temperature dividing the scores. Defaults to 1..
            eps (float, optional): lower clamp for ``x`` and ``1 - x``. Defaults to 1e-4.

        Returns:
            torch.Tensor: ``act`` applied over dim 1 of the K log-binomial scores.
        """
        if x.ndim == 3:
            x = x.unsqueeze(1)  # make it nchw

        # 1 - x is clamped before x itself is overwritten by its clamped value.
        p_fail = torch.clamp(1 - x, eps, 1)
        p_hit = torch.clamp(x, eps, 1)
        log_coeff = log_binom(self.K_minus_1, self.k_idx)
        hits = self.k_idx * torch.log(p_hit)
        misses = (self.K - 1 - self.k_idx) * torch.log(p_fail)
        scores = log_coeff + hits + misses
        return self.act(scores/t, dim=1)


class ConditionalLogBinomial(nn.Module):
    def __init__(self, in_features, condition_dim, n_classes=256, bottleneck_factor=2, p_eps=1e-4, max_temp=50, min_temp=1e-7, act=torch.softmax):
        """Log-binomial head whose probability and temperature come from a small conv MLP.

        Args:
            in_features (int): number of input channels in main feature
            condition_dim (int): number of input channels in condition feature
            n_classes (int, optional): Number of classes. Defaults to 256.
            bottleneck_factor (int, optional): the hidden width is (in_features + condition_dim) // bottleneck_factor. Defaults to 2.
            p_eps (float, optional): added to the MLP outputs before they are normalised. Defaults to 1e-4.
            max_temp (float, optional): Maximum temperature of output distribution. Defaults to 50.
            min_temp (float, optional): Minimum temperature of output distribution. Defaults to 1e-7.
            act (callable, optional): passed to LogBinomial. Defaults to torch.softmax.
        """
        super().__init__()
        self.p_eps = p_eps
        self.max_temp = max_temp
        self.min_temp = min_temp
        self.log_binomial_transform = LogBinomial(n_classes, act=act)
        joint_channels = in_features + condition_dim
        bottleneck = joint_channels // bottleneck_factor
        self.mlp = nn.Sequential(
            nn.Conv2d(joint_channels, bottleneck,
                      kernel_size=1, stride=1, padding=0),
            nn.GELU(),
            # four outputs: a pair for p and a pair for t
            nn.Conv2d(bottleneck, 2+2, kernel_size=1, stride=1, padding=0),
            nn.Softplus()
        )

    def forward(self, x, cond):
        """Forward pass

        Args:
            x (torch.Tensor - NCHW): Main feature
            cond (torch.Tensor - NCHW): condition feature, concatenated to ``x`` along channels

        Returns:
            torch.Tensor: output of the log-binomial transform
        """
        def first_share(pair):
            # Fraction contributed by the first channel of a two-channel pair.
            pair = pair + self.p_eps
            return pair[:, 0, ...] / (pair[:, 0, ...] + pair[:, 1, ...])

        pt = self.mlp(torch.cat((x, cond), dim=1))
        p = first_share(pt[:, :2, ...])
        t = first_share(pt[:, 2:, ...]).unsqueeze(1)
        # Map the share in (0, 1) onto the (min_temp, max_temp) range.
        t = (self.max_temp - self.min_temp) * t + self.min_temp

        return self.log_binomial_transform(p, t)
import torch
import torch.nn as nn


def _centers_from_normed_widths(widths_normed, min_depth, max_depth):
    """Convert normalised bin widths (NCHW, bins on dim 1) into bin centers."""
    widths = (max_depth - min_depth) * widths_normed  # .shape NCHW
    # pad has the form (left, right, top, bottom, front, back): one extra
    # leading channel holding min_depth, so the first edge sits at min_depth
    widths = nn.functional.pad(
        widths, (0, 0, 0, 0, 1, 0), mode='constant', value=min_depth)
    edges = torch.cumsum(widths, dim=1)  # .shape NCHW
    return 0.5 * (edges[:, :-1, ...] + edges[:, 1:, ...])


class SeedBinRegressor(nn.Module):
    def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10):
        """Bin regressor whose centers lie between min_depth and max_depth.

        Args:
            in_features (int): input channels
            n_bins (int, optional): Number of bins. Defaults to 16.
            mlp_dim (int, optional): Hidden dimension. Defaults to 256.
            min_depth (float, optional): Min depth value. Defaults to 1e-3.
            max_depth (float, optional): Max depth value. Defaults to 10.
        """
        super().__init__()
        self.version = "1_1"
        self.min_depth = min_depth
        self.max_depth = max_depth

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_bins, 1, 1, 0),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        """
        Returns (normalised bin widths, bin centers), one vector per pixel.
        """
        eps = 1e-3
        raw = self._net(x) + eps
        # Widths sum to one over the bin axis.
        widths_normed = raw / raw.sum(dim=1, keepdim=True)
        centers = _centers_from_normed_widths(
            widths_normed, self.min_depth, self.max_depth)
        return widths_normed, centers


class SeedBinRegressorUnnormed(nn.Module):
    def __init__(self, in_features, n_bins=16, mlp_dim=256, min_depth=1e-3, max_depth=10):
        """Bin regressor that outputs the Softplus activations directly as centers.

        Args:
            in_features (int): input channels
            n_bins (int, optional): Number of bin centers. Defaults to 16.
            mlp_dim (int, optional): Hidden dimension. Defaults to 256.
            min_depth (float, optional): Not used. (for compatibility with SeedBinRegressor)
            max_depth (float, optional): Not used. (for compatibility with SeedBinRegressor)
        """
        super().__init__()
        self.version = "1_1"
        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, n_bins, 1, 1, 0),
            nn.Softplus()
        )

    def forward(self, x):
        """
        Returns the bin centers twice (same tensor), mirroring the two-value
        return of SeedBinRegressor.
        """
        centers = self._net(x)
        return centers, centers


class Projector(nn.Module):
    def __init__(self, in_features, out_features, mlp_dim=128):
        """Two-layer 1x1-convolution MLP.

        Args:
            in_features (int): input channels
            out_features (int): output channels
            mlp_dim (int, optional): hidden dimension. Defaults to 128.
        """
        super().__init__()

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.ReLU(inplace=True),
            nn.Conv2d(mlp_dim, out_features, 1, 1, 0),
        )

    def forward(self, x):
        return self._net(x)


class LinearSplitter(nn.Module):
    def __init__(self, in_features, prev_nbins, split_factor=2, mlp_dim=128, min_depth=1e-3, max_depth=10):
        """Splits each of ``prev_nbins`` bins into ``split_factor`` sub-bins."""
        super().__init__()

        self.prev_nbins = prev_nbins
        self.split_factor = split_factor
        self.min_depth = min_depth
        self.max_depth = max_depth

        self._net = nn.Sequential(
            nn.Conv2d(in_features, mlp_dim, 1, 1, 0),
            nn.GELU(),
            nn.Conv2d(mlp_dim, prev_nbins * split_factor, 1, 1, 0),
            nn.ReLU()
        )

    def forward(self, x, b_prev, prev_b_embedding=None, interpolate=True, is_for_query=False):
        """
        x : feature block; shape - n, c, h, w
        b_prev : previous bin widths normed; shape - n, prev_nbins, h', w' (resized to h, w)
        prev_b_embedding : optional tensor added to ``x`` (resized first when ``interpolate``)
        is_for_query : not used by this method

        Returns (new normalised widths, bin centers), each n, prev_nbins * split_factor, h, w
        """
        if prev_b_embedding is not None:
            if interpolate:
                prev_b_embedding = nn.functional.interpolate(
                    prev_b_embedding, x.shape[-2:], mode='bilinear', align_corners=True)
            x = x + prev_b_embedding

        eps = 1e-3
        splits = self._net(x) + eps
        n, _, h, w = splits.shape
        splits = splits.view(n, self.prev_nbins, self.split_factor, h, w)
        split_fractions = splits / splits.sum(dim=2, keepdim=True)  # fractional splits

        parent = nn.functional.interpolate(b_prev, (h, w), mode='bilinear', align_corners=True)
        # renormalise: the resized widths are divided by their sum over the bin axis
        parent = parent / parent.sum(dim=1, keepdim=True)

        # every parent width is shared out among its sub-bins
        b = (parent.unsqueeze(2) * split_fractions).flatten(1, 2)  # n, prev_nbins * split_factor, h, w

        # bin centers for loss calculation
        B_centers = _centers_from_normed_widths(b, self.min_depth, self.max_depth)
        return b, B_centers
import torch
import torch.nn as nn


class PatchTransformerEncoder(nn.Module):
    def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False):
        """Patch embedding (strided convolution) followed by a 4-layer transformer encoder.

        Args:
            in_channels (int): Input channels
            patch_size (int, optional): kernel size and stride of the embedding convolution. Defaults to 10.
            embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128.
            num_heads (int, optional): number of attention heads. Defaults to 4.
            use_class_token (bool, optional): Whether to prepend one extra (zero) token to the sequence. Defaults to False.
        """
        super().__init__()
        self.use_class_token = use_class_token

        # Construction order (encoder first, then the convolution) is kept so
        # seeded initialisation is unchanged.
        layer = nn.TransformerEncoderLayer(
            embedding_dim, num_heads, dim_feedforward=1024)
        self.transformer_encoder = nn.TransformerEncoder(
            layer, num_layers=4)  # takes shape S,N,E

        self.embedding_convPxP = nn.Conv2d(
            in_channels, embedding_dim,
            kernel_size=patch_size, stride=patch_size, padding=0)

    def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'):
        """Build sinusoidal positional encodings.

        The sine terms fill the first half of the embedding axis and the
        cosine terms the second half (they are concatenated, not interleaved).

        Args:
            sequence_length (int): Sequence length
            batch_size (int): number of copies along the batch axis
            embedding_dim (int): Embedding dimension
            device: device on which the tensors are created. Defaults to 'cpu'.

        Returns:
            torch.Tensor SBE: Positional encodings (float32)
        """
        positions = torch.arange(
            0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1)
        even_dims = torch.arange(
            0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0)
        log_base = torch.log(torch.tensor(10000.0, device=device))
        inv_freq = torch.exp(even_dims * (-log_base / embedding_dim))
        angles = positions * inv_freq
        table = torch.cat([torch.sin(angles), torch.cos(angles)], dim=1)
        return table.unsqueeze(1).repeat(1, batch_size, 1)

    def forward(self, x):
        """Forward pass

        Args:
            x (torch.Tensor - NCHW): Input feature tensor

        Returns:
            torch.Tensor - SNE: Transformer output embeddings. S - sequence length, N - batch size, E - embedding dim
        """
        tokens = self.embedding_convPxP(x).flatten(2)  # n, embedding_dim, s
        if self.use_class_token:
            # one zero token in front of the sequence
            tokens = nn.functional.pad(tokens, (1, 0))

        # S,N,E layout required by the transformer
        tokens = tokens.permute(2, 0, 1)
        S, N, E = tokens.shape
        # dtype IS ADDED, NOT PRESENT IN THE MAINLINE
        # (the float32 encoding is cast to the dtype of the embeddings)
        positions = self.positional_encoding_1d(
            S, N, E, device=tokens.device).to(dtype=tokens.dtype)
        return self.transformer_encoder(tokens + positions)  # .shape = S, N, E
import torch


def load_state_dict(model, state_dict):
    """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict.

    DataParallel prefixes state_dict keys with 'module.' when saving.
    If the model is not a DataParallel model but the state_dict is, then prefixes are removed.
    If the model is a DataParallel model but the state_dict is not, then prefixes are added.

    Loading is non-strict: keys that do not match the model are ignored
    silently and the success message is printed regardless.

    Returns:
        The ``model`` that was passed in.
    """
    # Checkpoints may wrap the weights under a top-level "model" entry.
    state_dict = state_dict.get('model', state_dict)
    # if model is a DataParallel model, then state_dict keys are prefixed with 'module.'

    do_prefix = isinstance(
        model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel))
    prefix = 'module.'
    state = {}
    for k, v in state_dict.items():
        if k.startswith(prefix) and not do_prefix:
            k = k[len(prefix):]

        if not k.startswith(prefix) and do_prefix:
            k = prefix + k

        state[k] = v

    model.load_state_dict(state, strict=False)
    print("Loaded successfully")
    return model


def load_wts(model, checkpoint_path):
    """Load weights from a local checkpoint file (mapped to CPU) into ``model``."""
    # NOTE(review): torch.load may unpickle arbitrary objects depending on the
    # torch version/defaults; only point this at checkpoints you trust.
    ckpt = torch.load(checkpoint_path, map_location='cpu')
    return load_state_dict(model, ckpt)


def load_state_dict_from_url(model, url, **kwargs):
    """Fetch a checkpoint via torch.hub (mapped to CPU) and load it into ``model``.

    Extra keyword arguments are forwarded to ``torch.hub.load_state_dict_from_url``.
    """
    state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs)
    return load_state_dict(model, state_dict)


def load_state_from_resource(model, resource: str):
    """Loads weights to the model from a given resource. A resource can be of following types:
        1. URL. Prefixed with "url::"
                e.g. url::http(s)://url.resource.com/ckpt.pt

        2. Local path. Prefixed with "local::"
                e.g. local::/path/to/ckpt.pt


    Args:
        model (torch.nn.Module): Model
        resource (str): resource string

    Returns:
        torch.nn.Module: Model with loaded weights

    Raises:
        ValueError: if ``resource`` starts with neither prefix.
    """
    print(f"Using pretrained resource {resource}")

    # Only the leading marker is stripped. Splitting on the marker would cut
    # the target short whenever the same text occurs again inside the URL/path.
    if resource.startswith('url::'):
        url = resource[len('url::'):]
        return load_state_dict_from_url(model, url, progress=True)

    elif resource.startswith('local::'):
        path = resource[len('local::'):]
        return load_wts(model, path)

    else:
        raise ValueError("Invalid resource type, only url:: and local:: are supported")
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +from .zoedepth_v1 import ZoeDepth + +all_versions = { + "v1": ZoeDepth, +} + +get_version = lambda v : all_versions[v] \ No newline at end of file diff --git a/dzoedepth/models/zoedepth/config_zoedepth.json b/dzoedepth/models/zoedepth/config_zoedepth.json new file mode 100644 index 0000000000000000000000000000000000000000..6c68a22d9eb552d799755ecb671e92db77ccf79c --- /dev/null +++ b/dzoedepth/models/zoedepth/config_zoedepth.json @@ -0,0 +1,58 @@ +{ + "model": { + "name": "ZoeDepth", + "version_name": "v1", + "n_bins": 64, + "bin_embedding_dim": 128, + "bin_centers_type": "softplus", + "n_attractors":[16, 8, 4, 1], + "attractor_alpha": 1000, + "attractor_gamma": 2, + "attractor_kind" : "mean", + "attractor_type" : "inv", + "midas_model_type" : "DPT_BEiT_L_384", + "min_temp": 0.0212, + "max_temp": 50.0, + "output_distribution": "logbinomial", + "memory_efficient": true, + "inverse_midas": false, + "img_size": [384, 512] + }, + + "train": { + "train_midas": true, + "use_pretrained_midas": true, + "trainer": "zoedepth", + "epochs": 5, + "bs": 16, + "optim_kwargs": {"lr": 0.000161, "wd": 0.01}, + "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, + "same_lr": false, + "w_si": 1, + "w_domain": 0.2, + "w_reg": 0, + "w_grad": 0, + "avoid_boundary": false, + "random_crop": false, + "input_width": 640, + "input_height": 480, + "midas_lr_factor": 1, + "encoder_lr_factor":10, + "pos_enc_lr_factor":10, + "freeze_midas_bn": true + + }, + + "infer":{ + "train_midas": false, + "use_pretrained_midas": false, + "pretrained_resource" : 
"url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt", + "force_keep_ar": true + }, + + "eval":{ + "train_midas": false, + "use_pretrained_midas": false, + "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt" + } +} \ No newline at end of file diff --git a/dzoedepth/models/zoedepth/config_zoedepth_kitti.json b/dzoedepth/models/zoedepth/config_zoedepth_kitti.json new file mode 100644 index 0000000000000000000000000000000000000000..3e7266ec2d7e918143f54ee728ea4d8d4e9adb11 --- /dev/null +++ b/dzoedepth/models/zoedepth/config_zoedepth_kitti.json @@ -0,0 +1,22 @@ +{ + "model": { + "bin_centers_type": "normed", + "img_size": [384, 768] + }, + + "train": { + }, + + "infer":{ + "train_midas": false, + "use_pretrained_midas": false, + "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt", + "force_keep_ar": true + }, + + "eval":{ + "train_midas": false, + "use_pretrained_midas": false, + "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt" + } +} \ No newline at end of file diff --git a/dzoedepth/models/zoedepth/zoedepth_v1.py b/dzoedepth/models/zoedepth/zoedepth_v1.py new file mode 100644 index 0000000000000000000000000000000000000000..6d62cbd34222d45e7485bdd032368930a9b2cf88 --- /dev/null +++ b/dzoedepth/models/zoedepth/zoedepth_v1.py @@ -0,0 +1,250 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and 
class ZoeDepth(DepthModel):
    """ZoeDepth with a single metric head on top of a MiDaS core."""

    def __init__(self, core, n_bins=64, bin_centers_type="softplus", bin_embedding_dim=128, min_depth=1e-3, max_depth=10,
                 n_attractors=[16, 8, 4, 1], attractor_alpha=300, attractor_gamma=2, attractor_kind='sum', attractor_type='exp', min_temp=5, max_temp=50, train_midas=True,
                 midas_lr_factor=10, encoder_lr_factor=10, pos_enc_lr_factor=10, inverse_midas=False, **kwargs):
        """ZoeDepth model. This is the version of ZoeDepth that has a single metric head

        Args:
            core (models.base_models.midas.MidasCore): The base midas model that is used for extraction of "relative" features
            n_bins (int, optional): Number of bin centers. Defaults to 64.
            bin_centers_type (str, optional): "normed", "softplus", "hybrid1" or "hybrid2". Selects the seed bin regressor / attractor layer pair.
                                              "normed" uses the normed variants of both, "softplus" the unnormed variants of both,
                                              "hybrid1" a normed seed regressor with unnormed attractors, "hybrid2" the opposite. Defaults to "softplus".
            bin_embedding_dim (int, optional): bin embedding dimension. Defaults to 128.
            min_depth (float, optional): Lower bound for normed bin centers. Defaults to 1e-3.
            max_depth (float, optional): Upper bound for normed bin centers. Defaults to 10.
            n_attractors (List[int], optional): Number of bin attractors at decoder layers. Only read, never mutated. Defaults to [16, 8, 4, 1].
            attractor_alpha (int, optional): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 300.
            attractor_gamma (int, optional): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
            attractor_kind (str, optional): Attraction aggregation "sum" or "mean". Defaults to 'sum'.
            attractor_type (str, optional): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'exp'.
            min_temp (int, optional): Lower bound for temperature of output probability distribution. Defaults to 5.
            max_temp (int, optional): Upper bound for temperature of output probability distribution. Defaults to 50.
            train_midas (bool, optional): Whether to train "core", the base midas model. Defaults to True.
            midas_lr_factor (int, optional): Learning rate reduction factor for base midas model except its encoder and positional encodings. Defaults to 10.
            encoder_lr_factor (int, optional): Learning rate reduction factor for the encoder in midas model. A value <= 0 freezes the encoder. Defaults to 10.
            pos_enc_lr_factor (int, optional): Learning rate reduction factor for positional encodings in the base midas model. Defaults to 10.
            inverse_midas (bool, optional): If True, ``forward`` inverts and min-max normalizes the relative depth before using it as conditioning. Defaults to False.
            **kwargs: Accepted and ignored, so that a shared config can be splatted into the constructor.

        Raises:
            ValueError: If ``bin_centers_type`` is not one of the four supported values.
        """
        super().__init__()

        self.core = core
        self.max_depth = max_depth
        self.min_depth = min_depth
        self.min_temp = min_temp
        self.bin_centers_type = bin_centers_type

        self.midas_lr_factor = midas_lr_factor
        self.encoder_lr_factor = encoder_lr_factor
        self.pos_enc_lr_factor = pos_enc_lr_factor
        self.train_midas = train_midas
        self.inverse_midas = inverse_midas

        # A non-positive factor means "do not train": freeze instead of dividing by it.
        if self.encoder_lr_factor <= 0:
            self.core.freeze_encoder(
                freeze_rel_pos=self.pos_enc_lr_factor <= 0)

        N_MIDAS_OUT = 32
        btlnck_features = self.core.output_channels[0]
        num_out_features = self.core.output_channels[1:]

        self.conv2 = nn.Conv2d(btlnck_features, btlnck_features,
                               kernel_size=1, stride=1, padding=0)  # btlnck conv

        if bin_centers_type == "normed":
            SeedBinRegressorLayer = SeedBinRegressor
            Attractor = AttractorLayer
        elif bin_centers_type == "softplus":
            SeedBinRegressorLayer = SeedBinRegressorUnnormed
            Attractor = AttractorLayerUnnormed
        elif bin_centers_type == "hybrid1":
            SeedBinRegressorLayer = SeedBinRegressor
            Attractor = AttractorLayerUnnormed
        elif bin_centers_type == "hybrid2":
            SeedBinRegressorLayer = SeedBinRegressorUnnormed
            Attractor = AttractorLayer
        else:
            raise ValueError(
                "bin_centers_type should be one of 'normed', 'softplus', 'hybrid1', 'hybrid2'")

        # NOTE: the submodules below are created in a fixed order; keep it,
        # since it determines the parameter / state_dict ordering.
        self.seed_bin_regressor = SeedBinRegressorLayer(
            btlnck_features, n_bins=n_bins, min_depth=min_depth, max_depth=max_depth)
        self.seed_projector = Projector(btlnck_features, bin_embedding_dim)
        self.projectors = nn.ModuleList([
            Projector(num_out, bin_embedding_dim)
            for num_out in num_out_features
        ])
        self.attractors = nn.ModuleList([
            Attractor(bin_embedding_dim, n_bins, n_attractors=n_attractors[i], min_depth=min_depth, max_depth=max_depth,
                      alpha=attractor_alpha, gamma=attractor_gamma, kind=attractor_kind, attractor_type=attractor_type)
            for i in range(len(num_out_features))
        ])

        last_in = N_MIDAS_OUT + 1  # +1 for relative depth

        # use log binomial instead of softmax
        self.conditional_log_binomial = ConditionalLogBinomial(
            last_in, bin_embedding_dim, n_classes=n_bins, min_temp=min_temp, max_temp=max_temp)

    def forward(self, x, return_final_centers=False, denorm=False, return_probs=False, **kwargs):
        """Predict metric depth for a batch of images.

        As a side effect the input width and height are stored on the module
        as ``orig_input_width`` / ``orig_input_height``.

        Args:
            x (torch.Tensor): Input image tensor of shape (B, C, H, W)
            return_final_centers (bool, optional): Whether to return the final bin centers. Defaults to False.
            denorm (bool, optional): Whether to denormalize the input image. This reverses ImageNet normalization as midas normalization is different. Defaults to False.
            return_probs (bool, optional): Whether to return the output probability distribution. Defaults to False.
            **kwargs: Accepted and ignored.

        Returns:
            dict: Dictionary containing the following keys:
                - metric_depth (torch.Tensor): Metric depth map, the sum over the bin dimension of probability * bin center (bin dimension kept with size 1)
                - bin_centers (torch.Tensor): Final bin centers, interpolated to the spatial size of the probability map. Present only if return_final_centers or return_probs is True
                - probs (torch.Tensor): Output probability distribution over the bins. Present only if return_probs is True

            The relative depth produced by the core is used internally as
            conditioning and is not part of the returned dictionary.
        """
        # Unpacking four values also rejects inputs that are not 4-D.
        _, _, h, w = x.shape
        # print("input shape ", x.shape)
        self.orig_input_width = w
        self.orig_input_height = h
        rel_depth, out = self.core(x, denorm=denorm, return_rel_depth=True)
        # print("output shapes", rel_depth.shape, out.shape)

        outconv_activation = out[0]
        btlnck = out[1]
        x_blocks = out[2:]

        bottleneck = self.conv2(btlnck)
        _, seed_b_centers = self.seed_bin_regressor(bottleneck)

        if self.bin_centers_type == 'normed' or self.bin_centers_type == 'hybrid2':
            # Rescale the seed centers using the configured depth bounds.
            b_prev = (seed_b_centers - self.min_depth) / \
                (self.max_depth - self.min_depth)
        else:
            b_prev = seed_b_centers

        prev_b_embedding = self.seed_projector(bottleneck)

        # unroll this loop for better performance
        # b_embedding / b_centers of the last iteration are used after the loop.
        for projector, attractor, block_feat in zip(self.projectors, self.attractors, x_blocks):
            b_embedding = projector(block_feat)
            bins, b_centers = attractor(
                b_embedding, b_prev, prev_b_embedding, interpolate=True)
            b_prev = bins.clone()
            prev_b_embedding = b_embedding.clone()

        last = outconv_activation

        if self.inverse_midas:
            # invert depth followed by normalization
            # NOTE(review): the min-max normalization divides by (max - min),
            # which is zero for a constant relative depth map — confirm
            # whether that case can occur.
            rel_depth = 1.0 / (rel_depth + 1e-6)
            rel_depth = (rel_depth - rel_depth.min()) / \
                (rel_depth.max() - rel_depth.min())
        # concat rel depth with last. First interpolate rel depth to last size
        rel_cond = rel_depth.unsqueeze(1)
        rel_cond = nn.functional.interpolate(
            rel_cond, size=last.shape[2:], mode='bilinear', align_corners=True)
        last = torch.cat([last, rel_cond], dim=1)

        b_embedding = nn.functional.interpolate(
            b_embedding, last.shape[-2:], mode='bilinear', align_corners=True)
        probs = self.conditional_log_binomial(last, b_embedding)

        # Now depth value is Sum px * cx , where cx are bin_centers from the last bin tensor
        # print(x.shape, b_centers.shape)
        b_centers = nn.functional.interpolate(
            b_centers, probs.shape[-2:], mode='bilinear', align_corners=True)
        metric_depth = torch.sum(probs * b_centers, dim=1, keepdim=True)

        # Structure output dict
        output = dict(metric_depth=metric_depth)
        if return_final_centers or return_probs:
            output['bin_centers'] = b_centers

        if return_probs:
            output['probs'] = probs

        return output

    def get_lr_params(self, lr):
        """
        Learning rate configuration for different layers of the model

        When ``train_midas`` is set, the encoder and the relative-position
        parameters of the core each get their own group (only if the
        respective factor is > 0) and the core's ``scratch`` parameters get a
        third one; each group uses ``lr`` divided by its factor. Every child
        module other than ``core`` is trained with the base ``lr``.

        Args:
            lr (float) : Base learning rate
        Returns:
            list : list of parameters to optimize and their learning rates, in the format required by torch optimizers.
        """
        param_conf = []
        if self.train_midas:
            if self.encoder_lr_factor > 0:
                param_conf.append({'params': self.core.get_enc_params_except_rel_pos(
                ), 'lr': lr / self.encoder_lr_factor})

            if self.pos_enc_lr_factor > 0:
                param_conf.append(
                    {'params': self.core.get_rel_pos_params(), 'lr': lr / self.pos_enc_lr_factor})

            midas_params = self.core.core.scratch.parameters()
            midas_lr_factor = self.midas_lr_factor
            param_conf.append(
                {'params': midas_params, 'lr': lr / midas_lr_factor})

        remaining_modules = [
            child for name, child in self.named_children() if name != 'core']
        remaining_params = itertools.chain(
            *[child.parameters() for child in remaining_modules])

        param_conf.append({'params': remaining_params, 'lr': lr})

        return param_conf

    @staticmethod
    def build(midas_model_type="DPT_BEiT_L_384", pretrained_resource=None, use_pretrained_midas=False, train_midas=False, freeze_midas_bn=True, **kwargs):
        """Build the MiDaS core and the ZoeDepth model, optionally loading weights.

        ``**kwargs`` is forwarded to both ``MidasCore.build`` and the
        ``ZoeDepth`` constructor. If ``pretrained_resource`` is truthy it must
        be a string understood by ``load_state_from_resource``.

        Returns:
            ZoeDepth: The constructed (and possibly weight-loaded) model.
        """
        core = MidasCore.build(midas_model_type=midas_model_type, use_pretrained_midas=use_pretrained_midas,
                               train_midas=train_midas, fetch_features=True, freeze_bn=freeze_midas_bn, **kwargs)
        model = ZoeDepth(core, **kwargs)
        if pretrained_resource:
            assert isinstance(pretrained_resource, str), "pretrained_resource must be a string"
            model = load_state_from_resource(model, pretrained_resource)
        return model

    @staticmethod
    def build_from_config(config):
        """Build the model by splatting ``config`` into ``ZoeDepth.build``."""
        return ZoeDepth.build(**config)
the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +from .zoedepth_nk_v1 import ZoeDepthNK + +all_versions = { + "v1": ZoeDepthNK, +} + +get_version = lambda v : all_versions[v] \ No newline at end of file diff --git a/dzoedepth/models/zoedepth_nk/config_zoedepth_nk.json b/dzoedepth/models/zoedepth_nk/config_zoedepth_nk.json new file mode 100644 index 0000000000000000000000000000000000000000..ae036e38243566e0bb79a4821e4897d9bc4aaae1 --- /dev/null +++ b/dzoedepth/models/zoedepth_nk/config_zoedepth_nk.json @@ -0,0 +1,67 @@ +{ + "model": { + "name": "ZoeDepthNK", + "version_name": "v1", + "bin_conf" : [ + { + "name": "nyu", + "n_bins": 64, + "min_depth": 1e-3, + "max_depth": 10.0 + }, + { + "name": "kitti", + "n_bins": 64, + "min_depth": 1e-3, + "max_depth": 80.0 + } + ], + "bin_embedding_dim": 128, + "bin_centers_type": "softplus", + "n_attractors":[16, 8, 4, 1], + "attractor_alpha": 1000, + "attractor_gamma": 2, + "attractor_kind" : "mean", + "attractor_type" : "inv", + "min_temp": 0.0212, + "max_temp": 50.0, + "memory_efficient": true, + "midas_model_type" : "DPT_BEiT_L_384", + "img_size": 
[384, 512] + }, + + "train": { + "train_midas": true, + "use_pretrained_midas": true, + "trainer": "zoedepth_nk", + "epochs": 5, + "bs": 16, + "optim_kwargs": {"lr": 0.0002512, "wd": 0.01}, + "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, + "same_lr": false, + "w_si": 1, + "w_domain": 100, + "avoid_boundary": false, + "random_crop": false, + "input_width": 640, + "input_height": 480, + "w_grad": 0, + "w_reg": 0, + "midas_lr_factor": 10, + "encoder_lr_factor":10, + "pos_enc_lr_factor":10 + }, + + "infer": { + "train_midas": false, + "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", + "use_pretrained_midas": false, + "force_keep_ar": true + }, + + "eval": { + "train_midas": false, + "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", + "use_pretrained_midas": false + } +} \ No newline at end of file diff --git a/dzoedepth/models/zoedepth_nk/zoedepth_nk_v1.py b/dzoedepth/models/zoedepth_nk/zoedepth_nk_v1.py new file mode 100644 index 0000000000000000000000000000000000000000..6eccc9cd9c77895e4aa9695e703f2bd3786eb2bc --- /dev/null +++ b/dzoedepth/models/zoedepth_nk/zoedepth_nk_v1.py @@ -0,0 +1,333 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
class ZoeDepthNK(DepthModel):
    """ZoeDepth with one metric head per bin configuration and a learned router."""

    def __init__(self, core, bin_conf, bin_centers_type="softplus", bin_embedding_dim=128,
                 n_attractors=[16, 8, 4, 1], attractor_alpha=300, attractor_gamma=2, attractor_kind='sum', attractor_type='exp',
                 min_temp=5, max_temp=50,
                 memory_efficient=False, train_midas=True,
                 is_midas_pretrained=True, midas_lr_factor=1, encoder_lr_factor=10, pos_enc_lr_factor=10, inverse_midas=False, **kwargs):
        """ZoeDepthNK model. This is the version of ZoeDepth that has two metric heads and uses a learned router to route to experts.

        Args:
            core (models.base_models.midas.MidasCore): The base midas model that is used for extraction of "relative" features

            bin_conf (List[dict]): A list of dictionaries that contain the bin configuration for each metric head. Each dictionary should contain the following keys:
                                    "name" (str, typically same as the dataset name), "n_bins" (int), "min_depth" (float), "max_depth" (float)

                                   The length of this list determines the number of metric heads.
                                   ``forward`` routes to the heads named "nyu" and "kitti", so both names are expected to be present.
            bin_centers_type (str, optional): "normed", "softplus", "hybrid1" or "hybrid2". Selects the seed bin regressor / attractor layer pair.
                                              "normed" uses the normed variants of both, "softplus" the unnormed variants of both,
                                              "hybrid1" a normed seed regressor with unnormed attractors, "hybrid2" the opposite. Defaults to "softplus".
            bin_embedding_dim (int, optional): bin embedding dimension. Defaults to 128.

            n_attractors (List[int], optional): Number of bin attractors at decoder layers. Only read, never mutated. Defaults to [16, 8, 4, 1].
            attractor_alpha (int, optional): Proportional attractor strength. Refer to models.layers.attractor for more details. Defaults to 300.
            attractor_gamma (int, optional): Exponential attractor strength. Refer to models.layers.attractor for more details. Defaults to 2.
            attractor_kind (str, optional): Attraction aggregation "sum" or "mean". Defaults to 'sum'.
            attractor_type (str, optional): Type of attractor to use; "inv" (Inverse attractor) or "exp" (Exponential attractor). Defaults to 'exp'.

            min_temp (int, optional): Lower bound for temperature of output probability distribution. Defaults to 5.
            max_temp (int, optional): Upper bound for temperature of output probability distribution. Defaults to 50.

            memory_efficient (bool, optional): Whether to use memory efficient version of attractor layers. Memory efficient version is slower but is recommended incase of multiple metric heads in order save GPU memory. Defaults to False.

            train_midas (bool, optional): Whether to train "core", the base midas model. Defaults to True.
            is_midas_pretrained (bool, optional): Is "core" pretrained? If False, ``midas_lr_factor`` is ignored by ``get_lr_params``. Defaults to True.
            midas_lr_factor (int, optional): Learning rate reduction factor for base midas model except its encoder and positional encodings. Defaults to 1.
            encoder_lr_factor (int, optional): Learning rate reduction factor for the encoder in midas model. Defaults to 10.
            pos_enc_lr_factor (int, optional): Learning rate reduction factor for positional encodings in the base midas model. Defaults to 10.
            inverse_midas (bool, optional): Stored on the module; not read elsewhere in this class. Defaults to False.
            **kwargs: Accepted and ignored, so that a shared config can be splatted into the constructor.

        Raises:
            ValueError: If ``bin_centers_type`` is not one of the four supported values.
        """

        super().__init__()

        self.core = core
        self.bin_conf = bin_conf
        self.min_temp = min_temp
        self.max_temp = max_temp
        self.memory_efficient = memory_efficient
        self.train_midas = train_midas
        self.is_midas_pretrained = is_midas_pretrained
        self.midas_lr_factor = midas_lr_factor
        self.encoder_lr_factor = encoder_lr_factor
        self.pos_enc_lr_factor = pos_enc_lr_factor
        self.inverse_midas = inverse_midas

        N_MIDAS_OUT = 32
        btlnck_features = self.core.output_channels[0]
        num_out_features = self.core.output_channels[1:]
        # self.scales = [16, 8, 4, 2]  # spatial scale factors

        self.conv2 = nn.Conv2d(
            btlnck_features, btlnck_features, kernel_size=1, stride=1, padding=0)

        # Transformer classifier on the bottleneck
        self.patch_transformer = PatchTransformerEncoder(
            btlnck_features, 1, 128, use_class_token=True)
        self.mlp_classifier = nn.Sequential(
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 2)
        )

        if bin_centers_type == "normed":
            SeedBinRegressorLayer = SeedBinRegressor
            Attractor = AttractorLayer
        elif bin_centers_type == "softplus":
            SeedBinRegressorLayer = SeedBinRegressorUnnormed
            Attractor = AttractorLayerUnnormed
        elif bin_centers_type == "hybrid1":
            SeedBinRegressorLayer = SeedBinRegressor
            Attractor = AttractorLayerUnnormed
        elif bin_centers_type == "hybrid2":
            SeedBinRegressorLayer = SeedBinRegressorUnnormed
            Attractor = AttractorLayer
        else:
            raise ValueError(
                "bin_centers_type should be one of 'normed', 'softplus', 'hybrid1', 'hybrid2'")
        self.bin_centers_type = bin_centers_type
        # We have bins for each bin conf.
        # Create a map (ModuleDict) of 'name' -> seed_bin_regressor
        self.seed_bin_regressors = nn.ModuleDict(
            {conf['name']: SeedBinRegressorLayer(btlnck_features, conf["n_bins"], mlp_dim=bin_embedding_dim//2, min_depth=conf["min_depth"], max_depth=conf["max_depth"])
             for conf in bin_conf}
        )

        self.seed_projector = Projector(
            btlnck_features, bin_embedding_dim, mlp_dim=bin_embedding_dim//2)
        self.projectors = nn.ModuleList([
            Projector(num_out, bin_embedding_dim, mlp_dim=bin_embedding_dim//2)
            for num_out in num_out_features
        ])

        # Create a map (ModuleDict) of 'name' -> attractors (ModuleList)
        # NOTE(review): the single-head ZoeDepth model passes n_bins as the
        # second positional argument and n_attractors by keyword, whereas here
        # n_attractors[i] is the second positional argument. Confirm against
        # the Attractor signature before changing it — released checkpoints
        # presumably depend on the resulting layer shapes.
        self.attractors = nn.ModuleDict(
            {conf['name']: nn.ModuleList([
                Attractor(bin_embedding_dim, n_attractors[i],
                          mlp_dim=bin_embedding_dim, alpha=attractor_alpha,
                          gamma=attractor_gamma, kind=attractor_kind,
                          attractor_type=attractor_type, memory_efficient=memory_efficient,
                          min_depth=conf["min_depth"], max_depth=conf["max_depth"])
                for i in range(len(n_attractors))
            ])
                for conf in bin_conf}
        )

        last_in = N_MIDAS_OUT
        # conditional log binomial for each bin conf
        self.conditional_log_binomial = nn.ModuleDict(
            {conf['name']: ConditionalLogBinomial(last_in, bin_embedding_dim, conf['n_bins'], bottleneck_factor=4, min_temp=self.min_temp, max_temp=self.max_temp)
             for conf in bin_conf}
        )

    def forward(self, x, return_final_centers=False, denorm=False, return_probs=False, **kwargs):
        """Route the batch to one metric head and predict metric depth.

        The router logits are summed over the batch before the arg-max, so a
        single head ("nyu" for index 0, "kitti" for index 1) is chosen for the
        whole batch. As a side effect the input width and height are stored on
        the module as ``orig_input_width`` / ``orig_input_height``.

        Args:
            x (torch.Tensor): Input image tensor of shape (B, C, H, W). Assumes all images are from the same domain.
            return_final_centers (bool, optional): Whether to return the final centers of the attractors. Defaults to False.
            denorm (bool, optional): Whether to denormalize the input image. Defaults to False.
            return_probs (bool, optional): Whether to return the probabilities of the bins. Defaults to False.
            **kwargs: Accepted and ignored.

        Returns:
            dict: Dictionary of outputs with keys:
                - "metric_depth": Metric depth map, the sum over the bin dimension of probability * bin center (bin dimension kept with size 1)
                - "domain_logits": Router logits, one pair of values per input
                - "bin_centers": Final bin centers, interpolated to the spatial size of the last core activation. Present only if return_final_centers or return_probs is True
                - "probs": Bin probabilities. Present only if return_probs is True

            The relative depth produced by the core is not used by this model
            and is not part of the returned dictionary.

        Raises:
            ValueError: If ``bin_conf`` has no entry named after the selected head.
        """
        # Unpacking four values also rejects inputs that are not 4-D.
        _, _, h, w = x.shape
        self.orig_input_width = w
        self.orig_input_height = h
        rel_depth, out = self.core(x, denorm=denorm, return_rel_depth=True)

        outconv_activation = out[0]
        btlnck = out[1]
        x_blocks = out[2:]

        bottleneck = self.conv2(btlnck)

        # Predict which path to take
        embedding = self.patch_transformer(bottleneck)[0]  # N, E
        domain_logits = self.mlp_classifier(embedding)  # N, 2
        domain_vote = torch.softmax(domain_logits.sum(
            dim=0, keepdim=True), dim=-1)  # 1, 2

        # Get the path
        bin_conf_name = ["nyu", "kitti"][torch.argmax(
            domain_vote, dim=-1).squeeze().item()]

        # Entries are looked up by key, exactly as in __init__; attribute
        # access (c.name) fails on plain dicts.
        conf = next(
            (c for c in self.bin_conf if c['name'] == bin_conf_name), None)
        if conf is None:
            raise ValueError(
                f"bin_conf_name {bin_conf_name} not found in bin_confs")

        min_depth = conf['min_depth']
        max_depth = conf['max_depth']

        seed_bin_regressor = self.seed_bin_regressors[bin_conf_name]
        _, seed_b_centers = seed_bin_regressor(bottleneck)
        if self.bin_centers_type == 'normed' or self.bin_centers_type == 'hybrid2':
            # Rescale the seed centers using the selected head's depth bounds.
            b_prev = (seed_b_centers - min_depth)/(max_depth - min_depth)
        else:
            b_prev = seed_b_centers
        prev_b_embedding = self.seed_projector(bottleneck)

        attractors = self.attractors[bin_conf_name]
        # b_embedding / b_centers of the last iteration are used after the loop.
        for projector, attractor, block_feat in zip(self.projectors, attractors, x_blocks):
            b_embedding = projector(block_feat)
            bins, b_centers = attractor(
                b_embedding, b_prev, prev_b_embedding, interpolate=True)
            b_prev = bins
            prev_b_embedding = b_embedding

        last = outconv_activation

        b_centers = nn.functional.interpolate(
            b_centers, last.shape[-2:], mode='bilinear', align_corners=True)
        b_embedding = nn.functional.interpolate(
            b_embedding, last.shape[-2:], mode='bilinear', align_corners=True)

        clb = self.conditional_log_binomial[bin_conf_name]
        probs = clb(last, b_embedding)

        # Now depth value is Sum px * cx , where cx are bin_centers from the last bin tensor
        # print(x.shape, b_centers.shape)
        # b_centers = nn.functional.interpolate(b_centers, x.shape[-2:], mode='bilinear', align_corners=True)
        metric_depth = torch.sum(probs * b_centers, dim=1, keepdim=True)

        output = dict(domain_logits=domain_logits, metric_depth=metric_depth)
        if return_final_centers or return_probs:
            output['bin_centers'] = b_centers

        if return_probs:
            output['probs'] = probs
        return output

    def get_lr_params(self, lr):
        """
        Learning rate configuration for different layers of the model

        When ``train_midas`` is set, three groups are created for the core:
        the ``pretrained`` parameters whose name does not contain
        "relative_position", those whose name does, and the ``scratch``
        parameters; each uses ``lr`` divided by its factor (the scratch factor
        is 1.0 when ``is_midas_pretrained`` is False). Every child module
        other than ``core`` is trained with the base ``lr``.

        Args:
            lr (float) : Base learning rate
        Returns:
            list : list of parameters to optimize and their learning rates, in the format required by torch optimizers.
        """
        param_conf = []
        if self.train_midas:
            def get_rel_pos_params():
                for name, p in self.core.core.pretrained.named_parameters():
                    if "relative_position" in name:
                        yield p

            def get_enc_params_except_rel_pos():
                for name, p in self.core.core.pretrained.named_parameters():
                    if "relative_position" not in name:
                        yield p

            encoder_params = get_enc_params_except_rel_pos()
            rel_pos_params = get_rel_pos_params()
            midas_params = self.core.core.scratch.parameters()
            midas_lr_factor = self.midas_lr_factor if self.is_midas_pretrained else 1.0
            param_conf.extend([
                {'params': encoder_params, 'lr': lr / self.encoder_lr_factor},
                {'params': rel_pos_params, 'lr': lr / self.pos_enc_lr_factor},
                {'params': midas_params, 'lr': lr / midas_lr_factor}
            ])

        remaining_modules = [
            child for name, child in self.named_children() if name != 'core']
        remaining_params = itertools.chain(
            *[child.parameters() for child in remaining_modules])
        param_conf.append({'params': remaining_params, 'lr': lr})
        return param_conf

    def get_conf_parameters(self, conf_name):
        """
        Returns parameters of all the ModuleDicts children that are exclusively used for the given bin configuration
        """
        params = []
        for name, child in self.named_children():
            if isinstance(child, nn.ModuleDict):
                for bin_conf_name, module in child.items():
                    if bin_conf_name == conf_name:
                        params += list(module.parameters())
        return params

    def freeze_conf(self, conf_name):
        """
        Freezes all the parameters of all the ModuleDicts children that are exclusively used for the given bin configuration
        """
        for p in self.get_conf_parameters(conf_name):
            p.requires_grad = False

    def unfreeze_conf(self, conf_name):
        """
        Unfreezes all the parameters of all the ModuleDicts children that are exclusively used for the given bin configuration
        """
        for p in self.get_conf_parameters(conf_name):
            p.requires_grad = True

    def freeze_all_confs(self):
        """
        Freezes all the parameters of all the ModuleDicts children
        """
        for name, child in self.named_children():
            if isinstance(child, nn.ModuleDict):
                for bin_conf_name, module in child.items():
                    for p in module.parameters():
                        p.requires_grad = False

    @staticmethod
    def build(midas_model_type="DPT_BEiT_L_384", pretrained_resource=None, use_pretrained_midas=False, train_midas=False, freeze_midas_bn=True, **kwargs):
        """Build the MiDaS core and the ZoeDepthNK model, optionally loading weights.

        ``**kwargs`` is forwarded to both ``MidasCore.build`` and the
        ``ZoeDepthNK`` constructor. If ``pretrained_resource`` is truthy it
        must be a string understood by ``load_state_from_resource``.

        Returns:
            ZoeDepthNK: The constructed (and possibly weight-loaded) model.
        """
        core = MidasCore.build(midas_model_type=midas_model_type, use_pretrained_midas=use_pretrained_midas,
                               train_midas=train_midas, fetch_features=True, freeze_bn=freeze_midas_bn, **kwargs)
        model = ZoeDepthNK(core, **kwargs)
        if pretrained_resource:
            assert isinstance(pretrained_resource, str), "pretrained_resource must be a string"
            model = load_state_from_resource(model, pretrained_resource)
        return model

    @staticmethod
    def build_from_config(config):
        """Build the model by splatting ``config`` into ``ZoeDepthNK.build``."""
        return ZoeDepthNK.build(**config)
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/dzoedepth/trainers/base_trainer.py b/dzoedepth/trainers/base_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..cd01e019fcd39e40ccb2a9b57d1e19fcc4c5ac8f --- /dev/null +++ b/dzoedepth/trainers/base_trainer.py @@ -0,0 +1,326 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +# File author: Shariq Farooq Bhat + +import os +import uuid +import warnings +from datetime import datetime as dt +from typing import Dict + +import matplotlib.pyplot as plt +import numpy as np +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.optim as optim +import wandb +from tqdm import tqdm + +from zoedepth.utils.config import flatten +from zoedepth.utils.misc import RunningAverageDict, colorize, colors + + +def is_rank_zero(args): + return args.rank == 0 + + +class BaseTrainer: + def __init__(self, config, model, train_loader, test_loader=None, device=None): + """ Base Trainer class for training a model.""" + + self.config = config + self.metric_criterion = "abs_rel" + if device is None: + device = torch.device( + 'cuda') if torch.cuda.is_available() else torch.device('cpu') + self.device = device + self.model = model + self.train_loader = train_loader + self.test_loader = test_loader + self.optimizer = self.init_optimizer() + self.scheduler = self.init_scheduler() + + def resize_to_target(self, prediction, target): + if prediction.shape[2:] != target.shape[-2:]: + prediction = nn.functional.interpolate( + prediction, size=target.shape[-2:], mode="bilinear", align_corners=True + ) + return prediction + + def load_ckpt(self, checkpoint_dir="./checkpoints", ckpt_type="best"): + import glob + import os + + from zoedepth.models.model_io import load_wts + + if hasattr(self.config, "checkpoint"): + checkpoint = self.config.checkpoint + elif hasattr(self.config, "ckpt_pattern"): + pattern = self.config.ckpt_pattern + matches = glob.glob(os.path.join( + checkpoint_dir, f"*{pattern}*{ckpt_type}*")) + if not (len(matches) > 0): + raise ValueError(f"No matches found for the pattern {pattern}") + checkpoint = matches[0] + else: + return + model = load_wts(self.model, checkpoint) + # TODO : Resuming training is not properly supported in this repo. Implement loading / saving of optimizer and scheduler to support it. 
+ print("Loaded weights from {0}".format(checkpoint)) + warnings.warn( + "Resuming training is not properly supported in this repo. Implement loading / saving of optimizer and scheduler to support it.") + self.model = model + + def init_optimizer(self): + m = self.model.module if self.config.multigpu else self.model + + if self.config.same_lr: + print("Using same LR") + if hasattr(m, 'core'): + m.core.unfreeze() + params = self.model.parameters() + else: + print("Using diff LR") + if not hasattr(m, 'get_lr_params'): + raise NotImplementedError( + f"Model {m.__class__.__name__} does not implement get_lr_params. Please implement it or use the same LR for all parameters.") + + params = m.get_lr_params(self.config.lr) + + return optim.AdamW(params, lr=self.config.lr, weight_decay=self.config.wd) + + def init_scheduler(self): + lrs = [l['lr'] for l in self.optimizer.param_groups] + return optim.lr_scheduler.OneCycleLR(self.optimizer, lrs, epochs=self.config.epochs, steps_per_epoch=len(self.train_loader), + cycle_momentum=self.config.cycle_momentum, + base_momentum=0.85, max_momentum=0.95, div_factor=self.config.div_factor, final_div_factor=self.config.final_div_factor, pct_start=self.config.pct_start, three_phase=self.config.three_phase) + + def train_on_batch(self, batch, train_step): + raise NotImplementedError + + def validate_on_batch(self, batch, val_step): + raise NotImplementedError + + def raise_if_nan(self, losses): + for key, value in losses.items(): + if torch.isnan(value): + raise ValueError(f"{key} is NaN, Stopping training") + + @property + def iters_per_epoch(self): + return len(self.train_loader) + + @property + def total_iters(self): + return self.config.epochs * self.iters_per_epoch + + def should_early_stop(self): + if self.config.get('early_stop', False) and self.step > self.config.early_stop: + return True + + def train(self): + print(f"Training {self.config.name}") + if self.config.uid is None: + self.config.uid = str(uuid.uuid4()).split('-')[-1] + 
run_id = f"{dt.now().strftime('%d-%h_%H-%M')}-{self.config.uid}" + self.config.run_id = run_id + self.config.experiment_id = f"{self.config.name}{self.config.version_name}_{run_id}" + self.should_write = ((not self.config.distributed) + or self.config.rank == 0) + self.should_log = self.should_write # and logging + if self.should_log: + tags = self.config.tags.split( + ',') if self.config.tags != '' else None + wandb.init(project=self.config.project, name=self.config.experiment_id, config=flatten(self.config), dir=self.config.root, + tags=tags, notes=self.config.notes, settings=wandb.Settings(start_method="fork")) + + self.model.train() + self.step = 0 + best_loss = np.inf + validate_every = int(self.config.validate_every * self.iters_per_epoch) + + + if self.config.prefetch: + + for i, batch in tqdm(enumerate(self.train_loader), desc=f"Prefetching...", + total=self.iters_per_epoch) if is_rank_zero(self.config) else enumerate(self.train_loader): + pass + + losses = {} + def stringify_losses(L): return "; ".join(map( + lambda kv: f"{colors.fg.purple}{kv[0]}{colors.reset}: {round(kv[1].item(),3):.4e}", L.items())) + for epoch in range(self.config.epochs): + if self.should_early_stop(): + break + + self.epoch = epoch + ################################# Train loop ########################################################## + if self.should_log: + wandb.log({"Epoch": epoch}, step=self.step) + pbar = tqdm(enumerate(self.train_loader), desc=f"Epoch: {epoch + 1}/{self.config.epochs}. 
Loop: Train", + total=self.iters_per_epoch) if is_rank_zero(self.config) else enumerate(self.train_loader) + for i, batch in pbar: + if self.should_early_stop(): + print("Early stopping") + break + # print(f"Batch {self.step+1} on rank {self.config.rank}") + losses = self.train_on_batch(batch, i) + # print(f"trained batch {self.step+1} on rank {self.config.rank}") + + self.raise_if_nan(losses) + if is_rank_zero(self.config) and self.config.print_losses: + pbar.set_description( + f"Epoch: {epoch + 1}/{self.config.epochs}. Loop: Train. Losses: {stringify_losses(losses)}") + self.scheduler.step() + + if self.should_log and self.step % 50 == 0: + wandb.log({f"Train/{name}": loss.item() + for name, loss in losses.items()}, step=self.step) + + self.step += 1 + + ######################################################################################################## + + if self.test_loader: + if (self.step % validate_every) == 0: + self.model.eval() + if self.should_write: + self.save_checkpoint( + f"{self.config.experiment_id}_latest.pt") + + ################################# Validation loop ################################################## + # validate on the entire validation set in every process but save only from rank 0, I know, inefficient, but avoids divergence of processes + metrics, test_losses = self.validate() + # print("Validated: {}".format(metrics)) + if self.should_log: + wandb.log( + {f"Test/{name}": tloss for name, tloss in test_losses.items()}, step=self.step) + + wandb.log({f"Metrics/{k}": v for k, + v in metrics.items()}, step=self.step) + + if (metrics[self.metric_criterion] < best_loss) and self.should_write: + self.save_checkpoint( + f"{self.config.experiment_id}_best.pt") + best_loss = metrics[self.metric_criterion] + + self.model.train() + + if self.config.distributed: + dist.barrier() + # print(f"Validated: {metrics} on device {self.config.rank}") + + # print(f"Finished step {self.step} on device {self.config.rank}") + 
################################################################################################# + + # Save / validate at the end + self.step += 1 # log as final point + self.model.eval() + self.save_checkpoint(f"{self.config.experiment_id}_latest.pt") + if self.test_loader: + + ################################# Validation loop ################################################## + metrics, test_losses = self.validate() + # print("Validated: {}".format(metrics)) + if self.should_log: + wandb.log({f"Test/{name}": tloss for name, + tloss in test_losses.items()}, step=self.step) + wandb.log({f"Metrics/{k}": v for k, + v in metrics.items()}, step=self.step) + + if (metrics[self.metric_criterion] < best_loss) and self.should_write: + self.save_checkpoint( + f"{self.config.experiment_id}_best.pt") + best_loss = metrics[self.metric_criterion] + + self.model.train() + + def validate(self): + with torch.no_grad(): + losses_avg = RunningAverageDict() + metrics_avg = RunningAverageDict() + for i, batch in tqdm(enumerate(self.test_loader), desc=f"Epoch: {self.epoch + 1}/{self.config.epochs}. 
Loop: Validation", total=len(self.test_loader), disable=not is_rank_zero(self.config)): + metrics, losses = self.validate_on_batch(batch, val_step=i) + + if losses: + losses_avg.update(losses) + if metrics: + metrics_avg.update(metrics) + + return metrics_avg.get_value(), losses_avg.get_value() + + def save_checkpoint(self, filename): + if not self.should_write: + return + root = self.config.save_dir + if not os.path.isdir(root): + os.makedirs(root) + + fpath = os.path.join(root, filename) + m = self.model.module if self.config.multigpu else self.model + torch.save( + { + "model": m.state_dict(), + "optimizer": None, # TODO : Change to self.optimizer.state_dict() if resume support is needed, currently None to reduce file size + "epoch": self.epoch + }, fpath) + + def log_images(self, rgb: Dict[str, list] = {}, depth: Dict[str, list] = {}, scalar_field: Dict[str, list] = {}, prefix="", scalar_cmap="jet", min_depth=None, max_depth=None): + if not self.should_log: + return + + if min_depth is None: + try: + min_depth = self.config.min_depth + max_depth = self.config.max_depth + except AttributeError: + min_depth = None + max_depth = None + + depth = {k: colorize(v, vmin=min_depth, vmax=max_depth) + for k, v in depth.items()} + scalar_field = {k: colorize( + v, vmin=None, vmax=None, cmap=scalar_cmap) for k, v in scalar_field.items()} + images = {**rgb, **depth, **scalar_field} + wimages = { + prefix+"Predictions": [wandb.Image(v, caption=k) for k, v in images.items()]} + wandb.log(wimages, step=self.step) + + def log_line_plot(self, data): + if not self.should_log: + return + + plt.plot(data) + plt.ylabel("Scale factors") + wandb.log({"Scale factors": wandb.Image(plt)}, step=self.step) + plt.close() + + def log_bar_plot(self, title, labels, values): + if not self.should_log: + return + + data = [[label, val] for (label, val) in zip(labels, values)] + table = wandb.Table(data=data, columns=["label", "value"]) + wandb.log({title: wandb.plot.bar(table, "label", + "value", 
title=title)}, step=self.step) diff --git a/dzoedepth/trainers/builder.py b/dzoedepth/trainers/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..1492b2924d638bbc045fe451e1f7e28b0b7eeb39 --- /dev/null +++ b/dzoedepth/trainers/builder.py @@ -0,0 +1,48 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +from importlib import import_module + + +def get_trainer(config): + """Builds and returns a trainer based on the config. + + Args: + config (dict): the config dict (typically constructed using utils.config.get_config) + config.trainer (str): the name of the trainer to use. 
The module named "{config.trainer}_trainer" must exist in trainers root module + + Raises: + ValueError: If the specified trainer does not exist under trainers/ folder + + Returns: + Trainer (inherited from zoedepth.trainers.BaseTrainer): The Trainer object + """ + assert "trainer" in config and config.trainer is not None and config.trainer != '', "Trainer not specified. Config: {0}".format( + config) + try: + Trainer = getattr(import_module( + f"zoedepth.trainers.{config.trainer}_trainer"), 'Trainer') + except ModuleNotFoundError as e: + raise ValueError(f"Trainer {config.trainer}_trainer not found.") from e + return Trainer diff --git a/dzoedepth/trainers/loss.py b/dzoedepth/trainers/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..a1f50fe6bd4b5d3dbe65b0f4b3a260ec1483ecf7 --- /dev/null +++ b/dzoedepth/trainers/loss.py @@ -0,0 +1,316 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.cuda.amp as amp +import numpy as np + + +KEY_OUTPUT = 'metric_depth' + + +def extract_key(prediction, key): + if isinstance(prediction, dict): + return prediction[key] + return prediction + + +# Main loss function used for ZoeDepth. Copy/paste from AdaBins repo (https://github.com/shariqfarooq123/AdaBins/blob/0952d91e9e762be310bb4cd055cbfe2448c0ce20/loss.py#L7) +class SILogLoss(nn.Module): + """SILog loss (pixel-wise)""" + def __init__(self, beta=0.15): + super(SILogLoss, self).__init__() + self.name = 'SILog' + self.beta = beta + + def forward(self, input, target, mask=None, interpolate=True, return_interpolated=False): + input = extract_key(input, KEY_OUTPUT) + if input.shape[-1] != target.shape[-1] and interpolate: + input = nn.functional.interpolate( + input, target.shape[-2:], mode='bilinear', align_corners=True) + intr_input = input + else: + intr_input = input + + if target.ndim == 3: + target = target.unsqueeze(1) + + if mask is not None: + if mask.ndim == 3: + mask = mask.unsqueeze(1) + + input = input[mask] + target = target[mask] + + with amp.autocast(enabled=False): # amp causes NaNs in this loss function + alpha = 1e-7 + g = torch.log(input + alpha) - torch.log(target + alpha) + + # n, c, h, w = g.shape + # norm = 1/(h*w) + # Dg = norm * torch.sum(g**2) - (0.85/(norm**2)) * (torch.sum(g))**2 + + Dg = torch.var(g) + self.beta * torch.pow(torch.mean(g), 2) + + loss = 10 * torch.sqrt(Dg) + + if torch.isnan(loss): + print("Nan SILog loss") + print("input:", input.shape) + print("target:", target.shape) + print("G", torch.sum(torch.isnan(g))) + print("Input min max", 
torch.min(input), torch.max(input)) + print("Target min max", torch.min(target), torch.max(target)) + print("Dg", torch.isnan(Dg)) + print("loss", torch.isnan(loss)) + + if not return_interpolated: + return loss + + return loss, intr_input + + +def grad(x): + # x.shape : n, c, h, w + diff_x = x[..., 1:, 1:] - x[..., 1:, :-1] + diff_y = x[..., 1:, 1:] - x[..., :-1, 1:] + mag = diff_x**2 + diff_y**2 + # angle_ratio + angle = torch.atan(diff_y / (diff_x + 1e-10)) + return mag, angle + + +def grad_mask(mask): + return mask[..., 1:, 1:] & mask[..., 1:, :-1] & mask[..., :-1, 1:] + + +class GradL1Loss(nn.Module): + """Gradient loss""" + def __init__(self): + super(GradL1Loss, self).__init__() + self.name = 'GradL1' + + def forward(self, input, target, mask=None, interpolate=True, return_interpolated=False): + input = extract_key(input, KEY_OUTPUT) + if input.shape[-1] != target.shape[-1] and interpolate: + input = nn.functional.interpolate( + input, target.shape[-2:], mode='bilinear', align_corners=True) + intr_input = input + else: + intr_input = input + + grad_gt = grad(target) + grad_pred = grad(input) + mask_g = grad_mask(mask) + + loss = nn.functional.l1_loss(grad_pred[0][mask_g], grad_gt[0][mask_g]) + loss = loss + \ + nn.functional.l1_loss(grad_pred[1][mask_g], grad_gt[1][mask_g]) + if not return_interpolated: + return loss + return loss, intr_input + + +class OrdinalRegressionLoss(object): + + def __init__(self, ord_num, beta, discretization="SID"): + self.ord_num = ord_num + self.beta = beta + self.discretization = discretization + + def _create_ord_label(self, gt): + N,one, H, W = gt.shape + # print("gt shape:", gt.shape) + + ord_c0 = torch.ones(N, self.ord_num, H, W).to(gt.device) + if self.discretization == "SID": + label = self.ord_num * torch.log(gt) / np.log(self.beta) + else: + label = self.ord_num * (gt - 1.0) / (self.beta - 1.0) + label = label.long() + mask = torch.linspace(0, self.ord_num - 1, self.ord_num, requires_grad=False) \ + .view(1, 
self.ord_num, 1, 1).to(gt.device) + mask = mask.repeat(N, 1, H, W).contiguous().long() + mask = (mask > label) + ord_c0[mask] = 0 + ord_c1 = 1 - ord_c0 + # implementation according to the paper. + # ord_label = torch.ones(N, self.ord_num * 2, H, W).to(gt.device) + # ord_label[:, 0::2, :, :] = ord_c0 + # ord_label[:, 1::2, :, :] = ord_c1 + # reimplementation for fast speed. + ord_label = torch.cat((ord_c0, ord_c1), dim=1) + return ord_label, mask + + def __call__(self, prob, gt): + """ + :param prob: ordinal regression probability, N x 2*Ord Num x H x W, torch.Tensor + :param gt: depth ground truth, NXHxW, torch.Tensor + :return: loss: loss value, torch.float + """ + # N, C, H, W = prob.shape + valid_mask = gt > 0. + ord_label, mask = self._create_ord_label(gt) + # print("prob shape: {}, ord label shape: {}".format(prob.shape, ord_label.shape)) + entropy = -prob * ord_label + loss = torch.sum(entropy, dim=1)[valid_mask.squeeze(1)] + return loss.mean() + + +class DiscreteNLLLoss(nn.Module): + """Cross entropy loss""" + def __init__(self, min_depth=1e-3, max_depth=10, depth_bins=64): + super(DiscreteNLLLoss, self).__init__() + self.name = 'CrossEntropy' + self.ignore_index = -(depth_bins + 1) + # self._loss_func = nn.NLLLoss(ignore_index=self.ignore_index) + self._loss_func = nn.CrossEntropyLoss(ignore_index=self.ignore_index) + self.min_depth = min_depth + self.max_depth = max_depth + self.depth_bins = depth_bins + self.alpha = 1 + self.zeta = 1 - min_depth + self.beta = max_depth + self.zeta + + def quantize_depth(self, depth): + # depth : N1HW + # output : NCHW + + # Quantize depth log-uniformly on [1, self.beta] into self.depth_bins bins + depth = torch.log(depth / self.alpha) / np.log(self.beta / self.alpha) + depth = depth * (self.depth_bins - 1) + depth = torch.round(depth) + depth = depth.long() + return depth + + + + def _dequantize_depth(self, depth): + """ + Inverse of quantization + depth : NCHW -> N1HW + """ + # Get the center of the bin + + + + + def 
forward(self, input, target, mask=None, interpolate=True, return_interpolated=False): + input = extract_key(input, KEY_OUTPUT) + # assert torch.all(input <= 0), "Input should be negative" + + if input.shape[-1] != target.shape[-1] and interpolate: + input = nn.functional.interpolate( + input, target.shape[-2:], mode='bilinear', align_corners=True) + intr_input = input + else: + intr_input = input + + # assert torch.all(input)<=1) + if target.ndim == 3: + target = target.unsqueeze(1) + + target = self.quantize_depth(target) + if mask is not None: + if mask.ndim == 3: + mask = mask.unsqueeze(1) + + # Set the mask to ignore_index + mask = mask.long() + input = input * mask + (1 - mask) * self.ignore_index + target = target * mask + (1 - mask) * self.ignore_index + + + + input = input.flatten(2) # N, nbins, H*W + target = target.flatten(1) # N, H*W + loss = self._loss_func(input, target) + + if not return_interpolated: + return loss + return loss, intr_input + + + + +def compute_scale_and_shift(prediction, target, mask): + # system matrix: A = [[a_00, a_01], [a_10, a_11]] + a_00 = torch.sum(mask * prediction * prediction, (1, 2)) + a_01 = torch.sum(mask * prediction, (1, 2)) + a_11 = torch.sum(mask, (1, 2)) + + # right hand side: b = [b_0, b_1] + b_0 = torch.sum(mask * prediction * target, (1, 2)) + b_1 = torch.sum(mask * target, (1, 2)) + + # solution: x = A^-1 . b = [[a_11, -a_01], [-a_10, a_00]] / (a_00 * a_11 - a_01 * a_10) . b + x_0 = torch.zeros_like(b_0) + x_1 = torch.zeros_like(b_1) + + det = a_00 * a_11 - a_01 * a_01 + # A needs to be a positive definite matrix. 
+ valid = det > 0 + + x_0[valid] = (a_11[valid] * b_0[valid] - a_01[valid] * b_1[valid]) / det[valid] + x_1[valid] = (-a_01[valid] * b_0[valid] + a_00[valid] * b_1[valid]) / det[valid] + + return x_0, x_1 +class ScaleAndShiftInvariantLoss(nn.Module): + def __init__(self): + super().__init__() + self.name = "SSILoss" + + def forward(self, prediction, target, mask, interpolate=True, return_interpolated=False): + + if prediction.shape[-1] != target.shape[-1] and interpolate: + prediction = nn.functional.interpolate(prediction, target.shape[-2:], mode='bilinear', align_corners=True) + intr_input = prediction + else: + intr_input = prediction + + + prediction, target, mask = prediction.squeeze(), target.squeeze(), mask.squeeze() + assert prediction.shape == target.shape, f"Shape mismatch: Expected same shape but got {prediction.shape} and {target.shape}." + + scale, shift = compute_scale_and_shift(prediction, target, mask) + + scaled_prediction = scale.view(-1, 1, 1) * prediction + shift.view(-1, 1, 1) + + loss = nn.functional.l1_loss(scaled_prediction[mask], target[mask]) + if not return_interpolated: + return loss + return loss, intr_input + + + + +if __name__ == '__main__': + # Tests for DiscreteNLLLoss + celoss = DiscreteNLLLoss() + print(celoss(torch.rand(4, 64, 26, 32)*10, torch.rand(4, 1, 26, 32)*10, )) + + d = torch.Tensor([6.59, 3.8, 10.0]) + print(celoss.dequantize_depth(celoss.quantize_depth(d))) diff --git a/dzoedepth/trainers/zoedepth_nk_trainer.py b/dzoedepth/trainers/zoedepth_nk_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..d697ac1f8b52940ec3476db9076e5516fa8577c7 --- /dev/null +++ b/dzoedepth/trainers/zoedepth_nk_trainer.py @@ -0,0 +1,143 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including 
without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File author: Shariq Farooq Bhat + +import torch +import torch.cuda.amp as amp +import torch.nn as nn + +from zoedepth.trainers.loss import GradL1Loss, SILogLoss +from zoedepth.utils.config import DATASETS_CONFIG +from zoedepth.utils.misc import compute_metrics + +from .base_trainer import BaseTrainer + + +class Trainer(BaseTrainer): + def __init__(self, config, model, train_loader, test_loader=None, device=None): + super().__init__(config, model, train_loader, + test_loader=test_loader, device=device) + self.device = device + self.silog_loss = SILogLoss() + self.grad_loss = GradL1Loss() + self.domain_classifier_loss = nn.CrossEntropyLoss() + + self.scaler = amp.GradScaler(enabled=self.config.use_amp) + + def train_on_batch(self, batch, train_step): + """ + Expects a batch of images and depth as input + batch["image"].shape : batch_size, c, h, w + batch["depth"].shape : batch_size, 1, h, w + + Assumes all images in a batch are from the same dataset + """ + + images, depths_gt = batch['image'].to( + self.device), batch['depth'].to(self.device) + # batch['dataset'] is a tensor strings all valued either 'nyu' or 'kitti'. 
labels nyu -> 0, kitti -> 1 + dataset = batch['dataset'][0] + # Convert to 0s or 1s + domain_labels = torch.Tensor([dataset == 'kitti' for _ in range( + images.size(0))]).to(torch.long).to(self.device) + + # m = self.model.module if self.config.multigpu else self.model + + b, c, h, w = images.size() + mask = batch["mask"].to(self.device).to(torch.bool) + + losses = {} + + with amp.autocast(enabled=self.config.use_amp): + output = self.model(images) + pred_depths = output['metric_depth'] + domain_logits = output['domain_logits'] + + l_si, pred = self.silog_loss( + pred_depths, depths_gt, mask=mask, interpolate=True, return_interpolated=True) + loss = self.config.w_si * l_si + losses[self.silog_loss.name] = l_si + + if self.config.w_grad > 0: + l_grad = self.grad_loss(pred, depths_gt, mask=mask) + loss = loss + self.config.w_grad * l_grad + losses[self.grad_loss.name] = l_grad + else: + l_grad = torch.Tensor([0]) + + if self.config.w_domain > 0: + l_domain = self.domain_classifier_loss( + domain_logits, domain_labels) + loss = loss + self.config.w_domain * l_domain + losses["DomainLoss"] = l_domain + else: + l_domain = torch.Tensor([0.]) + + self.scaler.scale(loss).backward() + + if self.config.clip_grad > 0: + self.scaler.unscale_(self.optimizer) + nn.utils.clip_grad_norm_( + self.model.parameters(), self.config.clip_grad) + + self.scaler.step(self.optimizer) + + if self.should_log and self.step > 1 and (self.step % int(self.config.log_images_every * self.iters_per_epoch)) == 0: + depths_gt[torch.logical_not(mask)] = -99 + self.log_images(rgb={"Input": images[0, ...]}, depth={"GT": depths_gt[0], "PredictedMono": pred[0]}, prefix="Train", + min_depth=DATASETS_CONFIG[dataset]['min_depth'], max_depth=DATASETS_CONFIG[dataset]['max_depth']) + + self.scaler.update() + self.optimizer.zero_grad(set_to_none=True) + + return losses + + def validate_on_batch(self, batch, val_step): + images = batch['image'].to(self.device) + depths_gt = batch['depth'].to(self.device) + dataset 
= batch['dataset'][0] + if 'has_valid_depth' in batch: + if not batch['has_valid_depth']: + return None, None + + depths_gt = depths_gt.squeeze().unsqueeze(0).unsqueeze(0) + with amp.autocast(enabled=self.config.use_amp): + m = self.model.module if self.config.multigpu else self.model + pred_depths = m(images)["metric_depth"] + pred_depths = pred_depths.squeeze().unsqueeze(0).unsqueeze(0) + + mask = torch.logical_and( + depths_gt > self.config.min_depth, depths_gt < self.config.max_depth) + with amp.autocast(enabled=self.config.use_amp): + l_depth = self.silog_loss( + pred_depths, depths_gt, mask=mask.to(torch.bool), interpolate=True) + + metrics = compute_metrics(depths_gt, pred_depths, **self.config) + losses = {f"{self.silog_loss.name}": l_depth.item()} + + if val_step == 1 and self.should_log: + depths_gt[torch.logical_not(mask)] = -99 + self.log_images(rgb={"Input": images[0]}, depth={"GT": depths_gt[0], "PredictedMono": pred_depths[0]}, prefix="Test", + min_depth=DATASETS_CONFIG[dataset]['min_depth'], max_depth=DATASETS_CONFIG[dataset]['max_depth']) + + return metrics, losses diff --git a/dzoedepth/trainers/zoedepth_trainer.py b/dzoedepth/trainers/zoedepth_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..0a38b0e52782f883b4acef26d728ee15daaf0f8f --- /dev/null +++ b/dzoedepth/trainers/zoedepth_trainer.py @@ -0,0 +1,177 @@ +# MIT License + +# Copyright (c) 2022 Intelligent Systems Lab Org + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or 
class Trainer(BaseTrainer):
    """Trainer for the metric-depth model using SILog and gradient-L1 losses.

    Wraps the training step, plain and crop-aware inference, and the
    validation step. Mixed precision is controlled by ``config.use_amp``.
    """

    def __init__(self, config, model, train_loader, test_loader=None, device=None):
        """Set up the losses and the AMP gradient scaler on top of BaseTrainer."""
        super().__init__(config, model, train_loader,
                         test_loader=test_loader, device=device)
        self.device = device
        self.silog_loss = SILogLoss()
        self.grad_loss = GradL1Loss()
        self.scaler = amp.GradScaler(enabled=self.config.use_amp)

    def train_on_batch(self, batch, train_step):
        """
        Run one optimisation step and return the individual loss terms.

        Expects a batch of images and depth as input
        batch["image"].shape : batch_size, c, h, w
        batch["depth"].shape : batch_size, 1, h, w

        Returns:
            dict: loss name -> loss tensor (SILog always, gradient loss only
            when ``config.w_grad > 0``).
        """

        images, depths_gt = batch['image'].to(
            self.device), batch['depth'].to(self.device)
        dataset = batch['dataset'][0]

        # Unpacking fails early if the input is not a 4-D tensor.
        b, c, h, w = images.size()
        mask = batch["mask"].to(self.device).to(torch.bool)

        losses = {}

        with amp.autocast(enabled=self.config.use_amp):

            output = self.model(images)
            pred_depths = output['metric_depth']

            l_si, pred = self.silog_loss(
                pred_depths, depths_gt, mask=mask, interpolate=True, return_interpolated=True)
            loss = self.config.w_si * l_si
            losses[self.silog_loss.name] = l_si

            if self.config.w_grad > 0:
                l_grad = self.grad_loss(pred, depths_gt, mask=mask)
                loss = loss + self.config.w_grad * l_grad
                losses[self.grad_loss.name] = l_grad

        self.scaler.scale(loss).backward()

        if self.config.clip_grad > 0:
            # Gradients must be unscaled before clipping so the threshold
            # applies to their true magnitude.
            self.scaler.unscale_(self.optimizer)
            nn.utils.clip_grad_norm_(
                self.model.parameters(), self.config.clip_grad)

        self.scaler.step(self.optimizer)

        # The logging interval is a fraction of an epoch. For short epochs the
        # product truncates to 0, which would make the modulo raise
        # ZeroDivisionError, so clamp it to at least one step.
        if self.should_log and (self.step % max(1, int(self.config.log_images_every * self.iters_per_epoch))) == 0:
            # -99 is treated as invalid depth in the log_images function and is colored grey.
            depths_gt[torch.logical_not(mask)] = -99

            self.log_images(rgb={"Input": images[0, ...]}, depth={"GT": depths_gt[0], "PredictedMono": pred[0]}, prefix="Train",
                            min_depth=DATASETS_CONFIG[dataset]['min_depth'], max_depth=DATASETS_CONFIG[dataset]['max_depth'])

            if self.config.get("log_rel", False):
                self.log_images(
                    scalar_field={"RelPred": output["relative_depth"][0]}, prefix="TrainRel")

        self.scaler.update()
        self.optimizer.zero_grad()

        return losses

    @torch.no_grad()
    def eval_infer(self, x):
        """Return the model's ``metric_depth`` output for ``x`` without gradients."""
        with amp.autocast(enabled=self.config.use_amp):
            # Under multi-GPU training the real model sits behind ``.module``.
            m = self.model.module if self.config.multigpu else self.model
            pred_depths = m(x)['metric_depth']
        return pred_depths

    @torch.no_grad()
    def crop_aware_infer(self, x):
        """Predict depth on the image with its black border cropped away.

        Falls back to ``eval_infer`` unless ``config.avoid_boundary`` is set.
        Otherwise the border found by ``get_black_border`` is removed, the
        crop is fed to the model, and the prediction is pasted into a
        zero-filled map of the original spatial size.
        """
        # if we are not avoiding the black border, we can just use the normal inference
        if not self.config.get("avoid_boundary", False):
            return self.eval_infer(x)

        # otherwise, we need to crop the image to avoid the black border
        # For now, this may be a bit slow due to converting to numpy and back
        # We assume no normalization is done on the input image

        # get the black border
        assert x.shape[0] == 1, "Only batch size 1 is supported for now"
        x_pil = transforms.ToPILImage()(x[0].cpu())
        x_np = np.array(x_pil, dtype=np.uint8)
        black_border_params = get_black_border(x_np)
        top, bottom, left, right = black_border_params.top, black_border_params.bottom, black_border_params.left, black_border_params.right
        x_np_cropped = x_np[top:bottom, left:right, :]
        x_cropped = transforms.ToTensor()(Image.fromarray(x_np_cropped))

        # run inference on the cropped image
        pred_depths_cropped = self.eval_infer(x_cropped.unsqueeze(0).to(self.device))

        # resize the prediction to x_np_cropped's size
        pred_depths_cropped = nn.functional.interpolate(
            pred_depths_cropped, size=(x_np_cropped.shape[0], x_np_cropped.shape[1]), mode="bilinear", align_corners=False)

        # pad the prediction back to the original size
        pred_depths = torch.zeros((1, 1, x_np.shape[0], x_np.shape[1]), device=pred_depths_cropped.device, dtype=pred_depths_cropped.dtype)
        pred_depths[:, :, top:bottom, left:right] = pred_depths_cropped

        return pred_depths

    def validate_on_batch(self, batch, val_step):
        """Evaluate one validation batch.

        Returns:
            tuple: ``(metrics, losses)``, or ``(None, None)`` when the batch
            carries a falsy ``has_valid_depth`` flag.
        """
        images = batch['image'].to(self.device)
        depths_gt = batch['depth'].to(self.device)
        dataset = batch['dataset'][0]
        mask = batch["mask"].to(self.device)
        if 'has_valid_depth' in batch:
            if not batch['has_valid_depth']:
                return None, None

        # Normalise ground truth, mask and prediction to shape (1, 1, ...).
        depths_gt = depths_gt.squeeze().unsqueeze(0).unsqueeze(0)
        mask = mask.squeeze().unsqueeze(0).unsqueeze(0)
        if dataset == 'nyu':
            pred_depths = self.crop_aware_infer(images)
        else:
            pred_depths = self.eval_infer(images)
        pred_depths = pred_depths.squeeze().unsqueeze(0).unsqueeze(0)

        with amp.autocast(enabled=self.config.use_amp):
            l_depth = self.silog_loss(
                pred_depths, depths_gt, mask=mask.to(torch.bool), interpolate=True)

        metrics = compute_metrics(depths_gt, pred_depths, **self.config)
        losses = {f"{self.silog_loss.name}": l_depth.item()}

        # Images are logged only for the first validation step.
        if val_step == 1 and self.should_log:
            # -99 marks invalid depth for log_images.
            depths_gt[torch.logical_not(mask)] = -99
            self.log_images(rgb={"Input": images[0]}, depth={"GT": depths_gt[0], "PredictedMono": pred_depths[0]}, prefix="Test",
                            min_depth=DATASETS_CONFIG[dataset]['min_depth'], max_depth=DATASETS_CONFIG[dataset]['max_depth'])

        return metrics, losses
+ +# File author: Shariq Farooq Bhat + diff --git a/dzoedepth/utils/__pycache__/__init__.cpython-310.pyc b/dzoedepth/utils/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cda73b9af12d1cad367af7d070c66b382a7b7c1e Binary files /dev/null and b/dzoedepth/utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/dzoedepth/utils/__pycache__/__init__.cpython-311.pyc b/dzoedepth/utils/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..312137d7d564a82eb9d71f83989ee26c326656d0 Binary files /dev/null and b/dzoedepth/utils/__pycache__/__init__.cpython-311.pyc differ diff --git a/dzoedepth/utils/__pycache__/__init__.cpython-312.pyc b/dzoedepth/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..295e2d18df3daabb29d28863032b6c89d60751d8 Binary files /dev/null and b/dzoedepth/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/dzoedepth/utils/__pycache__/arg_utils.cpython-310.pyc b/dzoedepth/utils/__pycache__/arg_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a6425952fe21b2391a8bf1b51517dbf439e33ae Binary files /dev/null and b/dzoedepth/utils/__pycache__/arg_utils.cpython-310.pyc differ diff --git a/dzoedepth/utils/__pycache__/arg_utils.cpython-311.pyc b/dzoedepth/utils/__pycache__/arg_utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..066090e653a7d8c2e2ab8432534566ae8700106b Binary files /dev/null and b/dzoedepth/utils/__pycache__/arg_utils.cpython-311.pyc differ diff --git a/dzoedepth/utils/__pycache__/arg_utils.cpython-312.pyc b/dzoedepth/utils/__pycache__/arg_utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af3b4223aa1e8e5adb107eea5c653b04ebca5ddb Binary files /dev/null and b/dzoedepth/utils/__pycache__/arg_utils.cpython-312.pyc differ diff --git 
def infer_type(x):  # hacky way to infer type from string args
    """Convert a string to ``int`` or ``float`` when it parses as one.

    Non-string inputs and strings that are not numeric are returned
    unchanged. ``int`` is tried before ``float`` so "3" becomes ``3``.
    """
    if not isinstance(x, str):
        return x

    for cast in (int, float):
        try:
            return cast(x)
        except ValueError:
            continue

    return x


def parse_unknown(unknown_args):
    """Turn leftover command-line tokens into a ``{name: value}`` dict.

    Both ``--key value`` and ``--key=value`` forms are accepted. Every
    ``--`` is stripped from the key and values go through ``infer_type``.

    Args:
        unknown_args (list[str]): tokens not consumed by the argument parser.

    Returns:
        dict: parsed options. A trailing key without a value is dropped,
        because keys and values are paired positionally.
    """
    clean = []
    for a in unknown_args:
        if "=" in a:
            # Split only on the first "=" so values such as "a=b" or
            # "x=1;y=2" survive instead of raising on unpacking.
            k, v = a.split("=", 1)
            clean.extend([k, v])
        else:
            clean.append(a)

    keys = clean[::2]
    values = clean[1::2]
    return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)}
+ +# File author: Shariq Farooq Bhat + +import json +import os + +from dzoedepth.utils.easydict import EasyDict as edict + +from dzoedepth.utils.arg_utils import infer_type +import pathlib +import platform + +ROOT = pathlib.Path(__file__).parent.parent.resolve() + +HOME_DIR = os.path.expanduser("~") + +COMMON_CONFIG = { + "save_dir": os.path.expanduser("~/shortcuts/monodepth3_checkpoints"), + "project": "ZoeDepth", + "tags": '', + "notes": "", + "gpu": None, + "root": ".", + "uid": None, + "print_losses": False +} + +DATASETS_CONFIG = { + "kitti": { + "dataset": "kitti", + "min_depth": 0.001, + "max_depth": 80, + "data_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"), + "gt_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"), + "filenames_file": "./train_test_inputs/kitti_eigen_train_files_with_gt.txt", + "input_height": 352, + "input_width": 1216, # 704 + "data_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"), + "gt_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"), + "filenames_file_eval": "./train_test_inputs/kitti_eigen_test_files_with_gt.txt", + + "min_depth_eval": 1e-3, + "max_depth_eval": 80, + + "do_random_rotate": True, + "degree": 1.0, + "do_kb_crop": True, + "garg_crop": True, + "eigen_crop": False, + "use_right": False + }, + "kitti_test": { + "dataset": "kitti", + "min_depth": 0.001, + "max_depth": 80, + "data_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"), + "gt_path": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"), + "filenames_file": "./train_test_inputs/kitti_eigen_train_files_with_gt.txt", + "input_height": 352, + "input_width": 1216, + "data_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/raw"), + "gt_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/kitti/gts"), + "filenames_file_eval": "./train_test_inputs/kitti_eigen_test_files_with_gt.txt", + + "min_depth_eval": 1e-3, + "max_depth_eval": 80, + + "do_random_rotate": False, + "degree": 1.0, + 
"do_kb_crop": True, + "garg_crop": True, + "eigen_crop": False, + "use_right": False + }, + "nyu": { + "dataset": "nyu", + "avoid_boundary": False, + "min_depth": 1e-3, # originally 0.1 + "max_depth": 10, + "data_path": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/sync/"), + "gt_path": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/sync/"), + "filenames_file": "./train_test_inputs/nyudepthv2_train_files_with_gt.txt", + "input_height": 480, + "input_width": 640, + "data_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/official_splits/test/"), + "gt_path_eval": os.path.join(HOME_DIR, "shortcuts/datasets/nyu_depth_v2/official_splits/test/"), + "filenames_file_eval": "./train_test_inputs/nyudepthv2_test_files_with_gt.txt", + "min_depth_eval": 1e-3, + "max_depth_eval": 10, + "min_depth_diff": -10, + "max_depth_diff": 10, + + "do_random_rotate": True, + "degree": 1.0, + "do_kb_crop": False, + "garg_crop": False, + "eigen_crop": True + }, + "ibims": { + "dataset": "ibims", + "ibims_root": os.path.join(HOME_DIR, "shortcuts/datasets/ibims/ibims1_core_raw/"), + "eigen_crop": True, + "garg_crop": False, + "do_kb_crop": False, + "min_depth_eval": 0, + "max_depth_eval": 10, + "min_depth": 1e-3, + "max_depth": 10 + }, + "sunrgbd": { + "dataset": "sunrgbd", + "sunrgbd_root": os.path.join(HOME_DIR, "shortcuts/datasets/SUNRGBD/test/"), + "eigen_crop": True, + "garg_crop": False, + "do_kb_crop": False, + "min_depth_eval": 0, + "max_depth_eval": 8, + "min_depth": 1e-3, + "max_depth": 10 + }, + "diml_indoor": { + "dataset": "diml_indoor", + "diml_indoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diml_indoor_test/"), + "eigen_crop": True, + "garg_crop": False, + "do_kb_crop": False, + "min_depth_eval": 0, + "max_depth_eval": 10, + "min_depth": 1e-3, + "max_depth": 10 + }, + "diml_outdoor": { + "dataset": "diml_outdoor", + "diml_outdoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diml_outdoor_test/"), + "eigen_crop": False, + 
"garg_crop": True, + "do_kb_crop": False, + "min_depth_eval": 2, + "max_depth_eval": 80, + "min_depth": 1e-3, + "max_depth": 80 + }, + "diode_indoor": { + "dataset": "diode_indoor", + "diode_indoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diode_indoor/"), + "eigen_crop": True, + "garg_crop": False, + "do_kb_crop": False, + "min_depth_eval": 1e-3, + "max_depth_eval": 10, + "min_depth": 1e-3, + "max_depth": 10 + }, + "diode_outdoor": { + "dataset": "diode_outdoor", + "diode_outdoor_root": os.path.join(HOME_DIR, "shortcuts/datasets/diode_outdoor/"), + "eigen_crop": False, + "garg_crop": True, + "do_kb_crop": False, + "min_depth_eval": 1e-3, + "max_depth_eval": 80, + "min_depth": 1e-3, + "max_depth": 80 + }, + "hypersim_test": { + "dataset": "hypersim_test", + "hypersim_test_root": os.path.join(HOME_DIR, "shortcuts/datasets/hypersim_test/"), + "eigen_crop": True, + "garg_crop": False, + "do_kb_crop": False, + "min_depth_eval": 1e-3, + "max_depth_eval": 80, + "min_depth": 1e-3, + "max_depth": 10 + }, + "vkitti": { + "dataset": "vkitti", + "vkitti_root": os.path.join(HOME_DIR, "shortcuts/datasets/vkitti_test/"), + "eigen_crop": False, + "garg_crop": True, + "do_kb_crop": True, + "min_depth_eval": 1e-3, + "max_depth_eval": 80, + "min_depth": 1e-3, + "max_depth": 80 + }, + "vkitti2": { + "dataset": "vkitti2", + "vkitti2_root": os.path.join(HOME_DIR, "shortcuts/datasets/vkitti2/"), + "eigen_crop": False, + "garg_crop": True, + "do_kb_crop": True, + "min_depth_eval": 1e-3, + "max_depth_eval": 80, + "min_depth": 1e-3, + "max_depth": 80, + }, + "ddad": { + "dataset": "ddad", + "ddad_root": os.path.join(HOME_DIR, "shortcuts/datasets/ddad/ddad_val/"), + "eigen_crop": False, + "garg_crop": True, + "do_kb_crop": True, + "min_depth_eval": 1e-3, + "max_depth_eval": 80, + "min_depth": 1e-3, + "max_depth": 80, + }, +} + +ALL_INDOOR = ["nyu", "ibims", "sunrgbd", "diode_indoor", "hypersim_test"] +ALL_OUTDOOR = ["kitti", "diml_outdoor", "diode_outdoor", "vkitti2", "ddad"] 
def flatten(config, except_keys=('bin_conf',)):
    """Flatten a nested dict into a single-level dict.

    Nested dicts are merged into the top level (later keys win on
    collision). Keys listed in ``except_keys`` are kept as-is and their
    values are not descended into.

    Args:
        config (dict): possibly nested configuration. A non-dict input
            yields an empty dict.
        except_keys (tuple[str] | str): keys whose values must stay
            untouched. A bare string is treated as a single key; previously
            the default was the plain string ``'bin_conf'``, which made the
            membership test a substring match.

    Returns:
        dict: the flattened configuration.
    """
    if isinstance(except_keys, str):
        except_keys = (except_keys,)

    def recurse(inp):
        if not isinstance(inp, dict):
            return
        for key, value in inp.items():
            if key in except_keys:
                yield (key, value)
            elif isinstance(value, dict):
                yield from recurse(value)
            else:
                yield (key, value)

    return dict(recurse(config))


def split_combined_args(kwargs):
    """Splits the arguments that are combined with '__' into multiple arguments.
    Combined arguments should have equal number of keys and values.
    Keys are separated by '__' and Values are separated with ';'.
    For example, '__n_bins__lr=256;0.001'

    Args:
        kwargs (dict): key-value pairs of arguments where key-value is optionally combined according to the above format.

    Returns:
        dict: Parsed dict with the combined arguments split into individual key-value pairs.
              The original combined key is kept alongside the split ones.
    """
    new_kwargs = dict(kwargs)
    for key, value in kwargs.items():
        if key.startswith("__"):
            keys = key.split("__")[1:]
            values = value.split(";")
            assert len(keys) == len(
                values), f"Combined arguments should have equal number of keys and values. Keys are separated by '__' and Values are separated with ';'. For example, '__n_bins__lr=256;0.001. Given (keys,values) is ({keys}, {values})"
            for k, v in zip(keys, values):
                new_kwargs[k] = v
    return new_kwargs


def parse_list(config, key, dtype=int):
    """Parse a list of values for the key if the value is a string. The values are separated by a comma.
    Modifies the config in place. Does nothing when the key is absent.
    """
    if key in config:
        if isinstance(config[key], str):
            config[key] = list(map(dtype, config[key].split(',')))
        assert isinstance(config[key], list) and all([isinstance(e, dtype) for e in config[key]]
                                                     ), f"{key} should be a list of values dtype {dtype}. Given {config[key]} of type {type(config[key])} with values of type {[type(e) for e in config[key]]}."


def get_model_config(model_name, model_version=None):
    """Find and parse the .json config file for the model.

    Args:
        model_name (str): name of the model. The config file should be named config_{model_name}[_{model_version}].json under the models/{model_name} directory.
        model_version (str, optional): Specific config version. If specified config_{model_name}_{model_version}.json is searched for and used. Otherwise config_{model_name}.json is used. Defaults to None.

    Returns:
        easydict: the config dictionary for the model, or None if the config file does not exist.

    Raises:
        ValueError: if the config inherits from a model whose config file cannot be found.
    """
    config_fname = f"config_{model_name}_{model_version}.json" if model_version is not None else f"config_{model_name}.json"
    config_file = os.path.join(ROOT, "models", model_name, config_fname)
    if not os.path.exists(config_file):
        return None

    with open(config_file, "r") as f:
        config = edict(json.load(f))

    # handle dictionary inheritance
    # only training config is supported for inheritance
    if "inherit" in config.train and config.train.inherit is not None:
        parent = get_model_config(config.train["inherit"])
        if parent is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                f"Config file for inherited model {config.train['inherit']} not found.")
        for key, value in parent.train.items():
            # Values already set by the child take precedence.
            if key not in config.train:
                config.train[key] = value
    return edict(config)


def update_model_config(config, mode, model_name, model_version=None, strict=False):
    """Overlay the model's config (its ``model`` section plus the ``mode`` section) on ``config``.

    Returns ``config`` unchanged when no config file exists, unless
    ``strict`` is set, in which case a ValueError is raised.
    """
    model_config = get_model_config(model_name, model_version)
    if model_config is not None:
        config = {**config, **
                  flatten({**model_config.model, **model_config[mode]})}
    elif strict:
        raise ValueError(f"Config file for model {model_name} not found.")
    return config


def check_choices(name, value, choices):
    """Raise ValueError when ``value`` is not one of ``choices``."""
    if value not in choices:
        raise ValueError(f"{name} {value} not in supported choices {choices}")
KEYS_TYPE_BOOL = ["use_amp", "distributed", "use_shared_dict", "same_lr", "aug", "three_phase",
                  "prefetch", "cycle_momentum"]  # Casting is not necessary as their int casted values in config are 0 or 1


def get_config(model_name, mode='train', dataset=None, **overwrite_kwargs):
    """Main entry point to get the config for the model.

    Args:
        model_name (str): name of the desired model.
        mode (str, optional): "train", "infer" or "eval". Defaults to 'train'.
        dataset (str, optional): If specified, the corresponding dataset configuration is loaded as well. Defaults to None.

    Keyword Args: key-value pairs of arguments to overwrite the default config.

    The order of precedence for overwriting the config is (Higher precedence first):
        # 1. overwrite_kwargs
        # 2. "config_version": Config file version if specified in overwrite_kwargs. The corresponding config loaded is config_{model_name}_{config_version}.json
        # 3. "version_name": Default Model version specific config specified in overwrite_kwargs. The corresponding config loaded is config_{model_name}_{version_name}.json
        # 4. common_config: Default config for all models specified in COMMON_CONFIG

    Returns:
        easydict: The config dictionary for the model, including the
        ``model`` name and the ``hostname`` of the current machine.

    Raises:
        ValueError: if ``model_name``, ``mode`` or (in train mode) ``dataset`` is not a supported choice.
    """

    check_choices("Model", model_name, ["zoedepth", "zoedepth_nk"])
    check_choices("Mode", mode, ["train", "infer", "eval"])
    if mode == "train":
        check_choices("Dataset", dataset, ["nyu", "kitti", "mix", None])

    config = flatten({**COMMON_CONFIG, **COMMON_TRAINING_CONFIG})
    config = update_model_config(config, mode, model_name)

    # update with model version specific config
    # .get avoids a KeyError when the base config defines no version_name;
    # a None version simply reloads the base model config.
    version_name = overwrite_kwargs.get(
        "version_name", config.get("version_name"))
    config = update_model_config(config, mode, model_name, version_name)

    # update with config version if specified
    config_version = overwrite_kwargs.get("config_version", None)
    if config_version is not None:
        print("Overwriting config with config_version", config_version)
        config = update_model_config(config, mode, model_name, config_version)

    # update with overwrite_kwargs
    # Combined args are useful for hyperparameter search
    overwrite_kwargs = split_combined_args(overwrite_kwargs)
    config = {**config, **overwrite_kwargs}

    # Casting to bool   # TODO: Not necessary. Remove and test
    for key in KEYS_TYPE_BOOL:
        if key in config:
            config[key] = bool(config[key])

    # Model specific post processing of config
    parse_list(config, "n_attractors")

    # adjust n_bins for each bin configuration if bin_conf is given and n_bins is passed in overwrite_kwargs
    if 'bin_conf' in config and 'n_bins' in overwrite_kwargs:
        # Nested values are not reached by the final infer_type pass, so
        # convert here to keep bin_conf consistent with config['n_bins'].
        n_bins = infer_type(overwrite_kwargs['n_bins'])
        for conf in config['bin_conf']:  # list of dicts
            conf['n_bins'] = n_bins

    if mode == "train":
        orig_dataset = dataset
        if dataset == "mix":
            dataset = 'nyu'  # Use nyu as default for mix. Dataset config is changed accordingly while loading the dataloader
        if dataset is not None:
            config['project'] = f"MonoDepth3-{orig_dataset}"  # Set project for wandb

    if dataset is not None:
        config['dataset'] = dataset
        # Dataset values act as defaults; anything already in config wins.
        config = {**DATASETS_CONFIG[dataset], **config}

    config['model'] = model_name
    # add hostname to config (must happen before the typed copy is built,
    # otherwise it never reaches the returned object)
    config['hostname'] = platform.node()
    typed_config = {k: infer_type(v) for k, v in config.items()}
    return edict(typed_config)


def change_dataset(config, new_dataset):
    """Overwrite ``config`` in place with the settings of ``new_dataset`` and return it."""
    config.update(DATASETS_CONFIG[new_dataset])
    return config
class EasyDict(dict):
    """
    Dictionary whose items are also reachable as attributes.

    Get attributes

    >>> d = EasyDict({'foo':3})
    >>> d['foo']
    3
    >>> d.foo
    3
    >>> d.bar
    Traceback (most recent call last):
    ...
    AttributeError: 'EasyDict' object has no attribute 'bar'

    Works recursively

    >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}})
    >>> isinstance(d.bar, dict)
    True
    >>> d.bar.x
    1

    Bullet-proof

    >>> EasyDict({})
    {}
    >>> EasyDict(d={})
    {}
    >>> EasyDict(None)
    {}
    >>> d = {'a': 1}
    >>> EasyDict(**d)
    {'a': 1}
    >>> EasyDict((('a', 1), ('b', 2)))
    {'a': 1, 'b': 2}

    Set attributes

    >>> d = EasyDict()
    >>> d.foo = 3
    >>> d.foo
    3
    >>> d.bar = {'prop': 'value'}
    >>> d.bar.prop
    'value'
    >>> d
    {'foo': 3, 'bar': {'prop': 'value'}}
    >>> d.bar.prop = 'newer'
    >>> d.bar.prop
    'newer'


    Values extraction

    >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]})
    >>> isinstance(d.bar, list)
    True
    >>> from operator import attrgetter
    >>> list(map(attrgetter('x'), d.bar))
    [1, 3]
    >>> list(map(attrgetter('y'), d.bar))
    [2, 4]
    >>> d = EasyDict()
    >>> list(d.keys())
    []
    >>> d = EasyDict(foo=3, bar=dict(x=1, y=2))
    >>> d.foo
    3
    >>> d.bar.x
    1

    Still like a dict though

    >>> o = EasyDict({'clean':True})
    >>> list(o.items())
    [('clean', True)]

    And like a class

    >>> class Flower(EasyDict):
    ...     power = 1
    ...
    >>> f = Flower()
    >>> f.power
    1
    >>> f = Flower({'height': 12})
    >>> f.height
    12
    >>> f['power']
    1
    >>> sorted(f.keys())
    ['height', 'power']

    update and pop items
    >>> d = EasyDict(a=1, b='2')
    >>> e = EasyDict(c=3.0, a=9.0)
    >>> d.update(e)
    >>> d.c
    3.0
    >>> d['c']
    3.0
    >>> d.get('c')
    3.0
    >>> d.update(a=4, b=4)
    >>> d.b
    4
    >>> d.pop('a')
    4
    >>> d.a
    Traceback (most recent call last):
    ...
    AttributeError: 'EasyDict' object has no attribute 'a'

    pop with a default, update without side effects on the argument
    >>> d.pop('missing', 0)
    0
    >>> src = {'x': 1}
    >>> d.update(src, y=2)
    >>> src
    {'x': 1}
    >>> d.y
    2
    """
    def __init__(self, d=None, **kwargs):
        """Build from a mapping or iterable of pairs ``d`` plus keyword items."""
        if d is None:
            d = {}
        else:
            d = dict(d)
        if kwargs:
            d.update(**kwargs)
        for k, v in d.items():
            setattr(self, k, v)
        # Class attributes
        # Copy non-dunder class attributes of subclasses into the instance so
        # they also show up as dictionary items.
        for k in self.__class__.__dict__.keys():
            if not (k.startswith('__') and k.endswith('__')) and k not in ('update', 'pop'):
                setattr(self, k, getattr(self, k))

    def __setattr__(self, name, value):
        """Store ``value`` as both attribute and item, wrapping nested dicts.

        Lists and tuples are stored as lists whose dict elements are wrapped.
        """
        if isinstance(value, (list, tuple)):
            value = [self.__class__(x)
                     if isinstance(x, dict) else x for x in value]
        elif isinstance(value, dict) and not isinstance(value, self.__class__):
            value = self.__class__(value)
        super(EasyDict, self).__setattr__(name, value)
        super(EasyDict, self).__setitem__(name, value)

    __setitem__ = __setattr__

    def update(self, e=None, **f):
        """Merge a mapping or iterable of pairs ``e`` and keyword items ``f``.

        ``e`` is copied first, so the caller's object is never modified.
        """
        d = dict(e) if e else {}
        d.update(f)
        for k in d:
            setattr(self, k, d[k])

    def pop(self, k, d=None):
        """Remove key ``k`` and return its value, or ``d`` if it is absent."""
        # Drop the mirrored instance attribute only if it exists, so a
        # missing key returns the default instead of raising AttributeError.
        self.__dict__.pop(k, None)
        return super(EasyDict, self).pop(k, d)


if __name__ == "__main__":
    import doctest
    doctest.testmod()
def get_intrinsics(H, W, fov=55.0):
    """
    Intrinsics for a pinhole camera model.
    Assume a central principal point and a horizontal field of view of
    ``fov`` degrees (55 by default).

    Args:
        H: image height in pixels.
        W: image width in pixels.
        fov: horizontal field of view in degrees.

    Returns:
        3x3 numpy array [[f, 0, cx], [0, f, cy], [0, 0, 1]].
    """
    f = 0.5 * W / np.tan(0.5 * fov * np.pi / 180.0)
    cx = 0.5 * W
    cy = 0.5 * H
    return np.array([[f, 0, cx],
                     [0, f, cy],
                     [0, 0, 1]])


def depth_to_points(depth, R=None, t=None, fov=55.0):
    """
    Back-project a depth map to 3D points.

    Args:
        depth: numpy array indexed as (batch, height, width).
        R: optional 3x3 rotation applied to the points. Identity if None.
        t: optional translation of length 3 added after the rotation. Zero if None.
        fov: field of view in degrees passed on to get_intrinsics.

    Returns:
        numpy array of shape (height, width, 3) with the points of the first
        batch element only.
    """
    K = get_intrinsics(depth.shape[1], depth.shape[2], fov=fov)
    Kinv = np.linalg.inv(K)
    if R is None:
        R = np.eye(3)
    if t is None:
        t = np.zeros(3)

    # M converts from your coordinate to PyTorch3D's coordinate system
    # (x and y axes are negated).
    M = np.eye(3)
    M[0, 0] = -1.0
    M[1, 1] = -1.0

    height, width = depth.shape[1:3]

    x = np.arange(width)
    y = np.arange(height)
    coord = np.stack(np.meshgrid(x, y), -1)
    coord = np.concatenate((coord, np.ones_like(coord)[:, :, [0]]), -1)  # z=1
    coord = coord.astype(np.float32)
    coord = coord[None]  # bs, h, w, 3

    D = depth[:, :, :, None, None]
    # Scale the inverse intrinsics by depth, then apply them to the
    # homogeneous pixel coordinates.
    pts3D_1 = (D * Kinv[None, None, None, ...]) @ coord[:, :, :, :, None]
    # pts3D_1 live in your coordinate system. Convert them to Py3D's
    pts3D_1 = M[None, None, None, ...] @ pts3D_1
    # from reference to target viewpoint
    pts3D_2 = R[None, None, None, ...] @ pts3D_1 + t[None, None, None, :, None]
    return pts3D_2[:, :, :, :3, 0][0]


def create_triangles(h, w, mask=None):
    """
    Reference: https://github.com/google-research/google-research/blob/e96197de06613f1b027d20328e06d69829fa5a89/infinite_nature/render_utils.py#L68
    Creates mesh triangle indices from a given pixel grid size.
        This function is not and need not be differentiable as triangle indices are
        fixed.
    Args:
    h: (int) denoting the height of the image.
    w: (int) denoting the width of the image.
    mask: optional boolean array with h*w elements. When given, only
        triangles whose three vertices are all True in the mask are kept.
    Returns:
    triangles: 2D numpy array of indices (int) with shape (2(W-1)(H-1) x 3),
        or fewer rows when a mask removes triangles.
    """
    x, y = np.meshgrid(range(w - 1), range(h - 1))
    # Flat (row-major) vertex indices of each grid cell's four corners.
    tl = y * w + x
    tr = y * w + x + 1
    bl = (y + 1) * w + x
    br = (y + 1) * w + x + 1
    # Two triangles per cell: (tl, bl, tr) and (br, tr, bl).
    triangles = np.array([tl, bl, tr, br, tr, bl])
    triangles = np.transpose(triangles, (1, 2, 0)).reshape(
        ((w - 1) * (h - 1) * 2, 3))
    if mask is not None:
        mask = mask.reshape(-1)
        triangles = triangles[mask[triangles].all(1)]
    return triangles
+ +# File author: Shariq Farooq Bhat + +"""Miscellaneous utility functions.""" + +from scipy import ndimage + +import base64 +import math +import re +from io import BytesIO + +import matplotlib +import matplotlib.cm +import numpy as np +import requests +import torch +import torch.distributed as dist +import torch.nn +import torch.nn as nn +import torch.utils.data.distributed +from PIL import Image +from torchvision.transforms import ToTensor + + +class RunningAverage: + def __init__(self): + self.avg = 0 + self.count = 0 + + def append(self, value): + self.avg = (value + self.count * self.avg) / (self.count + 1) + self.count += 1 + + def get_value(self): + return self.avg + + +def denormalize(x): + """Reverses the imagenet normalization applied to the input. + + Args: + x (torch.Tensor - shape(N,3,H,W)): input tensor + + Returns: + torch.Tensor - shape(N,3,H,W): Denormalized input + """ + mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(x.device) + std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(x.device) + return x * std + mean + + +class RunningAverageDict: + """A dictionary of running averages.""" + def __init__(self): + self._dict = None + + def update(self, new_dict): + if new_dict is None: + return + + if self._dict is None: + self._dict = dict() + for key, value in new_dict.items(): + self._dict[key] = RunningAverage() + + for key, value in new_dict.items(): + self._dict[key].append(value) + + def get_value(self): + if self._dict is None: + return None + return {key: value.get_value() for key, value in self._dict.items()} + + +def colorize(value, vmin=None, vmax=None, cmap='gray_r', invalid_val=-99, invalid_mask=None, background_color=(128, 128, 128, 255), gamma_corrected=False, value_transform=None): + """Converts a depth map to a color image. + + Args: + value (torch.Tensor, numpy.ndarry): Input depth map. Shape: (H, W) or (1, H, W) or (1, 1, H, W). 
All singular dimensions are squeezed + vmin (float, optional): vmin-valued entries are mapped to start color of cmap. If None, value.min() is used. Defaults to None. + vmax (float, optional): vmax-valued entries are mapped to end color of cmap. If None, value.max() is used. Defaults to None. + cmap (str, optional): matplotlib colormap to use. Defaults to 'magma_r'. + invalid_val (int, optional): Specifies value of invalid pixels that should be colored as 'background_color'. Defaults to -99. + invalid_mask (numpy.ndarray, optional): Boolean mask for invalid regions. Defaults to None. + background_color (tuple[int], optional): 4-tuple RGB color to give to invalid pixels. Defaults to (128, 128, 128, 255). + gamma_corrected (bool, optional): Apply gamma correction to colored image. Defaults to False. + value_transform (Callable, optional): Apply transform function to valid pixels before coloring. Defaults to None. + + Returns: + numpy.ndarray, dtype - uint8: Colored depth map. Shape: (H, W, 4) + """ + if isinstance(value, torch.Tensor): + value = value.detach().cpu().numpy() + + value = value.squeeze() + if invalid_mask is None: + invalid_mask = value == invalid_val + mask = np.logical_not(invalid_mask) + + # normalize + vmin = np.percentile(value[mask],2) if vmin is None else vmin + vmax = np.percentile(value[mask],85) if vmax is None else vmax + if vmin != vmax: + value = (value - vmin) / (vmax - vmin) # vmin..vmax + else: + # Avoid 0-division + value = value * 0. + + # squeeze last dim if it exists + # grey out the invalid values + + value[invalid_mask] = np.nan + cmapper = matplotlib.cm.get_cmap(cmap) + if value_transform: + value = value_transform(value) + # value = value / value.max() + value = cmapper(value, bytes=True) # (nxmx4) + + # img = value[:, :, :] + img = value[...] 
+ img[invalid_mask] = background_color + + # return img.transpose((2, 0, 1)) + if gamma_corrected: + # gamma correction + img = img / 255 + img = np.power(img, 2.2) + img = img * 255 + img = img.astype(np.uint8) + return img + + +def count_parameters(model, include_all=False): + return sum(p.numel() for p in model.parameters() if p.requires_grad or include_all) + + +def compute_errors(gt, pred): + """Compute metrics for 'pred' compared to 'gt' + + Args: + gt (numpy.ndarray): Ground truth values + pred (numpy.ndarray): Predicted values + + gt.shape should be equal to pred.shape + + Returns: + dict: Dictionary containing the following metrics: + 'a1': Delta1 accuracy: Fraction of pixels that are within a scale factor of 1.25 + 'a2': Delta2 accuracy: Fraction of pixels that are within a scale factor of 1.25^2 + 'a3': Delta3 accuracy: Fraction of pixels that are within a scale factor of 1.25^3 + 'abs_rel': Absolute relative error + 'rmse': Root mean squared error + 'log_10': Absolute log10 error + 'sq_rel': Squared relative error + 'rmse_log': Root mean squared error on the log scale + 'silog': Scale invariant log error + """ + thresh = np.maximum((gt / pred), (pred / gt)) + a1 = (thresh < 1.25).mean() + a2 = (thresh < 1.25 ** 2).mean() + a3 = (thresh < 1.25 ** 3).mean() + + abs_rel = np.mean(np.abs(gt - pred) / gt) + sq_rel = np.mean(((gt - pred) ** 2) / gt) + + rmse = (gt - pred) ** 2 + rmse = np.sqrt(rmse.mean()) + + rmse_log = (np.log(gt) - np.log(pred)) ** 2 + rmse_log = np.sqrt(rmse_log.mean()) + + err = np.log(pred) - np.log(gt) + silog = np.sqrt(np.mean(err ** 2) - np.mean(err) ** 2) * 100 + + log_10 = (np.abs(np.log10(gt) - np.log10(pred))).mean() + return dict(a1=a1, a2=a2, a3=a3, abs_rel=abs_rel, rmse=rmse, log_10=log_10, rmse_log=rmse_log, + silog=silog, sq_rel=sq_rel) + + +def compute_metrics(gt, pred, interpolate=True, garg_crop=False, eigen_crop=True, dataset='nyu', min_depth_eval=0.1, max_depth_eval=10, **kwargs): + """Compute metrics of predicted depth 
maps. Applies cropping and masking as necessary or specified via arguments. Refer to compute_errors for more details on metrics. + """ + if 'config' in kwargs: + config = kwargs['config'] + garg_crop = config.garg_crop + eigen_crop = config.eigen_crop + min_depth_eval = config.min_depth_eval + max_depth_eval = config.max_depth_eval + + if gt.shape[-2:] != pred.shape[-2:] and interpolate: + pred = nn.functional.interpolate( + pred, gt.shape[-2:], mode='bilinear', align_corners=True) + + pred = pred.squeeze().cpu().numpy() + pred[pred < min_depth_eval] = min_depth_eval + pred[pred > max_depth_eval] = max_depth_eval + pred[np.isinf(pred)] = max_depth_eval + pred[np.isnan(pred)] = min_depth_eval + + gt_depth = gt.squeeze().cpu().numpy() + valid_mask = np.logical_and( + gt_depth > min_depth_eval, gt_depth < max_depth_eval) + + if garg_crop or eigen_crop: + gt_height, gt_width = gt_depth.shape + eval_mask = np.zeros(valid_mask.shape) + + if garg_crop: + eval_mask[int(0.40810811 * gt_height):int(0.99189189 * gt_height), + int(0.03594771 * gt_width):int(0.96405229 * gt_width)] = 1 + + elif eigen_crop: + # print("-"*10, " EIGEN CROP ", "-"*10) + if dataset == 'kitti': + eval_mask[int(0.3324324 * gt_height):int(0.91351351 * gt_height), + int(0.0359477 * gt_width):int(0.96405229 * gt_width)] = 1 + else: + # assert gt_depth.shape == (480, 640), "Error: Eigen crop is currently only valid for (480, 640) images" + eval_mask[45:471, 41:601] = 1 + else: + eval_mask = np.ones(valid_mask.shape) + valid_mask = np.logical_and(valid_mask, eval_mask) + return compute_errors(gt_depth[valid_mask], pred[valid_mask]) + + +#################################### Model uilts ################################################ + + +def parallelize(config, model, find_unused_parameters=True): + + if config.gpu is not None: + torch.cuda.set_device(config.gpu) + model = model.cuda(config.gpu) + + config.multigpu = False + if config.distributed: + # Use DDP + config.multigpu = True + config.rank = 
config.rank * config.ngpus_per_node + config.gpu + dist.init_process_group(backend=config.dist_backend, init_method=config.dist_url, + world_size=config.world_size, rank=config.rank) + config.batch_size = int(config.batch_size / config.ngpus_per_node) + # config.batch_size = 8 + config.workers = int( + (config.num_workers + config.ngpus_per_node - 1) / config.ngpus_per_node) + print("Device", config.gpu, "Rank", config.rank, "batch size", + config.batch_size, "Workers", config.workers) + torch.cuda.set_device(config.gpu) + model = nn.SyncBatchNorm.convert_sync_batchnorm(model) + model = model.cuda(config.gpu) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[config.gpu], output_device=config.gpu, + find_unused_parameters=find_unused_parameters) + + elif config.gpu is None: + # Use DP + config.multigpu = True + model = model.cuda() + model = torch.nn.DataParallel(model) + + return model + + +################################################################################################# + + +##################################################################################################### + + +class colors: + '''Colors class: + Reset all colors with colors.reset + Two subclasses fg for foreground and bg for background. + Use as colors.subclass.colorname. + i.e. colors.fg.red or colors.bg.green + Also, the generic bold, disable, underline, reverse, strikethrough, + and invisible work with the main class + i.e. 
colors.bold + ''' + reset = '\033[0m' + bold = '\033[01m' + disable = '\033[02m' + underline = '\033[04m' + reverse = '\033[07m' + strikethrough = '\033[09m' + invisible = '\033[08m' + + class fg: + black = '\033[30m' + red = '\033[31m' + green = '\033[32m' + orange = '\033[33m' + blue = '\033[34m' + purple = '\033[35m' + cyan = '\033[36m' + lightgrey = '\033[37m' + darkgrey = '\033[90m' + lightred = '\033[91m' + lightgreen = '\033[92m' + yellow = '\033[93m' + lightblue = '\033[94m' + pink = '\033[95m' + lightcyan = '\033[96m' + + class bg: + black = '\033[40m' + red = '\033[41m' + green = '\033[42m' + orange = '\033[43m' + blue = '\033[44m' + purple = '\033[45m' + cyan = '\033[46m' + lightgrey = '\033[47m' + + +def printc(text, color): + print(f"{color}{text}{colors.reset}") + +############################################ + +def get_image_from_url(url): + response = requests.get(url) + img = Image.open(BytesIO(response.content)).convert("RGB") + return img + +def url_to_torch(url, size=(384, 384)): + img = get_image_from_url(url) + img = img.resize(size, Image.ANTIALIAS) + img = torch.from_numpy(np.asarray(img)).float() + img = img.permute(2, 0, 1) + img.div_(255) + return img + +def pil_to_batched_tensor(img): + return ToTensor()(img).unsqueeze(0) + +def save_raw_16bit(depth, fpath="raw.png"): + if isinstance(depth, torch.Tensor): + depth = depth.squeeze().cpu().numpy() + + assert isinstance(depth, np.ndarray), "Depth must be a torch tensor or numpy array" + assert depth.ndim == 2, "Depth must be 2D" + depth = depth * 256 # scale for 16-bit png + depth = depth.astype(np.uint16) + depth = Image.fromarray(depth) + depth.save(fpath) + print("Saved raw depth to", fpath) \ No newline at end of file diff --git a/examples.png b/examples.png new file mode 100644 index 0000000000000000000000000000000000000000..a34913d49cb3bf468b92b37c731b0111992968de --- /dev/null +++ b/examples.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f9f8ec92099923ddcbf01000221a53202f4f0c97f036c301cd2d4af4990d08f3 +size 2571871 diff --git a/inpaint/DOCUMENTATION.md b/inpaint/DOCUMENTATION.md new file mode 100644 index 0000000000000000000000000000000000000000..a0e7ce0e00f95fdd41f83f3163b6ac28b15cc9df --- /dev/null +++ b/inpaint/DOCUMENTATION.md @@ -0,0 +1,146 @@ +# Documentation + +## Python scripts + +These files are for our monocular 3D Tracking pipeline: + +`main.py` Execute 3D photo inpainting + +`mesh.py` Functions about context-aware depth inpainting + +`mesh_tools.py` Some common functions used in `mesh.py` + +`utils.py` Some common functions used in image preprocessing, data loading + +`networks.py` Network architectures of inpainting model + + +MiDaS/ + +`run.py` Execute depth estimation + +`monodepth_net.py` Network architecture of depth estimation model + +`MiDaS_utils.py` Some common functions in depth estimation + + +## Configuration + +```bash +argument.yml +``` + +- `depth_edge_model_ckpt: checkpoints/EdgeModel.pth` + - Pretrained model of depth-edge inpainting +- `depth_feat_model_ckpt: checkpoints/DepthModel.pth` + - Pretrained model of depth inpainting +- `rgb_feat_model_ckpt: checkpoints/ColorModel.pth` + - Pretrained model of color inpainting +- `MiDaS_model_ckpt: MiDaS/model.pt` + - Pretrained model of depth estimation +- `use_boostmonodepth: True` + - Use [BoostMonocularDepth](https://github.com/compphoto/BoostingMonocularDepth) to get sharper monocular depth estimation +- `fps: 40` + - Frame per second of output rendered video +- `num_frames: 240` + - Total number of frames in output rendered video +- `x_shift_range: [-0.03, -0.03, -0.03]` + - The translations on x-axis of output rendered videos. + - This parameter is a list. Each element corresponds to a specific camera motion. +- `y_shift_range: [-0.00, -0.00, -0.03]` + - The translations on y-axis of output rendered videos. + - This parameter is a list. Each element corresponds to a specific camera motion. 
+- `z_shift_range: [-0.07, -0.07, -0.07]` + - The translations on z-axis of output rendered videos. + - This parameter is a list. Each element corresponds to a specific camera motion. +- `traj_types: ['straight-line', 'circle', 'circle']` + - The type of camera trajectory. + - This parameter is a list. + - Currently, we only provide `straight-line` and `circle`. +- `video_postfix: ['zoom-in', 'swing', 'circle']` + - The postfix of video. + - This parameter is a list. +- Note that the number of elements in `x_shift_range`, `y_shift_range`, `z_shift_range`, `traj_types` and `video_postfix` should be equal. +- `specific: '' ` + - The specific image name, use this to specify the image to be executed. By default, all the images in the folder will be executed. +- `longer_side_len: 960` + - The length of larger dimension in output resolution. +- `src_folder: image` + - Input image directory. +- `depth_folder: depth` + - Estimated depth directory. +- `mesh_folder: mesh` + - Output 3-D mesh directory. +- `video_folder: video` + - Output rendered video directory +- `load_ply: False` + - Action to load existing mesh (.ply) file +- `save_ply: True` + - Action to store the output mesh (.ply) file + - Disable this option `save_ply: False` to reduce the computational time. +- `inference_video: True` + - Action to render the output video +- `gpu_ids: 0` + - The ID of working GPU. Leave it blank or negative to use CPU. +- `offscreen_rendering: True` + - If you're executing the process in a remote server (via ssh), please switch on this flag. + - Sometimes, using off-screen rendering results in longer execution time. +- `img_format: '.jpg'` + - Input image format. +- `depth_format: '.npy'` + - Input depth (disparity) format. Use NumPy array file as default. + - If the user wants to edit the depth (disparity) map manually, we provide `.png` format depth (disparity) map. + - Remember to switch this parameter from `.npy` to `.png` when using depth (disparity) map with `.png` format. 
+- `require_midas: True` + - Set it to `True` if the user wants to use depth map estimated by `MiDaS`. + - Set it to `False` if the user wants to use manually edited depth map. + - If the user wants to edit the depth (disparity) map manually, we provide `.png` format depth (disparity) map. + - Remember to switch this parameter from `True` to `False` when using manually edited depth map. +- `depth_threshold: 0.04` + - A threshold in disparity: two adjacent pixels are discontinuity pixels + if the difference between them exceeds this number. +- `ext_edge_threshold: 0.002` + - The threshold to define inpainted depth edge. A pixel in inpainted edge + map belongs to extended depth edge if the value of that pixel exceeds this number. +- `sparse_iter: 5` + - Total number of iterations of the bilateral median filter +- `filter_size: [7, 7, 5, 5, 5]` + - Window size of bilateral median filter in each iteration. +- `sigma_s: 4.0` + - Intensity term of bilateral median filter +- `sigma_r: 0.5` + - Spatial term of bilateral median filter +- `redundant_number: 12` + - The number defines short segments. If a depth edge is shorter than this number, + it is treated as a short segment and removed. +- `background_thickness: 70` + - The thickness of synthesis area. +- `context_thickness: 140` + - The thickness of context area. +- `background_thickness_2: 70` + - The thickness of synthesis area when inpainting a second time. +- `context_thickness_2: 70` + - The thickness of context area when inpainting a second time. +- `discount_factor: 1.00` +- `log_depth: True` + - The scale of depth inpainting. If true, inpainting is performed in log scale. + Otherwise, it is performed in linear scale. +- `largest_size: 512` + - The largest size of inpainted image patch. +- `depth_edge_dilate: 10` + - The thickness of dilated synthesis area. +- `depth_edge_dilate_2: 5` + - The thickness of dilated synthesis area when inpainting a second time. +- `extrapolate_border: True` + - Action to extrapolate outside the border. 
+- `extrapolation_thickness: 60` + - The thickness of extrapolated area. +- `repeat_inpaint_edge: True` + - Action to apply depth edge inpainting model repeatedly. Sometimes inpainting the depth + edge once results in a short inpainted edge; applying depth edge inpainting repeatedly + can help prolong the inpainted depth edge. +- `crop_border: [0.03, 0.03, 0.05, 0.03]` + - The fraction of pixels to crop out around the borders `[top, left, bottom, right]`. +- `anti_flickering: True` + - Action to avoid flickering effect in the output video. + - This may result in longer computational time in rendering phase. diff --git a/inpaint/LICENSE b/inpaint/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..f32be3769fea45122a5756603369867059cd4dc0 --- /dev/null +++ b/inpaint/LICENSE @@ -0,0 +1,50 @@ + +MIT License + +Copyright (c) 2020 Virginia Tech Vision and Learning Lab + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +------------------ LICENSE FOR MiDaS -------------------- + +MIT License + +Copyright (c) 2019 Intel ISL (Intel Intelligent Systems Lab) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ +--------------------------- LICENSE FOR EdgeConnect -------------------------------- + +Attribution-NonCommercial 4.0 International \ No newline at end of file diff --git a/inpaint/README.md b/inpaint/README.md new file mode 100644 index 0000000000000000000000000000000000000000..329430d36c44789f3bbd6a86f5febb487846329a --- /dev/null +++ b/inpaint/README.md @@ -0,0 +1,95 @@ +# [CVPR 2020] 3D Photography using Context-aware Layered Depth Inpainting + +[![Open 3DPhotoInpainting in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1706ToQrkIZshRSJSHvZ1RuCiM__YX3Bz) + +### [[Paper](https://arxiv.org/abs/2004.04727)] [[Project Website](https://shihmengli.github.io/3D-Photo-Inpainting/)] [[Google Colab](https://colab.research.google.com/drive/1706ToQrkIZshRSJSHvZ1RuCiM__YX3Bz)] + +

+ +

+ +We propose a method for converting a single RGB-D input image into a 3D photo, i.e., a multi-layer representation for novel view synthesis that contains hallucinated color and depth structures in regions occluded in the original view. We use a Layered Depth Image with explicit pixel connectivity as underlying representation, and present a learning-based inpainting model that iteratively synthesizes new local color-and-depth content into the occluded region in a spatial context-aware manner. The resulting 3D photos can be efficiently rendered with motion parallax using standard graphics engines. We validate the effectiveness of our method on a wide range of challenging everyday scenes and show fewer artifacts when compared with the state-of-the-arts. +
+ +**3D Photography using Context-aware Layered Depth Inpainting** +
+[Meng-Li Shih](https://shihmengli.github.io/), +[Shih-Yang Su](https://lemonatsu.github.io/), +[Johannes Kopf](https://johanneskopf.de/), and +[Jia-Bin Huang](https://filebox.ece.vt.edu/~jbhuang/) +
+In IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2020. + + +## Prerequisites + +- Linux (tested on Ubuntu 18.04.4 LTS) +- Anaconda +- Python 3.7 (tested on 3.7.4) +- PyTorch 1.4.0 (tested on 1.4.0 for execution) + +and the Python dependencies listed in [requirements.txt](requirements.txt) +- To get started, please run the following commands: + ```bash + conda create -n 3DP python=3.7 anaconda + conda activate 3DP + pip install -r requirements.txt + conda install pytorch==1.4.0 torchvision==0.5.0 cudatoolkit==10.1.243 -c pytorch + ``` +- Next, please download the model weight using the following command: + ```bash + chmod +x download.sh + ./download.sh + ``` + +## Quick start +Please follow the instructions in this section. +This should allow to execute our results. +For more detailed instructions, please refer to [`DOCUMENTATION.md`](DOCUMENTATION.md). + +## Execute +1. Put ```.jpg``` files (e.g., test.jpg) into the ```image``` folder. + - E.g., `image/moon.jpg` +2. Run the following command + ```bash + python main.py --config argument.yml + ``` + - Note: The 3D photo generation process usually takes about 2-3 minutes depending on the available computing resources. +3. The results are stored in the following directories: + - Corresponding depth map estimated by [MiDaS](https://github.com/intel-isl/MiDaS.git) + - E.g. ```depth/moon.npy```, ```depth/moon.png``` + - User could edit ```depth/moon.png``` manually. + - Remember to set the following two flags as listed below if user wants to use manually edited ```depth/moon.png``` as input for 3D Photo. + - `depth_format: '.png'` + - `require_midas: False` + - Inpainted 3D mesh (Optional: User need to switch on the flag `save_ply`) + - E.g. ```mesh/moon.ply``` + - Rendered videos with zoom-in motion + - E.g. ```video/moon_zoom-in.mp4``` + - Rendered videos with swing motion + - E.g. ```video/moon_swing.mp4``` + - Rendered videos with circle motion + - E.g. 
```video/moon_circle.mp4``` + - Rendered videos with dolly zoom-in effect + - E.g. ```video/moon_dolly-zoom-in.mp4``` + - Note: We assume that the object of focus is located at the center of the image. +4. (Optional) If you want to change the default configuration, please read [`DOCUMENTATION.md`](DOCUMENTATION.md) and modify ```argument.yml```. + + +## License +This work is licensed under the MIT License. See [LICENSE](LICENSE) for details. + +If you find our code/models useful, please consider citing our paper: +``` +@inproceedings{Shih3DP20, + author = {Shih, Meng-Li and Su, Shih-Yang and Kopf, Johannes and Huang, Jia-Bin}, + title = {3D Photography using Context-aware Layered Depth Inpainting}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2020} +} +``` + +## Acknowledgments +- We thank Pratul Srinivasan for providing clarification of the method [Srinivasan et al. CVPR 2019](https://people.eecs.berkeley.edu/~pratul/publication/mpi_extrapolation/). +- We thank the authors of [Zhou et al. 2018](https://people.eecs.berkeley.edu/~tinghuiz/projects/mpi/), [Choi et al. 2019](https://github.com/NVlabs/extreme-view-synth/), [Mildenhall et al. 2019](https://github.com/Fyusion/LLFF), [Srinivasan et al. 2019](https://github.com/google-research/google-research/tree/ac9b04e1dbdac468fda53e798a326fe9124e49fe/mpi_extrapolation), [Wiles et al. 2020](http://www.robots.ox.ac.uk/~ow/synsin.html), [Niklaus et al. 2019](https://github.com/sniklaus/3d-ken-burns) for providing their implementations online. 
+- Our code builds upon [EdgeConnect](https://github.com/knazeri/edge-connect), [MiDaS](https://github.com/intel-isl/MiDaS.git) and [pytorch-inpainting-with-partial-conv](https://github.com/naoto0804/pytorch-inpainting-with-partial-conv) diff --git a/inpaint/__init__.py b/inpaint/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/inpaint/__pycache__/__init__.cpython-310.pyc b/inpaint/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..024fedd6b201dc9e4da10cebc4d0a2218f69d845 Binary files /dev/null and b/inpaint/__pycache__/__init__.cpython-310.pyc differ diff --git a/inpaint/__pycache__/__init__.cpython-311.pyc b/inpaint/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..25056b6c37bec21111fb18f014ad7298f10d59b8 Binary files /dev/null and b/inpaint/__pycache__/__init__.cpython-311.pyc differ diff --git a/inpaint/__pycache__/__init__.cpython-312.pyc b/inpaint/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46ed86edc3f29fc13467cbc9e7015c243d30f45b Binary files /dev/null and b/inpaint/__pycache__/__init__.cpython-312.pyc differ diff --git a/inpaint/__pycache__/bilateral_filtering.cpython-310.pyc b/inpaint/__pycache__/bilateral_filtering.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7100c3738fce8a1524f019d0b6ebfff41c758a5a Binary files /dev/null and b/inpaint/__pycache__/bilateral_filtering.cpython-310.pyc differ diff --git a/inpaint/__pycache__/bilateral_filtering.cpython-312.pyc b/inpaint/__pycache__/bilateral_filtering.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..017ee830064124136037d4e95aeccc81bb490935 Binary files /dev/null and b/inpaint/__pycache__/bilateral_filtering.cpython-312.pyc differ diff --git 
a/inpaint/__pycache__/mesh.cpython-310.pyc b/inpaint/__pycache__/mesh.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab0e1ffc35fedbe6536f3bc14f86b615c4ff7ae0 Binary files /dev/null and b/inpaint/__pycache__/mesh.cpython-310.pyc differ diff --git a/inpaint/__pycache__/mesh.cpython-311.pyc b/inpaint/__pycache__/mesh.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..348a1af3574c31704ab7822dc75d0340cc13455c Binary files /dev/null and b/inpaint/__pycache__/mesh.cpython-311.pyc differ diff --git a/inpaint/__pycache__/mesh.cpython-312.pyc b/inpaint/__pycache__/mesh.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d00be00633fad9bc416cc86bab7a854d6deaf300 Binary files /dev/null and b/inpaint/__pycache__/mesh.cpython-312.pyc differ diff --git a/inpaint/__pycache__/mesh_tools.cpython-310.pyc b/inpaint/__pycache__/mesh_tools.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3763760360375ca073fa8d17d5f30d5b5261453a Binary files /dev/null and b/inpaint/__pycache__/mesh_tools.cpython-310.pyc differ diff --git a/inpaint/__pycache__/mesh_tools.cpython-311.pyc b/inpaint/__pycache__/mesh_tools.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a55a633f459d327c99e1b8faf32ed6c147353a93 Binary files /dev/null and b/inpaint/__pycache__/mesh_tools.cpython-311.pyc differ diff --git a/inpaint/__pycache__/mesh_tools.cpython-312.pyc b/inpaint/__pycache__/mesh_tools.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..af84a1272428cf06ff32bedac8555cd2c3764aa7 Binary files /dev/null and b/inpaint/__pycache__/mesh_tools.cpython-312.pyc differ diff --git a/inpaint/__pycache__/networks.cpython-310.pyc b/inpaint/__pycache__/networks.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fe089e4d528f8eecaf5351dd2a127566ef74093 Binary files /dev/null and 
b/inpaint/__pycache__/networks.cpython-310.pyc differ diff --git a/inpaint/__pycache__/networks.cpython-312.pyc b/inpaint/__pycache__/networks.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..14ac7be8f1a74bc1cf5b54189d71ed21371a4776 Binary files /dev/null and b/inpaint/__pycache__/networks.cpython-312.pyc differ diff --git a/inpaint/__pycache__/utils.cpython-310.pyc b/inpaint/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f017135d1440024af8da655d8b2d9825bf1ae5e2 Binary files /dev/null and b/inpaint/__pycache__/utils.cpython-310.pyc differ diff --git a/inpaint/__pycache__/utils.cpython-311.pyc b/inpaint/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..340d1469f16186a3cc60b58197c2356da8a31161 Binary files /dev/null and b/inpaint/__pycache__/utils.cpython-311.pyc differ diff --git a/inpaint/__pycache__/utils.cpython-312.pyc b/inpaint/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d4942f336cb6ff4269f5201ed3085f778b0a4a8 Binary files /dev/null and b/inpaint/__pycache__/utils.cpython-312.pyc differ diff --git a/inpaint/argument.yml b/inpaint/argument.yml new file mode 100644 index 0000000000000000000000000000000000000000..673dbbf6202a91f4e65d3539777b7c31b5af9cfb --- /dev/null +++ b/inpaint/argument.yml @@ -0,0 +1,47 @@ +depth_edge_model_ckpt: checkpoints/edge-model.pth +depth_feat_model_ckpt: checkpoints/depth-model.pth +rgb_feat_model_ckpt: checkpoints/color-model.pth +MiDaS_model_ckpt: MiDaS/model.pt +use_boostmonodepth: True +fps: 40 +num_frames: 240 +x_shift_range: [0.00, 0.00, -0.015, -0.015] +y_shift_range: [0.00, 0.00, -0.015, -0.00] +z_shift_range: [-0.05, -0.05, -0.05, -0.05] +traj_types: ['double-straight-line', 'double-straight-line', 'circle', 'circle'] +video_postfix: ['dolly-zoom-in', 'zoom-in', 'circle', 'swing'] +specific: '' +longer_side_len: 960 
import numpy as np
from functools import reduce


def sparse_bilateral_filtering(
    depth, image, config, HR=False, mask=None, gsHR=True, edge_id=None, num_iter=None, num_gs_iter=None, spdb=False
):
    """Repeatedly sharpen depth edges with a discontinuity-aware median filter.

    Args:
        depth: 2-D depth map; it is copied, never modified.
        image: H x W x 3 image used only to visualise detected discontinuities.
        config: mapping read for ``filter_size`` (int or per-iteration list),
            ``depth_threshold``, ``sigma_s`` and ``sigma_r``.
        HR, gsHR, edge_id, num_gs_iter, spdb: accepted for compatibility;
            only ``HR`` is forwarded to ``bilateral_filter``.
        mask: optional 2-D validity mask (0 = ignore pixel).
        num_iter: number of filtering passes. When omitted it falls back to
            ``config['sparse_iter']``, then to ``len(filter_size)`` for a list,
            then to a single pass.

    Returns:
        ``(save_images, save_depths)``: for every pass, the image with the
        detected discontinuities painted black and the depth map that was fed
        INTO that pass (the output of the final pass is not appended).
    """
    if num_iter is None:
        # The declared default used to reach range(None) and raise TypeError.
        filter_size = config["filter_size"]
        fallback = len(filter_size) if isinstance(filter_size, list) else 1
        num_iter = config.get("sparse_iter", fallback)

    save_images = []
    save_depths = []
    vis_depth = depth.copy()
    for i in range(num_iter):
        if isinstance(config["filter_size"], list):
            window_size = config["filter_size"][i]
        else:
            window_size = config["filter_size"]
        vis_image = image.copy()
        save_images.append(vis_image)
        save_depths.append(vis_depth)
        overs = vis_depth_discontinuity(vis_depth, config, mask=mask)
        # vis_image is already stored in save_images, so painting it here is
        # what makes the discontinuities visible in the returned images.
        for over in overs:
            vis_image[over > 0] = np.array([0, 0, 0])

        u_over, b_over, l_over, r_over = overs
        discontinuity_map = (u_over + b_over + l_over + r_over).clip(0.0, 1.0)
        discontinuity_map[depth == 0] = 1
        if mask is not None:
            discontinuity_map[mask == 0] = 0
        vis_depth = bilateral_filter(
            vis_depth, config, discontinuity_map=discontinuity_map, HR=HR, mask=mask, window_size=window_size
        )

    return save_images, save_depths


def _four_neighbour_terms(arr, op):
    """Combine every interior pixel with its up/bottom/left/right neighbour via ``op``."""
    return [
        op(arr[1:, :], arr[:-1, :])[:-1, 1:-1],
        op(arr[:-1, :], arr[1:, :])[1:, 1:-1],
        op(arr[:, 1:], arr[:, :-1])[1:-1, :-1],
        op(arr[:, :-1], arr[:, 1:])[1:-1, 1:],
    ]


def vis_depth_discontinuity(depth, config, vis_diff=False, label=False, mask=None):
    """Flag pixels whose 4-neighbours differ too much.

    With ``label == False`` the input is treated as depth: disparities
    (``1 / depth``) are differenced and compared against
    ``config['depth_threshold']``. Otherwise the input is used as-is, the
    neighbours are multiplied and any non-zero product is flagged.

    Args:
        depth: 2-D array.
        config: mapping read for ``depth_threshold`` (depth mode only).
        vis_diff: also return the raw per-direction terms.
        label: selects the mode described above.
        mask: optional 2-D mask; a term is zeroed unless both pixels are valid.

    Returns:
        ``[u_over, b_over, l_over, r_over]`` (float32, same shape as the
        input, zero on the one-pixel border), plus
        ``[u_diff, b_diff, l_diff, r_diff]`` when ``vis_diff`` is true.
    """
    if label == False:
        disp = 1. / depth
        diffs = _four_neighbour_terms(disp, np.subtract)
        threshold = config['depth_threshold']
    else:
        diffs = _four_neighbour_terms(depth, np.multiply)
        threshold = 0
    if mask is not None:
        pair_masks = _four_neighbour_terms(mask, np.multiply)
        diffs = [diff * pair_mask for diff, pair_mask in zip(diffs, pair_masks)]
    overs = [(np.abs(diff) > threshold).astype(np.float32) for diff in diffs]

    # Restore the input shape: the interior crop dropped a one-pixel frame.
    overs = [np.pad(over, 1, mode='constant') for over in overs]
    diffs = [np.pad(diff, 1, mode='constant') for diff in diffs]

    if vis_diff:
        return overs, diffs
    else:
        return overs


def _weighted_median(depth_patch, coef):
    """Return the value of ``depth_patch`` at which the normalised weights reach 0.5."""
    depth_order = depth_patch.ravel().argsort()
    coef_order = (coef / coef.sum()).ravel()[depth_order]
    ind = np.digitize(0.5, np.cumsum(coef_order))
    return depth_patch.ravel()[depth_order][ind]


def bilateral_filter(depth, config, discontinuity_map=None, HR=False, mask=None, window_size=False):
    """Weighted-median filter of a depth map.

    * With ``discontinuity_map``: only pixels whose window touches a
      discontinuity are rewritten, using the median of the window pixels that
      are NOT discontinuities (and are valid under ``mask``).
    * Without it: every pixel becomes the median weighted by a Gaussian
      spatial term (``sigma_s``) times a Gaussian range term (``sigma_r``).
      This path previously raised ``NameError`` because the spatial kernel
      was only built when a discontinuity map was supplied.

    Args:
        depth: 2-D depth map; its one-pixel border is replaced by edge padding.
        config: mapping read for ``sigma_s``, ``sigma_r`` and, when
            ``window_size`` is falsy, ``filter_size``.
        discontinuity_map: optional 2-D map, non-zero on discontinuities.
        HR: unused, kept for compatibility.
        mask: optional 2-D validity mask; only used with ``discontinuity_map``.
        window_size: odd window side length.

    Returns:
        The filtered depth map (new array, same shape as ``depth``).
    """
    sigma_s = config['sigma_s']
    sigma_r = config['sigma_r']
    if window_size == False:
        window_size = config['filter_size']
    midpt = window_size // 2
    ax = np.arange(-midpt, midpt + 1.)
    xx, yy = np.meshgrid(ax, ax)
    spatial_term = np.exp(-(xx**2 + yy**2) / (2. * sigma_s**2))
    window = [window_size, window_size]

    # Rebuild the outer frame from the interior, then pad by half a window so
    # that every pixel owns a full patch.
    depth = np.pad(depth[1:-1, 1:-1], ((1, 1), (1, 1)), 'edge')
    pad_depth = np.pad(depth, (midpt, midpt), 'edge')
    output = depth.copy()
    pad_depth_patches = rolling_window(pad_depth, window, [1, 1])
    pH, pW = pad_depth_patches.shape[:2]

    if discontinuity_map is None:
        for pi in range(pH):
            for pj in range(pW):
                depth_patch = pad_depth_patches[pi, pj]
                patch_midpt = depth_patch[midpt, midpt]
                range_term = np.exp(-(depth_patch - patch_midpt)**2 / (2. * sigma_r**2))
                coef = spatial_term * range_term
                if coef.sum() == 0:
                    output[pi, pj] = patch_midpt
                    continue
                output[pi, pj] = _weighted_median(depth_patch, coef)
        return output

    discontinuity_map = np.pad(discontinuity_map[1:-1, 1:-1], ((1, 1), (1, 1)), 'edge')
    pad_discontinuity_map = np.pad(discontinuity_map, (midpt, midpt), 'edge')
    pad_discontinuity_hole = 1 - pad_discontinuity_map
    pad_discontinuity_patches = rolling_window(pad_discontinuity_map, window, [1, 1])
    pad_discontinuity_hole_patches = rolling_window(pad_discontinuity_hole, window, [1, 1])
    if mask is not None:
        pad_mask = np.pad(mask, (midpt, midpt), 'constant')
        pad_mask_patches = rolling_window(pad_mask, window, [1, 1])

    for pi in range(pH):
        for pj in range(pW):
            if mask is not None and mask[pi, pj] == 0:
                continue
            if not pad_discontinuity_patches[pi, pj].any():
                # No discontinuity nearby: keep the pixel untouched.
                continue
            depth_patch = pad_depth_patches[pi, pj]
            coef = pad_discontinuity_hole_patches[pi, pj].astype(np.float32)
            if mask is not None:
                coef = coef * pad_mask_patches[pi, pj]
            if coef.max() == 0:
                # Every candidate in the window is excluded.
                output[pi, pj] = depth_patch[midpt, midpt]
                continue
            output[pi, pj] = _weighted_median(depth_patch, coef)

    return output


def rolling_window(a, window, strides):
    """Return a strided view of all ``window``-shaped patches of ``a``.

    The result has shape ``n_patches_per_axis + window``; ``out[i, j]`` is the
    patch whose top-left corner is ``a[i * strides[0], j * strides[1]]``.
    Outer strides are derived from ``a.strides`` so that non-contiguous inputs
    (transposes, slices) are handled too. The view aliases ``a``.
    """
    assert len(a.shape)==len(window)==len(strides), "\'a\', \'window\', \'strides\' dimension mismatch"
    shape = [(dim - w) // s + 1 for dim, w, s in zip(a.shape, window, strides)] + list(window)
    _strides = [byte_step * s for byte_step, s in zip(a.strides, strides)] + list(a.strides)

    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=_strides)
import glob
import os
import shutil
import subprocess

import numpy as np

BOOST_BASE = 'BoostingMonocularDepth'

BOOST_INPUTS = 'inputs'
BOOST_OUTPUTS = 'outputs'


def run_boostmonodepth(img_names, src_folder, depth_folder):
    """Run BoostingMonocularDepth on the given images and store the depth maps.

    Each image is copied into ``BOOST_BASE/BOOST_INPUTS``, ``run.py`` is
    executed inside ``BOOST_BASE``, and every resulting ``.png`` depth map is
    resized so its longer side is 640 px and written to ``depth_folder`` both
    as ``<stem>.npy`` (scaled by ``/ 32768 - 1``) and through ``write_depth``.

    Args:
        img_names: one image path or a list of image paths.
        src_folder: unused, kept for interface compatibility.
        depth_folder: directory receiving the depth outputs.
    """
    # Third-party / project imports stay local so that the pure-filesystem
    # helper below can be used without them installed.
    import imageio
    from MiDaS.MiDaS_utils import write_depth

    if not isinstance(img_names, list):
        img_names = [img_names]

    # remove irrelevant files first
    clean_folder(os.path.join(BOOST_BASE, BOOST_INPUTS))
    clean_folder(os.path.join(BOOST_BASE, BOOST_OUTPUTS))

    tgt_names = []
    for img_name in img_names:
        base_name = os.path.basename(img_name)
        # shutil instead of `cp` in a shell string: paths containing spaces or
        # shell metacharacters are copied verbatim.
        shutil.copy(img_name, os.path.join(BOOST_BASE, BOOST_INPUTS, base_name))

        # keep only the file name here; the tool saves all depth as .png,
        # whatever the input extension was.
        tgt_names.append(os.path.splitext(base_name)[0] + '.png')

    # Argument list + cwd replaces the former `cd ... && python ...` shell line.
    subprocess.run(
        ['python', 'run.py', '--Final', '--data_dir', f'{BOOST_INPUTS}/',
         '--output_dir', BOOST_OUTPUTS, '--depthNet', '0'],
        cwd=BOOST_BASE,
    )

    for img_name, tgt_name in zip(img_names, tgt_names):
        img = imageio.imread(img_name)
        H, W = img.shape[:2]
        scale = 640. / max(H, W)

        # resize and save depth
        target_height, target_width = int(round(H * scale)), int(round(W * scale))
        depth = imageio.imread(os.path.join(BOOST_BASE, BOOST_OUTPUTS, tgt_name))
        depth = np.array(depth).astype(np.float32)
        depth = resize_depth(depth, target_width, target_height)
        stem = os.path.splitext(tgt_name)[0]
        np.save(os.path.join(depth_folder, stem + '.npy'), depth / 32768. - 1.)
        write_depth(os.path.join(depth_folder, stem), depth)


def clean_folder(folder, img_exts=('.png', '.jpg', '.npy')):
    """Delete every file in ``folder`` whose name ends with one of ``img_exts``.

    Args:
        folder: directory to clean; other files and sub-directories are kept.
        img_exts: iterable of suffixes (including the dot).
    """
    for img_ext in img_exts:
        paths_to_check = os.path.join(folder, f'*{img_ext}')
        # Escape the folder part so brackets etc. in its name are literal.
        matches = glob.glob(os.path.join(glob.escape(folder), f'*{img_ext}'))
        if not matches:
            continue
        print(paths_to_check)
        for path in matches:
            if os.path.isfile(path):
                os.remove(path)


def resize_depth(depth, width, height):
    """Resize numpy (or image read by imageio) depth map

    Args:
        depth (numpy): depth
        width (int): image width
        height (int): image height

    Returns:
        array: processed depth (3x3 box-blurred, then area-resampled)
    """
    import cv2

    depth = cv2.blur(depth, (3, 3))
    return cv2.resize(depth, (width, height), interpolation=cv2.INTER_AREA)
import numpy as np
import argparse
import glob
import os
from functools import partial
import vispy
import scipy.misc as misc
from tqdm import tqdm
import yaml
import time
import sys
from mesh import write_mesh, read_ply, output_3d_photo
from utils import get_MiDaS_samples, read_MiDaS_depth
import torch
import cv2
from skimage.transform import resize
import imageio
import copy
from networks import Inpaint_Color_Net, Inpaint_Depth_Net, Inpaint_Edge_Net
from MiDaS.run import run_depth
from boostmonodepth_utils import run_boostmonodepth
from MiDaS.monodepth_net import MonoDepthNet
import MiDaS.MiDaS_utils as MiDaS_utils
from bilateral_filtering import sparse_bilateral_filtering

# 3D-photo pipeline driver: for every sample found in the source folder,
# estimate depth, build (or reload) the inpainted mesh, then render the videos.

parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, default='argument.yml',help='Configure of post processing')
args = parser.parse_args()
# safe_load: yaml.load() without an explicit Loader is rejected by PyYAML >= 6
# and can construct arbitrary objects on older versions. The context manager
# also closes the file handle.
with open(args.config, 'r') as config_file:
    config = yaml.safe_load(config_file)
if config['offscreen_rendering'] is True:
    vispy.use(app='egl')
os.makedirs(config['mesh_folder'], exist_ok=True)
os.makedirs(config['video_folder'], exist_ok=True)
os.makedirs(config['depth_folder'], exist_ok=True)
sample_list = get_MiDaS_samples(config['src_folder'], config['depth_folder'], config, config['specific'])
normal_canvas, all_canvas = None, None

if isinstance(config["gpu_ids"], int) and (config["gpu_ids"] >= 0):
    device = config["gpu_ids"]
else:
    device = "cpu"

print(f"running on device {device}")

for sample in tqdm(sample_list):
    depth = None
    print("Current Source ==> ", sample['src_pair_name'])
    mesh_fi = os.path.join(config['mesh_folder'], sample['src_pair_name'] +'.ply')
    image = imageio.imread(sample['ref_img_fi'])

    print(f"Running depth extraction at {time.time()}")
    if config['use_boostmonodepth'] is True:
        run_boostmonodepth(sample['ref_img_fi'], config['src_folder'], config['depth_folder'])
    elif config['require_midas'] is True:
        run_depth([sample['ref_img_fi']], config['src_folder'], config['depth_folder'],
                  config['MiDaS_model_ckpt'], MonoDepthNet, MiDaS_utils, target_w=640)

    # Output resolution: depth-map aspect ratio, longer side = longer_side_len.
    if 'npy' in config['depth_format']:
        config['output_h'], config['output_w'] = np.load(sample['depth_fi']).shape[:2]
    else:
        config['output_h'], config['output_w'] = imageio.imread(sample['depth_fi']).shape[:2]
    frac = config['longer_side_len'] / max(config['output_h'], config['output_w'])
    config['output_h'], config['output_w'] = int(config['output_h'] * frac), int(config['output_w'] * frac)
    config['original_h'], config['original_w'] = config['output_h'], config['output_w']
    if image.ndim == 2:
        image = image[..., None].repeat(3, -1)
    # Gray image <=> all three channels are identical.
    config['gray_image'] = bool(
        np.sum(np.abs(image[..., 0] - image[..., 1])) == 0
        and np.sum(np.abs(image[..., 1] - image[..., 2])) == 0
    )
    image = cv2.resize(image, (config['output_w'], config['output_h']), interpolation=cv2.INTER_AREA)
    depth = read_MiDaS_depth(sample['depth_fi'], 3.0, config['output_h'], config['output_w'])
    mean_loc_depth = depth[depth.shape[0]//2, depth.shape[1]//2]
    if not(config['load_ply'] is True and os.path.exists(mesh_fi)):
        vis_photos, vis_depths = sparse_bilateral_filtering(depth.copy(), image.copy(), config, num_iter=config['sparse_iter'], spdb=False)
        depth = vis_depths[-1]
        torch.cuda.empty_cache()
        print("Start Running 3D_Photo ...")
        print(f"Loading edge model at {time.time()}")
        depth_edge_model = Inpaint_Edge_Net(init_weights=True)
        depth_edge_weight = torch.load(config['depth_edge_model_ckpt'],
                                       map_location=torch.device(device))
        depth_edge_model.load_state_dict(depth_edge_weight)
        depth_edge_model = depth_edge_model.to(device)
        depth_edge_model.eval()

        print(f"Loading depth model at {time.time()}")
        depth_feat_model = Inpaint_Depth_Net()
        depth_feat_weight = torch.load(config['depth_feat_model_ckpt'],
                                       map_location=torch.device(device))
        depth_feat_model.load_state_dict(depth_feat_weight, strict=True)
        depth_feat_model = depth_feat_model.to(device)
        depth_feat_model.eval()
        print(f"Loading rgb model at {time.time()}")
        rgb_model = Inpaint_Color_Net()
        rgb_feat_weight = torch.load(config['rgb_feat_model_ckpt'],
                                     map_location=torch.device(device))
        rgb_model.load_state_dict(rgb_feat_weight)
        rgb_model.eval()
        rgb_model = rgb_model.to(device)

        print(f"Writing depth ply (and basically doing everything) at {time.time()}")
        # NOTE(review): depth_edge_model is passed twice on purpose here, as in
        # the original call; confirm against write_mesh's signature.
        rt_info = write_mesh(image,
                             depth,
                             sample['int_mtx'],
                             mesh_fi,
                             config,
                             rgb_model,
                             depth_edge_model,
                             depth_edge_model,
                             depth_feat_model)

        if rt_info is False:
            continue
        # Drop the networks before rendering so their GPU memory can be freed.
        rgb_model = None
        depth_edge_model = None
        depth_feat_model = None
        torch.cuda.empty_cache()
    if config['save_ply'] is True or config['load_ply'] is True:
        verts, colors, faces, Height, Width, hFov, vFov = read_ply(mesh_fi)
    else:
        verts, colors, faces, Height, Width, hFov, vFov = rt_info

    print(f"Making video at {time.time()}")
    videos_poses, video_basename = copy.deepcopy(sample['tgts_poses']), sample['tgt_name']
    top = (config.get('original_h') // 2 - sample['int_mtx'][1, 2] * config['output_h'])
    left = (config.get('original_w') // 2 - sample['int_mtx'][0, 2] * config['output_w'])
    down, right = top + config['output_h'], left + config['output_w']
    border = [int(xx) for xx in [top, down, left, right]]
    normal_canvas, all_canvas = output_3d_photo(verts.copy(), colors.copy(), faces.copy(), copy.deepcopy(Height), copy.deepcopy(Width), copy.deepcopy(hFov), copy.deepcopy(vFov),
                        copy.deepcopy(sample['tgt_pose']), sample['video_postfix'], copy.deepcopy(sample['ref_pose']), copy.deepcopy(config['video_folder']),
                        image.copy(), copy.deepcopy(sample['int_mtx']), config, image,
                        videos_poses, video_basename, config.get('original_h'), config.get('original_w'), border=border, depth=depth, normal_canvas=normal_canvas, all_canvas=all_canvas,
                        mean_loc_depth=mean_loc_depth)
all_canvas = output_3d_photo(verts.copy(), colors.copy(), faces.copy(), copy.deepcopy(Height), copy.deepcopy(Width), copy.deepcopy(hFov), copy.deepcopy(vFov), + copy.deepcopy(sample['tgt_pose']), sample['video_postfix'], copy.deepcopy(sample['ref_pose']), copy.deepcopy(config['video_folder']), + image.copy(), copy.deepcopy(sample['int_mtx']), config, image, + videos_poses, video_basename, config.get('original_h'), config.get('original_w'), border=border, depth=depth, normal_canvas=normal_canvas, all_canvas=all_canvas, + mean_loc_depth=mean_loc_depth) diff --git a/inpaint/mesh.py b/inpaint/mesh.py new file mode 100644 index 0000000000000000000000000000000000000000..9b2c19e322858a09a9fdc6417619288e87f84952 --- /dev/null +++ b/inpaint/mesh.py @@ -0,0 +1,2548 @@ +import os +import numpy as np +""" +try: + import cynetworkx as netx +except ImportError: + import networkx as netx +""" +import networkx as netx +import matplotlib.pyplot as plt +from functools import partial +from vispy import scene, io +from vispy.scene import visuals +from vispy.visuals.filters import Alpha +import cv2 +from moviepy.editor import ImageSequenceClip +from skimage.transform import resize +import time +import copy +import torch +import os +from inpaint.utils import path_planning, open_small_mask, clean_far_edge, refine_depth_around_edge +from inpaint.utils import refine_color_around_edge, filter_irrelevant_edge_new, require_depth_edge, clean_far_edge_new +from inpaint.utils import create_placeholder, refresh_node, find_largest_rect +from inpaint.mesh_tools import get_depth_from_maps, get_map_from_ccs, get_edge_from_nodes, get_depth_from_nodes, get_rgb_from_nodes, crop_maps_by_size, convert2tensor, recursive_add_edge, update_info, filter_edge, relabel_node, depth_inpainting +from inpaint.mesh_tools import refresh_bord_depth, enlarge_border, fill_dummy_bord, extrapolate, fill_missing_node, incomplete_node, get_valid_size, dilate_valid_size, size_operation +import transforms3d +import random 
from functools import reduce
import struct
import tqdm
import sys


def create_mesh(depth, image, int_mtx, config):
    """Build the initial layered-depth graph: one node per pixel, 4-connected.

    Nodes are keyed ``(row, col, -depth)`` in a canvas enlarged by
    ``config['extrapolation_thickness']`` on every side, and each pixel is
    linked to its lower and right neighbour inside the original image area.

    Returns:
        ``(LDI, xy2depth, image, depth)`` where ``xy2depth`` maps
        ``(row, col)`` to a one-element list holding the node depth, and
        ``image`` / ``depth`` are zero-padded to the enlarged canvas.
    """
    margin = config['extrapolation_thickness']
    H, W, C = image.shape
    LDI = netx.Graph(H=H + 2 * margin, W=W + 2 * margin, noext_H=H, noext_W=W, cam_param=int_mtx)
    meta = LDI.graph

    # Intrinsics in pixel units: rows scaled by W, H and 1 respectively.
    int_mtx_pix = int_mtx * np.array([[W], [H], [1.]])
    meta['cam_param_pix'] = int_mtx_pix
    meta['cam_param_pix_inv'] = np.linalg.inv(int_mtx_pix)
    disp = 1. / (-depth)
    meta['hoffset'] = margin
    meta['woffset'] = margin
    meta['bord_up'] = meta['hoffset'] + 0
    meta['bord_down'] = meta['hoffset'] + H
    meta['bord_left'] = meta['woffset'] + 0
    meta['bord_right'] = meta['woffset'] + W

    xy2depth = {}
    for row in range(H):
        for col in range(W):
            x = row + meta['hoffset']
            y = col + meta['woffset']
            z = -depth[row, col]
            LDI.add_node((x, y, z),
                         color=image[row, col],
                         disp=disp[row, col],
                         synthesis=False,
                         cc_id=set())
            xy2depth[(x, y)] = [z]

    for x, y, d in LDI.nodes:
        for ne in ((x + 1, y), (x, y + 1)):
            if ne[0] < meta['bord_down'] and ne[1] < meta['bord_right']:
                LDI.add_edge((ne[0], ne[1], xy2depth[ne][0]), (x, y, d))

    LDI = calculate_fov(LDI)
    frame = (margin, margin)
    image = np.pad(image, pad_width=(frame, frame, (0, 0)), mode='constant')
    depth = np.pad(depth, pad_width=(frame, frame), mode='constant')

    return LDI, xy2depth, image, depth
def _append_link(node_attrs, key, other):
    """Append ``other`` to the list stored under ``key``, creating it if absent or None."""
    if node_attrs.get(key) is None:
        node_attrs[key] = []
    node_attrs[key].append(other)


def tear_edges(mesh, threshold = 0.00025, xy2depth=None):
    """Cut every edge whose endpoints differ in disparity by more than ``threshold``.

    For each cut edge the nearer endpoint (smaller ``|depth|``) is recorded in
    the farther node's ``'near'`` list and vice versa in ``'far'``. The former
    code assigned the result of ``list.append`` (always ``None``) and dropped
    the first link, so these lists never held the torn neighbours.

    A second pass removes "dangling" edges: an intact edge whose two parallel
    neighbours (previous and next row for horizontal edges, previous and next
    column for vertical ones) were both cut.

    Args:
        mesh: graph with ``'disp'`` on every node and ``H``, ``W``,
            ``bord_*`` in ``mesh.graph``.
        threshold: disparity difference above which an edge is cut.
        xy2depth: maps ``(row, col)`` to a list whose first item is the node
            depth; needed only when dangling edges are found.

    Returns:
        The same ``mesh`` object, modified in place.
    """
    graph = mesh.graph
    mesh_nodes = mesh.nodes
    remove_horizon, remove_vertical = np.zeros((2, graph['H'], graph['W']))

    remove_edge_list = []
    for edge in mesh.edges:
        if abs(mesh_nodes[edge[0]]['disp'] - mesh_nodes[edge[1]]['disp']) > threshold:
            remove_edge_list.append((edge[0], edge[1]))

            near, far = edge if abs(edge[0][2]) < abs(edge[1][2]) else edge[::-1]
            _append_link(mesh_nodes[far], 'near', near)
            _append_link(mesh_nodes[near], 'far', far)

            # Mark the cut at the upper/left pixel of the pair.
            if near[0] == far[0]:
                remove_horizon[near[0], np.minimum(near[1], far[1])] = 1
            elif near[1] == far[1]:
                remove_vertical[np.minimum(near[0], far[0]), near[1]] = 1
    mesh.remove_edges_from(remove_edge_list)

    dang_horizon = np.where(np.roll(remove_horizon, 1, 0) + np.roll(remove_horizon, -1, 0) - remove_horizon == 2)
    dang_vertical = np.where(np.roll(remove_vertical, 1, 1) + np.roll(remove_vertical, -1, 1) - remove_vertical == 2)

    def prjto3d(x, y):
        return (x, y, xy2depth[(x, y)][0])

    def node_existence(x, y):
        return mesh.has_node(prjto3d(x, y))

    remove_edge_list = []
    for x, y in zip(dang_horizon[0], dang_horizon[1]):
        inside = graph['bord_up'] + 1 <= x < graph['bord_down'] - 1
        if inside and node_existence(x, y) and node_existence(x, y+1):
            remove_edge_list.append((prjto3d(x, y), prjto3d(x, y+1)))
    for x, y in zip(dang_vertical[0], dang_vertical[1]):
        inside = graph['bord_left'] + 1 <= y < graph['bord_right'] - 1
        if inside and node_existence(x, y) and node_existence(x+1, y):
            remove_edge_list.append((prjto3d(x, y), prjto3d(x+1, y)))
    mesh.remove_edges_from(remove_edge_list)

    return mesh


def calculate_fov(mesh):
    """Store ``hFov``, ``vFov`` (radians) and ``aspect`` derived from ``cam_param``.

    ``cam_param`` is read as a matrix whose ``[0, 0]`` and ``[1, 1]`` entries
    are focal lengths normalised by the image size; ``aspect`` is
    ``noext_H / noext_W``.
    """
    k = mesh.graph['cam_param']
    mesh.graph['hFov'] = 2 * np.arctan(1. / (2*k[0, 0]))
    mesh.graph['vFov'] = 2 * np.arctan(1. / (2*k[1, 1]))
    mesh.graph['aspect'] = mesh.graph['noext_H'] / mesh.graph['noext_W']

    return mesh


# Field of view (radians) that calculate_fov_FB assigns to the shorter image side.
_FB_SHORT_SIDE_FOV = 0.508015513


def calculate_fov_FB(mesh):
    """Store ``aspect``, ``hFov`` and ``vFov`` using a fixed FOV on the shorter side.

    The shorter image side gets ``_FB_SHORT_SIDE_FOV``; the longer side's FOV
    follows from the ``H / W`` aspect ratio.
    """
    graph = mesh.graph
    graph['aspect'] = graph['H'] / graph['W']
    half_short = np.tan(_FB_SHORT_SIDE_FOV / 2.0)
    if graph['H'] > graph['W']:
        graph['hFov'] = _FB_SHORT_SIDE_FOV
        graph['vFov'] = 2.0 * np.arctan(half_short * graph['aspect'])
    else:
        graph['vFov'] = _FB_SHORT_SIDE_FOV
        graph['hFov'] = 2.0 * np.arctan(half_short / graph['aspect'])

    return mesh


def reproject_3d_int_detail(sx, sy, z, k_00, k_02, k_11, k_12, w_offset, h_offset):
    """Back-project the centre of pixel (row ``sx``, col ``sy``) to ``[X, Y, |z|]``.

    ``k_00, k_02, k_11, k_12`` are the entries of the inverse pixel intrinsics;
    the offsets remove the extrapolation margin.
    """
    abs_z = abs(z)
    return [abs_z * ((sy+0.5-w_offset) * k_00 + k_02), abs_z * ((sx+0.5-h_offset) * k_11 + k_12), abs_z]


def reproject_3d_int_detail_FB(sx, sy, z, w_offset, h_offset, mesh):
    """Back-project a pixel using the FOV stored on ``mesh`` (camera looks down -Z).

    ``tan(hFov / 2)`` and ``tan(vFov / 2)`` are cached in ``mesh.graph`` on
    first use.
    """
    graph = mesh.graph
    if graph.get('tan_hFov') is None:
        graph['tan_hFov'] = np.tan(graph['hFov'] / 2.)
    if graph.get('tan_vFov') is None:
        graph['tan_vFov'] = np.tan(graph['vFov'] / 2.)

    ray = np.array([(-1. + 2. * ((sy+0.5-w_offset)/(graph['W'] - 1))) * graph['tan_hFov'],
                    (1. - 2. * (sx+0.5-h_offset)/(graph['H'] - 1)) * graph['tan_vFov'],
                    -1])
    point_3d = ray * np.abs(z)

    return point_3d
def reproject_3d_int(sx, sy, z, mesh):
    """Back-project pixel (row ``sx``, col ``sy``) at depth ``|z|`` to a 3-vector.

    Uses ``mesh.graph['cam_param_pix_inv']``; the matrix is inverted once more
    when its ``[0, 2]`` entry is positive.
    NOTE(review): that sign test presumably detects a matrix that was stored
    un-inverted -- confirm against the callers.
    """
    k = mesh.graph['cam_param_pix_inv'].copy()
    if k[0, 2] > 0:
        k = np.linalg.inv(k)
    pixel = np.array([sy-mesh.graph['woffset'], sx-mesh.graph['hoffset'], 1]).reshape(3, 1)
    ray = np.dot(k, pixel)

    return (ray * np.abs(z)).flatten()


def generate_init_node(mesh, config, min_node_in_cc):
    """Drop tiny connected components and index the surviving nodes by pixel.

    Components with fewer than ``min_node_in_cc`` nodes are removed from the
    graph, and from the ``'near'`` / ``'far'`` lists of the nodes that
    referenced them.

    Returns:
        ``(mesh, info_on_pix)`` where ``info_on_pix[(row, col)]`` is a
        one-element list of ``{'depth', 'color', 'synthesis', 'disp'}``.
    """
    mesh_nodes = mesh.nodes
    info_on_pix = {}
    remove_nodes = []

    for cc in sorted(netx.connected_components(mesh), key=len, reverse=True):
        if len(cc) < min_node_in_cc:
            remove_nodes.extend(cc)
            continue
        for (nx, ny, nd) in cc:
            attrs = mesh_nodes[(nx, ny, nd)]
            info_on_pix[(nx, ny)] = [{'depth':nd,
                                      'color':attrs['color'],
                                      'synthesis':False,
                                      'disp':attrs['disp']}]

    # Unlink each doomed node from the reverse list of its torn neighbours:
    # my 'far' nodes list me under 'near', and vice versa.
    for node in remove_nodes:
        for own_key, reverse_key in (('far', 'near'), ('near', 'far')):
            for other in mesh_nodes[node].get(own_key) or []:
                if not mesh.has_node(other):
                    continue
                reverse_links = mesh_nodes[other].get(reverse_key)
                if reverse_links is not None and node in reverse_links:
                    reverse_links.remove(node)

    for node in remove_nodes:
        mesh.remove_node(node)

    return mesh, info_on_pix


def get_neighbors(mesh, node):
    """Return the neighbours of ``node`` as a list."""
    return list(mesh.neighbors(node))


def generate_face(mesh, info_on_pix, config):
    """Emit one triangle per pair of adjacent-direction neighbours of every node.

    Neighbours are sorted into up/right/down/left relative to the node
    (first coordinate = row, second = column) and a face
    ``(b, node, a)`` is produced for the direction pairs up-right,
    right-down, down-left and left-up. The former code compared each
    neighbour's coordinate with itself, so nothing was ever classified as
    "left" or "up" and half of the triangles came out with reversed winding.

    Args:
        mesh: graph whose nodes carry a ``'cur_id'`` attribute.
        info_on_pix: unused, kept for interface compatibility.
        config: when ``save_ply`` or ``save_obj`` is ``True`` faces are
            returned as ``'3 b self a\\n'`` strings (``cur_id`` must then be a
            string), otherwise as ``[b, self, a]`` lists.

    Returns:
        List of faces in the format described above.
    """
    ply_flag = config.get('save_ply') or config.get('save_obj')

    def out_fmt(faces, cur_id_b, cur_id_self, cur_id_a):
        if ply_flag is True:
            faces.append(' '.join(['3', cur_id_b, cur_id_self, cur_id_a]) + '\n')
        else:
            faces.append([cur_id_b, cur_id_self, cur_id_a])

    str_faces = []
    mesh_nodes = mesh.nodes
    for node in mesh_nodes:
        cur_id_self = mesh_nodes[node]['cur_id']
        four_dir_nes = {'up': [], 'left': [],
                        'down': [], 'right': []}
        for ne_node in get_neighbors(mesh, node):
            if ne_node[0] == node[0]:
                direction = 'left' if ne_node[1] == node[1] - 1 else 'right'
            else:
                direction = 'up' if ne_node[0] == node[0] - 1 else 'down'
            four_dir_nes[direction].append(mesh_nodes[ne_node]['cur_id'])

        for dir_a, dir_b in (('up', 'right'), ('right', 'down'), ('down', 'left'), ('left', 'up')):
            for cur_id_a in four_dir_nes[dir_a]:
                for cur_id_b in four_dir_nes[dir_b]:
                    out_fmt(str_faces, cur_id_b, cur_id_self, cur_id_a)

    return str_faces
def reassign_floating_island(mesh, info_on_pix, image, depth):
    """Re-attach pixels inside the image border that have no entry in ``info_on_pix``.

    Such pixels are grouped into 4-connected islands. For every island the
    surrounding depth edge (``'edge_id'``) with the most adjacent nodes is
    chosen, and its depth is propagated inwards: each island pixel receives the
    mean depth of its already-assigned cross neighbours, becomes a new graph
    node and is connected to those neighbours.

    Returns:
        ``(mesh, info_on_pix, depth)``; all three are updated in place
        (``depth`` stores the negated reassigned depth).
    """
    H, W = mesh.graph['H'], mesh.graph['W'],
    mesh_nodes = mesh.nodes
    bord_up, bord_down = mesh.graph['bord_up'], mesh.graph['bord_down']
    bord_left, bord_right = mesh.graph['bord_left'], mesh.graph['bord_right']
    W = mesh.graph['W']
    # 1 where a pixel inside the border has no node information.
    lost_map = np.zeros((H, W))

    '''
    (5) is_inside(x, y, xmin, xmax, ymin, ymax) : Check if a pixel(x, y) is inside the border.
    (6) get_cross_nes(x, y) : Get the four cross neighbors of pixel(x, y).
    '''
    key_exist = lambda d, k: k in d
    is_inside = lambda x, y, xmin, xmax, ymin, ymax: xmin <= x < xmax and ymin <= y < ymax
    get_cross_nes = lambda x, y: [(x + 1, y), (x - 1, y), (x, y - 1), (x, y + 1)]
    '''
    (A) Highlight the pixels on isolated floating island.
    (B) Number those isolated floating islands with connected component analysis.
    (C) For each isolated island:
        (1) Find its longest surrounded depth edge.
        (2) Propogate depth from that depth edge to the pixels on the isolated island.
        (3) Build the connection between the depth edge and that isolated island.
    '''
    for x in range(H):
        for y in range(W):
            if is_inside(x, y, bord_up, bord_down, bord_left, bord_right) and not(key_exist(info_on_pix, (x, y))):
                lost_map[x, y] = 1
    _, label_lost_map = cv2.connectedComponents(lost_map.astype(np.uint8), connectivity=4)
    mask = np.zeros((H, W))
    mask[bord_up:bord_down, bord_left:bord_right] = 1
    label_lost_map = (label_lost_map * mask).astype(int)

    for i in range(1, label_lost_map.max()+1):
        lost_xs, lost_ys = np.where(label_lost_map == i)
        # edge_id -> nodes of that depth edge adjacent to island i
        # (a node is listed once per adjacent island pixel).
        surr_edge_ids = {}
        for lost_x, lost_y in zip(lost_xs, lost_ys):
            for ne in get_cross_nes(lost_x, lost_y):
                if key_exist(info_on_pix, ne):
                    for info in info_on_pix[ne]:
                        ne_node = (ne[0], ne[1], info['depth'])
                        if key_exist(mesh_nodes[ne_node], 'edge_id'):
                            edge_id = mesh_nodes[ne_node]['edge_id']
                            surr_edge_ids[edge_id] = surr_edge_ids[edge_id] + [ne_node] if \
                                key_exist(surr_edge_ids, edge_id) else [ne_node]
        if len(surr_edge_ids) == 0:
            # No labelled depth edge touches this island: leave it unassigned.
            continue
        # Pick the edge with the most adjacent nodes as the depth source.
        edge_id, edge_nodes = sorted([*surr_edge_ids.items()], key=lambda x: len(x[1]), reverse=True)[0]
        # Non-zero entries are pixels whose depth may be propagated from.
        edge_depth_map = np.zeros((H, W))
        for node in edge_nodes:
            edge_depth_map[node[0], node[1]] = node[2]
        lost_xs, lost_ys = np.where(label_lost_map == i)
        # Sweep until every island pixel has been assigned a depth.
        # NOTE(review): the loop only ends once no pixel is labelled i; it relies
        # on each sweep assigning at least one pixel -- confirm that always holds.
        while lost_xs.shape[0] > 0:
            lost_xs, lost_ys = np.where(label_lost_map == i)
            for lost_x, lost_y in zip(lost_xs, lost_ys):
                propagated_depth = []
                real_nes = []
                for ne in get_cross_nes(lost_x, lost_y):
                    if not(is_inside(ne[0], ne[1], bord_up, bord_down, bord_left, bord_right)) or \
                       edge_depth_map[ne[0], ne[1]] == 0:
                        continue
                    propagated_depth.append(edge_depth_map[ne[0], ne[1]])
                    real_nes.append(ne)
                if len(real_nes) == 0:
                    # Not yet reachable; retried on a later sweep.
                    continue
                reassign_depth = np.mean(propagated_depth)
                label_lost_map[lost_x, lost_y] = 0
                edge_depth_map[lost_x, lost_y] = reassign_depth
                depth[lost_x, lost_y] = -reassign_depth
                mesh.add_node((lost_x, lost_y, reassign_depth), color=image[lost_x, lost_y],
                              synthesis=False,
                              disp=1./reassign_depth,
                              cc_id=set())
                info_on_pix[(lost_x, lost_y)] = [{'depth':reassign_depth,
                                                  'color':image[lost_x, lost_y],
                                                  'synthesis':False,
                                                  'disp':1./reassign_depth}]
                new_connections = [((lost_x, lost_y, reassign_depth),
                                    (ne[0], ne[1], edge_depth_map[ne[0], ne[1]])) for ne in real_nes]
                mesh.add_edges_from(new_connections)

    return mesh, info_on_pix, depth

def remove_node_feat(mesh, *feats):
    """Set each attribute named in ``feats`` to ``None`` on every node (created if absent)."""
    mesh_nodes = mesh.nodes
    for node in mesh_nodes:
        for feat in feats:
            mesh_nodes[node][feat] = None

    return mesh

def update_status(mesh, info_on_pix, depth=None):
    """Rebuild the ``'near'`` / ``'far'`` lists of every node with a missing neighbour.

    ``'edge_id'``, ``'far'`` and ``'near'`` are first reset to ``None`` on nodes
    that carry them. Then, for each node with fewer than four graph
    neighbours, every cross-neighbour pixel that is inside the border, present
    in ``info_on_pix`` and not connected to the node is recorded under
    ``'near'`` (smaller ``|depth|`` than the node) or ``'far'``.

    When ``depth`` is given, entries that disagree with ``info_on_pix`` are
    overwritten with ``|depth|`` of the first pixel record and that record's
    ``'disp'`` is recomputed.

    Returns:
        ``(mesh, depth, info_on_pix)`` when ``depth`` is given, else ``mesh``.
    """
    '''
    (2) clear_node_feat(G, *fts) : Clear all the node feature on graph G.
    (6) get_cross_nes(x, y) : Get the four cross neighbors of pixel(x, y).
    '''
    key_exist = lambda d, k: d.get(k) is not None
    is_inside = lambda x, y, xmin, xmax, ymin, ymax: xmin <= x < xmax and ymin <= y < ymax
    get_cross_nes = lambda x, y: [(x + 1, y), (x - 1, y), (x, y - 1), (x, y + 1)]
    # Returns a NEW list, so the caller must assign the result back.
    append_element = lambda d, k, x: d[k] + [x] if key_exist(d, k) else [x]

    def clear_node_feat(G, fts):
        # Only existing keys are reset; absent ones are not created.
        le_nodes = G.nodes
        for k in le_nodes:
            v = le_nodes[k]
            for ft in fts:
                if ft in v:
                    v[ft] = None

    clear_node_feat(mesh, ['edge_id', 'far', 'near'])
    bord_up, bord_down = mesh.graph['bord_up'], mesh.graph['bord_down']
    bord_left, bord_right = mesh.graph['bord_left'], mesh.graph['bord_right']

    le_nodes = mesh.nodes

    for node_key in le_nodes:
        # Fully connected nodes have no torn neighbour.
        if mesh.neighbors(node_key).__length_hint__() == 4:
            continue
        four_nes = [xx for xx in get_cross_nes(node_key[0], node_key[1]) if
                    is_inside(xx[0], xx[1], bord_up, bord_down, bord_left, bord_right) and
                    xx in info_on_pix]
        # Keep only the pixels the node is NOT connected to.
        # NOTE(review): list.remove raises ValueError if a graph neighbour's
        # pixel is not in four_nes -- presumably cannot happen; confirm.
        [four_nes.remove((ne_node[0], ne_node[1])) for ne_node in mesh.neighbors(node_key)]
        for ne in four_nes:
            for info in info_on_pix[ne]:
                assert mesh.has_node((ne[0], ne[1], info['depth'])), "No node_key"
                ind_node = le_nodes[node_key]
                if abs(node_key[2]) > abs(info['depth']):
                    ind_node['near'] = append_element(ind_node, 'near', (ne[0], ne[1], info['depth']))
                else:
                    ind_node['far'] = append_element(ind_node, 'far', (ne[0], ne[1], info['depth']))
    if depth is not None:
        for key, value in info_on_pix.items():
            if depth[key[0], key[1]] != abs(value[0]['depth']):
                value[0]['disp'] = 1. / value[0]['depth']
                depth[key[0], key[1]] = abs(value[0]['depth'])

        return mesh, depth, info_on_pix
    else:
        return mesh
/ value[0]['depth'] + depth[key[0], key[1]] = abs(value[0]['depth']) + + return mesh, depth, info_on_pix + else: + return mesh + +def group_edges(LDI, config, image, remove_conflict_ordinal, spdb=False): + + ''' + (1) add_new_node(G, node) : add "node" to graph "G" + (2) add_new_edge(G, node_a, node_b) : add edge "node_a--node_b" to graph "G" + (3) exceed_thre(x, y, thre) : Check if difference between "x" and "y" exceed threshold "thre" + (4) key_exist(d, k) : Check if key "k' exists in dictionary "d" + (5) comm_opp_bg(G, x, y) : Check if node "x" and "y" in graph "G" treat the same opposite node as background + (6) comm_opp_fg(G, x, y) : Check if node "x" and "y" in graph "G" treat the same opposite node as foreground + ''' + add_new_node = lambda G, node: None if G.has_node(node) else G.add_node(node) + add_new_edge = lambda G, node_a, node_b: None if G.has_edge(node_a, node_b) else G.add_edge(node_a, node_b) + exceed_thre = lambda x, y, thre: (abs(x) - abs(y)) > thre + key_exist = lambda d, k: d.get(k) is not None + comm_opp_bg = lambda G, x, y: key_exist(G.nodes[x], 'far') and key_exist(G.nodes[y], 'far') and \ + not(set(G.nodes[x]['far']).isdisjoint(set(G.nodes[y]['far']))) + comm_opp_fg = lambda G, x, y: key_exist(G.nodes[x], 'near') and key_exist(G.nodes[y], 'near') and \ + not(set(G.nodes[x]['near']).isdisjoint(set(G.nodes[y]['near']))) + discont_graph = netx.Graph() + ''' + (A) Skip the pixel at image boundary, we don't want to deal with them. + (B) Identify discontinuity by the number of its neighbor(degree). + If the degree < 4(up/right/buttom/left). We will go through following steps: + (1) Add the discontinuity pixel "node" to graph "discont_graph". + (2) Find "node"'s cross neighbor(up/right/buttom/left) "ne_node". + - If the cross neighbor "ne_node" is a discontinuity pixel(degree("ne_node") < 4), + (a) add it to graph "discont_graph" and build the connection between "ne_node" and "node". 
+ (b) label its cross neighbor as invalid pixels "inval_diag_candi" to avoid building + connection between original discontinuity pixel "node" and "inval_diag_candi". + - Otherwise, find "ne_node"'s cross neighbors, called diagonal candidate "diag_candi". + - The "diag_candi" is diagonal to the original discontinuity pixel "node". + - If "diag_candi" exists, go to step(3). + (3) A diagonal candidate "diag_candi" will be : + - added to the "discont_graph" if its degree < 4. + - connected to the original discontinuity pixel "node" if it satisfied either + one of following criterion: + (a) the difference of disparity between "diag_candi" and "node" is smaller than default threshold. + (b) the "diag_candi" and "node" face the same opposite pixel. (See. function "tear_edges") + (c) Both of "diag_candi" and "node" must_connect to each other. (See. function "combine_end_node") + (C) Aggregate each connected part in "discont_graph" into "discont_ccs" (A.K.A. depth edge). + ''' + for node in LDI.nodes: + if not(LDI.graph['bord_up'] + 1 <= node[0] <= LDI.graph['bord_down'] - 2 and \ + LDI.graph['bord_left'] + 1 <= node[1] <= LDI.graph['bord_right'] - 2): + continue + neighbors = [*LDI.neighbors(node)] + if len(neighbors) < 4: + add_new_node(discont_graph, node) + diag_candi_anc, inval_diag_candi, discont_nes = set(), set(), set() + for ne_node in neighbors: + if len([*LDI.neighbors(ne_node)]) < 4: + add_new_node(discont_graph, ne_node) + add_new_edge(discont_graph, ne_node, node) + discont_nes.add(ne_node) + else: + diag_candi_anc.add(ne_node) + inval_diag_candi = set([inval_diagonal for ne_node in discont_nes for inval_diagonal in LDI.neighbors(ne_node) if \ + abs(inval_diagonal[0] - node[0]) < 2 and abs(inval_diagonal[1] - node[1]) < 2]) + for ne_node in diag_candi_anc: + if ne_node[0] == node[0]: + diagonal_xys = [[ne_node[0] + 1, ne_node[1]], [ne_node[0] - 1, ne_node[1]]] + elif ne_node[1] == node[1]: + diagonal_xys = [[ne_node[0], ne_node[1] + 1], [ne_node[0], 
ne_node[1] - 1]] + for diag_candi in LDI.neighbors(ne_node): + if [diag_candi[0], diag_candi[1]] in diagonal_xys and LDI.degree(diag_candi) < 4: + if diag_candi not in inval_diag_candi: + if not exceed_thre(1./node[2], 1./diag_candi[2], config['depth_threshold']) or \ + (comm_opp_bg(LDI, diag_candi, node) and comm_opp_fg(LDI, diag_candi, node)): + add_new_node(discont_graph, diag_candi) + add_new_edge(discont_graph, diag_candi, node) + if key_exist(LDI.nodes[diag_candi], 'must_connect') and node in LDI.nodes[diag_candi]['must_connect'] and \ + key_exist(LDI.nodes[node], 'must_connect') and diag_candi in LDI.nodes[node]['must_connect']: + add_new_node(discont_graph, diag_candi) + add_new_edge(discont_graph, diag_candi, node) + if spdb == True: + import pdb; pdb.set_trace() + discont_ccs = [*netx.connected_components(discont_graph)] + ''' + In some corner case, a depth edge "discont_cc" will contain both + foreground(FG) and background(BG) pixels. This violate the assumption that + a depth edge can only composite by one type of pixel(FG or BG). + We need to further divide this depth edge into several sub-part so that the + assumption is satisfied. + (A) A depth edge is invalid if both of its "far_flag"(BG) and + "near_flag"(FG) are True. + (B) If the depth edge is invalid, we need to do: + (1) Find the role("oridinal") of each pixel on the depth edge. + "-1" --> Its opposite pixels has smaller depth(near) than it. + It is a backgorund pixel. + "+1" --> Its opposite pixels has larger depth(far) than it. + It is a foregorund pixel. + "0" --> Some of opposite pixels has larger depth(far) than it, + and some has smaller pixel than it. + It is an ambiguous pixel. + (2) For each pixel "discont_node", check if its neigbhors' roles are consistent. + - If not, break the connection between the neighbor "ne_node" that has a role + different from "discont_node". + - If yes, remove all the role that are inconsistent to its neighbors "ne_node". 
+ (3) Connected component analysis to re-identified those divided depth edge. + (C) Aggregate each connected part in "discont_graph" into "discont_ccs" (A.K.A. depth edge). + ''' + if remove_conflict_ordinal: + new_discont_ccs = [] + num_new_cc = 0 + for edge_id, discont_cc in enumerate(discont_ccs): + near_flag = False + far_flag = False + for discont_node in discont_cc: + near_flag = True if key_exist(LDI.nodes[discont_node], 'far') else near_flag + far_flag = True if key_exist(LDI.nodes[discont_node], 'near') else far_flag + if far_flag and near_flag: + break + if far_flag and near_flag: + for discont_node in discont_cc: + discont_graph.nodes[discont_node]['ordinal'] = \ + np.array([key_exist(LDI.nodes[discont_node], 'far'), + key_exist(LDI.nodes[discont_node], 'near')]) * \ + np.array([-1, 1]) + discont_graph.nodes[discont_node]['ordinal'] = \ + np.sum(discont_graph.nodes[discont_node]['ordinal']) + remove_nodes, remove_edges = [], [] + for discont_node in discont_cc: + ordinal_relation = np.sum([discont_graph.nodes[xx]['ordinal'] \ + for xx in discont_graph.neighbors(discont_node)]) + near_side = discont_graph.nodes[discont_node]['ordinal'] <= 0 + if abs(ordinal_relation) < len([*discont_graph.neighbors(discont_node)]): + remove_nodes.append(discont_node) + for ne_node in discont_graph.neighbors(discont_node): + remove_flag = (near_side and not(key_exist(LDI.nodes[ne_node], 'far'))) or \ + (not near_side and not(key_exist(LDI.nodes[ne_node], 'near'))) + remove_edges += [(discont_node, ne_node)] if remove_flag else [] + else: + if near_side and key_exist(LDI.nodes[discont_node], 'near'): + LDI.nodes[discont_node].pop('near') + elif not(near_side) and key_exist(LDI.nodes[discont_node], 'far'): + LDI.nodes[discont_node].pop('far') + discont_graph.remove_edges_from(remove_edges) + sub_mesh = discont_graph.subgraph(list(discont_cc)).copy() + sub_discont_ccs = [*netx.connected_components(sub_mesh)] + is_redun_near = lambda xx: len(xx) == 1 and xx[0] in remove_nodes 
def combine_end_node(mesh, edge_mesh, edge_ccs, depth):
    '''
    Link end points of different depth edges that sit on adjacent pixels.

    An "end point" is an edge node with exactly one neighbour in `edge_mesh`.
    Its depth is written into a H x W map (`end_maps`, 0 = no end point) and
    the candidates are then filtered in three passes:
      (1) drop end points whose node is missing from `mesh`, or whose
          adjacent end points are all already linked to it in `mesh`
          (this includes end points with no adjacent end point at all);
      (2) drop end points that have an adjacent end point whose 'edge_id'
          is seen through 'far'/'near' references from this end point's
          own edge a number of times different from one;
      (3) for every remaining pair of adjacent end points, add a mesh edge
          and register each node (plus the partner's nearby `edge_mesh`
          neighbours) in the other's 'must_connect' set.

    Args:
        mesh: graph with (x, y, depth) node keys; `mesh.graph` holds 'H', 'W'.
        edge_mesh: graph of depth-edge nodes, used for neighbour lookups.
        edge_ccs: iterable of depth edges, each an iterable of node keys.
        depth: not used by this function.

    Returns:
        `mesh`, modified in place.
    '''
    import collections

    mesh_nodes = mesh.nodes
    height, width = mesh.graph['H'], mesh.graph['W']

    # For each edge, count how often every other edge id is referenced through
    # the 'far'/'near' lists of the mesh neighbours of its nodes.
    connect_dict = dict()
    for valid_edge_id, valid_edge_cc in enumerate(edge_ccs):
        connect_info = []
        for valid_edge_node in valid_edge_cc:
            single_connect = set()
            for ne_node in mesh.neighbors(valid_edge_node):
                for side in ('far', 'near'):
                    opposite_nodes = mesh_nodes[ne_node].get(side)
                    if opposite_nodes is None:
                        continue
                    for fn in opposite_nodes:
                        if mesh.has_node(fn) and mesh_nodes[fn].get('edge_id') is not None:
                            single_connect.add(mesh_nodes[fn]['edge_id'])
            connect_info.extend(single_connect)
        connect_dict[valid_edge_id] = collections.Counter(connect_info)

    end_maps = np.zeros((height, width))
    for valid_edge_cc in edge_ccs:
        for valid_edge_node in valid_edge_cc:
            if len([*edge_mesh.neighbors(valid_edge_node)]) == 1:
                end_maps[valid_edge_node[0], valid_edge_node[1]] = valid_edge_node[2]

    def end_node(x, y):
        # Node key of the end point recorded at pixel (x, y).
        return (x, y, end_maps[x, y])

    def adjacent_ends(x, y):
        # Cross neighbours of (x, y) that are inside the image and still end points.
        return [xx for xx in [(x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1)]
                if 0 <= xx[0] < height and 0 <= xx[1] < width and
                end_maps[xx[0], xx[1]] != 0]

    def clear(pixels):
        for px in pixels:
            end_maps[px[0], px[1]] = 0

    # Pass 1: missing nodes, and end points with nothing new to connect to.
    invalid_nodes = set()
    for nx, ny in zip(*np.where(end_maps != 0)):
        if not mesh.has_node(end_node(nx, ny)):
            invalid_nodes.add((nx, ny))
            continue
        four_nes = adjacent_ends(nx, ny)
        mesh_nes = [*mesh.neighbors(end_node(nx, ny))]
        already_linked = sum(1 for fne in four_nes if end_node(fne[0], fne[1]) in mesh_nes)
        if already_linked == len(four_nes):
            invalid_nodes.add((nx, ny))
    clear(invalid_nodes)

    # Pass 2: reject pairs whose edges reference each other more than once.
    invalid_nodes = set()
    for nx, ny in zip(*np.where(end_maps != 0)):
        self_id = mesh_nodes[end_node(nx, ny)].get('edge_id')
        if self_id is None:
            continue
        self_connect = connect_dict.get(self_id)
        if self_connect is None:
            self_connect = dict()
        for fne in adjacent_ends(nx, ny):
            ne_id = mesh_nodes[end_node(fne[0], fne[1])].get('edge_id')
            if ne_id is None:
                continue
            times_seen = self_connect.get(ne_id)
            if times_seen is not None and times_seen != 1:
                invalid_nodes.add((nx, ny))
    clear(invalid_nodes)

    def register(owner, partner):
        # Remember that `owner` must connect to `partner` and to those of
        # partner's edge neighbours lying within one pixel of `owner`.
        attrs = mesh_nodes[owner]
        if attrs.get('must_connect') is None:
            attrs['must_connect'] = set()
        attrs['must_connect'].add(partner)
        # abs() keeps the window symmetric; the signed difference used before
        # accepted an offset of -2 but rejected +2.
        attrs['must_connect'] |= {xx for xx in edge_mesh.neighbors(partner)
                                  if abs(xx[0] - owner[0]) < 2 and abs(xx[1] - owner[1]) < 2}

    # Pass 3: connect the surviving adjacent end points.
    invalid_nodes = set()
    for nx, ny in zip(*np.where(end_maps != 0)):
        for fne in adjacent_ends(nx, ny):
            node_a = end_node(fne[0], fne[1])
            if mesh.has_node(node_a):
                node_b = end_node(nx, ny)
                mesh.add_edge(node_a, node_b)
                register(node_b, node_a)
                register(node_a, node_b)
                invalid_nodes.add((nx, ny))
    clear(invalid_nodes)

    return mesh
+ edge_canvas[valid_edge_node[0], valid_edge_node[1]] = valid_edge_id + if spdb is True: + plt.imshow(edge_canvas); plt.show() + import pdb; pdb.set_trace() + for valid_edge_id, valid_edge_cc in enumerate(valid_edge_ccs): + end_number = 0 + four_end_number = 0 + eight_end_number = 0 + db_eight_end_number = 0 + if len(valid_edge_cc) > redundant_number: + continue + for valid_edge_node in valid_edge_cc: + if len([*edge_mesh.neighbors(valid_edge_node)]) == 3: + break + elif len([*edge_mesh.neighbors(valid_edge_node)]) == 1: + hx, hy, hz = valid_edge_node + if invalid is False: + eight_nes = [(x, y) for x, y in [(hx + 1, hy), (hx - 1, hy), (hx, hy + 1), (hx, hy - 1), + (hx + 1, hy + 1), (hx - 1, hy - 1), (hx - 1, hy + 1), (hx + 1, hy - 1)] \ + if info_on_pix.get((x, y)) is not None and edge_canvas[x, y] != -1 and edge_canvas[x, y] != valid_edge_id] + if len(eight_nes) == 0: + end_number += 1 + if invalid is True: + four_nes = []; eight_nes = []; db_eight_nes = [] + four_nes = [(x, y) for x, y in [(hx + 1, hy), (hx - 1, hy), (hx, hy + 1), (hx, hy - 1)] \ + if info_on_pix.get((x, y)) is not None and edge_canvas[x, y] != -1 and edge_canvas[x, y] != valid_edge_id] + eight_nes = [(x, y) for x, y in [(hx + 1, hy), (hx - 1, hy), (hx, hy + 1), (hx, hy - 1), \ + (hx + 1, hy + 1), (hx - 1, hy - 1), (hx - 1, hy + 1), (hx + 1, hy - 1)] \ + if info_on_pix.get((x, y)) is not None and edge_canvas[x, y] != -1 and edge_canvas[x, y] != valid_edge_id] + db_eight_nes = [(x, y) for x in range(hx - 2, hx + 3) for y in range(hy - 2, hy + 3) \ + if info_on_pix.get((x, y)) is not None and edge_canvas[x, y] != -1 and edge_canvas[x, y] != valid_edge_id and (x, y) != (hx, hy)] + if len(four_nes) == 0 or len(eight_nes) == 0: + end_number += 1 + if len(four_nes) == 0: + four_end_number += 1 + if len(eight_nes) == 0: + eight_end_number += 1 + if len(db_eight_nes) == 0: + db_eight_end_number += 1 + elif len([*edge_mesh.neighbors(valid_edge_node)]) == 0: + hx, hy, hz = valid_edge_node + four_nes = 
def judge_dangle(mark, mesh, node):
    '''
    Flag `node` in `mark` when it is weakly connected inside the image.

    Nodes on the outermost pixel ring of the H x W image are ignored, and
    neighbours lying on that ring are not counted. The value written at
    mark[x, y] is:
        1 -> no counted neighbour
        2 -> exactly one counted neighbour
        3 -> exactly two counted neighbours that are more than one pixel
             apart along x or along y
    With three or more counted neighbours, or two that are close to each
    other, `mark` is left untouched. `mark` is returned in every case.
    '''
    graph = mesh.graph
    row, col = node[0], node[1]
    if not (1 <= row < graph['H'] - 1 and 1 <= col < graph['W'] - 1):
        return mark

    interior = [other for other in mesh.neighbors(node)
                if 0 < other[0] < graph['H'] - 1 and 0 < other[1] < graph['W'] - 1]
    degree = len(interior)

    if degree >= 3:
        return mark
    if degree <= 1:
        mark[row, col] = degree + 1
        return mark

    first, second = interior
    too_far_x = abs(first[0] - second[0]) > 1
    too_far_y = abs(first[1] - second[1]) > 1
    if too_far_x or too_far_y:
        mark[row, col] = 3
    return mark
def remove_dangling(mesh, edge_ccs, edge_mesh, info_on_pix, image, depth, config):
    '''
    Re-attach depth-edge nodes that are left with too few mesh connections.

    (A) Each depth edge consisting of a single node is handled first. The
        node's 8-neighbourhood (first layer of `info_on_pix`) is split into
        connected components of `mesh`, restricted to the cross neighbours:
          - if the best component has fewer than two nodes, the node is
            linked to all of its cross neighbours;
          - otherwise its edges are dropped, it is moved to the mean depth
            of that component (via `refresh_node`) and linked to it.
    (B) Every edge node inside the border is classified by adding to `mark`:
          1 -> no mesh neighbour, 2 -> one mesh neighbour,
          3 -> two mesh neighbours more than one pixel apart on some axis.
    (C) Nodes marked 1 are linked to their cross neighbours; nodes marked 2
        and chains of nodes marked 3 are re-linked to a neighbouring
        component. In each case the node is moved to the mean depth of the
        nodes used and `info_on_pix`, `depth` and `mark` are updated.

    Args:
        mesh: graph with (x, y, depth) node keys; `mesh.graph` provides
            'H', 'W', 'bord_up', 'bord_down', 'bord_left', 'bord_right'.
        edge_ccs: list of depth edges (sets of node keys); single-node
            entries may be replaced in place during step (A).
        edge_mesh: graph of depth-edge nodes, threaded through
            `update_info` / `recursive_add_edge`.
        info_on_pix: dict mapping (x, y) to a list of per-layer dicts.
        image: not used by this function.
        depth: map indexed as depth[x, y]; updated in place.
        config: not used by this function.

    Returns:
        (mesh, info_on_pix, edge_mesh, depth, mark)
    '''
    cross = [(1, 0), (-1, 0), (0, 1), (0, -1)]
    diagonal = [(1, 1), (-1, -1), (-1, 1), (1, -1)]

    def first_layer(hx, hy, offsets):
        # First-layer node key of every offset pixel known to info_on_pix.
        return [(hx + dx, hy + dy, info_on_pix[(hx + dx, hy + dy)][0]['depth'])
                for dx, dy in offsets
                if info_on_pix.get((hx + dx, hy + dy)) is not None]

    def first_layer_at(pixels):
        # First-layer node keys of the given pixels that exist in the mesh.
        return [(x[0], x[1], info_on_pix[(x[0], x[1])][0]['depth']) for x in pixels
                if mesh.has_node((x[0], x[1], info_on_pix[(x[0], x[1])][0]['depth']))]

    # ---- (A) single-node depth edges --------------------------------------
    tmp_edge_ccs = copy.deepcopy(edge_ccs)
    for edge_cc_id, valid_edge_cc in enumerate(tmp_edge_ccs):
        if len(valid_edge_cc) > 1 or len(valid_edge_cc) == 0:
            continue
        single_edge_node = [*valid_edge_cc][0]
        hx, hy, hz = single_edge_node
        eight_nes = set(first_layer(hx, hy, cross + diagonal))
        four_nes = first_layer(hx, hy, cross)
        sub_mesh = mesh.subgraph(eight_nes).copy()
        # Keep, per component, only the members that are cross neighbours.
        four_ccs = [{cc_node for cc_node in _cc
                     if abs(cc_node[0] - hx) + abs(cc_node[1] - hy) < 2}
                    for _cc in netx.connected_components(sub_mesh)]
        # Largest component wins; ties go to the one closest in depth.
        largest_cc = sorted(four_ccs,
                            key=lambda x: (len(x), -np.sum([abs(xx[2] - hz) for xx in x])))[-1]
        if len(largest_cc) < 2:
            for ne in four_nes:
                mesh.add_edge(single_edge_node, ne)
        else:
            mesh.remove_edges_from([(single_edge_node, ne) for ne in mesh.neighbors(single_edge_node)])
            new_depth = np.mean([xx[2] for xx in largest_cc])
            info_on_pix[(hx, hy)][0]['depth'] = new_depth
            info_on_pix[(hx, hy)][0]['disp'] = 1./new_depth
            new_node = (hx, hy, new_depth)
            mesh = refresh_node(single_edge_node, mesh.nodes[single_edge_node], new_node, dict(), mesh)
            edge_ccs[edge_cc_id] = set([new_node])
            for ne in largest_cc:
                mesh.add_edge(new_node, ne)

    # ---- (B) classify weakly connected edge nodes -------------------------
    bord_up, bord_down = mesh.graph['bord_up'], mesh.graph['bord_down']
    bord_left, bord_right = mesh.graph['bord_left'], mesh.graph['bord_right']
    mark = np.zeros((mesh.graph['H'], mesh.graph['W']))
    for edge_cc in edge_ccs:
        for edge_node in edge_cc:
            if not (bord_up <= edge_node[0] < bord_down - 1) or \
               not (bord_left <= edge_node[1] < bord_right - 1):
                continue
            # NOTE(review): the original also built a border-filtered
            # neighbour list here but never used it; all neighbours count.
            neighbours = [*mesh.neighbors(edge_node)]
            if len(neighbours) >= 3:
                continue
            elif len(neighbours) <= 1:
                mark[edge_node[0], edge_node[1]] += (len(neighbours) + 1)
            else:
                dan_ne_node_a, dan_ne_node_b = neighbours[0], neighbours[1]
                if abs(dan_ne_node_a[0] - dan_ne_node_b[0]) > 1 or \
                   abs(dan_ne_node_a[1] - dan_ne_node_b[1]) > 1:
                    mark[edge_node[0], edge_node[1]] += 3

    conn_0_nodes = first_layer_at(zip(*np.where(mark == 1)))
    conn_1_nodes = first_layer_at(zip(*np.where(mark == 2)))

    # ---- (C.1) nodes without any mesh neighbour ---------------------------
    for node in conn_0_nodes:
        hx, hy = node[0], node[1]
        total, count = 0, 0
        for ne in first_layer(hx, hy, cross):
            mesh.add_edge(node, ne)
            # Average the depth of the neighbours just linked. The original
            # read a stale `cc_node` left over from step (A) here.
            total += ne[2]
            count += 1.
        # No cross neighbour at all: keep the node's own depth.
        re_depth = total / count if count else node[2]
        mapping_dict = {node: (node[0], node[1], re_depth)}
        info_on_pix, mesh, edge_mesh = update_info(mapping_dict, info_on_pix, mesh, edge_mesh)
        depth[node[0], node[1]] = abs(re_depth)
        mark[node[0], node[1]] = 0

    # ---- (C.2) nodes with a single mesh neighbour -------------------------
    for node in conn_1_nodes:
        hx, hy = node[0], node[1]
        eight_nes = set(first_layer(hx, hy, cross + diagonal))
        # Drop what is reachable through the node's current neighbour(s).
        self_nes = set([ne2 for ne1 in mesh.neighbors(node) for ne2 in mesh.neighbors(ne1) if ne2 in eight_nes])
        eight_nes = [*(eight_nes - self_nes)]
        sub_mesh = mesh.subgraph(eight_nes).copy()
        ccs = netx.connected_components(sub_mesh)
        # Largest component wins; ties go to the spatially closest one.
        largest_cc = sorted(ccs, key=lambda x: (len(x), -np.sum([abs(xx[0] - node[0]) + abs(xx[1] - node[1]) for xx in x])))[-1]

        mesh.remove_edges_from([(xx, node) for xx in mesh.neighbors(node)])
        total, count = 0, 0
        for cc_node in largest_cc:
            if cc_node[0] == node[0] and cc_node[1] == node[1]:
                continue
            total += cc_node[2]
            count += 1.
            if abs(cc_node[0] - node[0]) + abs(cc_node[1] - node[1]) < 2:
                mesh.add_edge(cc_node, node)
        re_depth = total / count if count else node[2]
        renode = (node[0], node[1], re_depth)
        mapping_dict = {node: renode}
        info_on_pix, mesh, edge_mesh = update_info(mapping_dict, info_on_pix, mesh, edge_mesh)
        depth[node[0], node[1]] = abs(re_depth)
        mark[node[0], node[1]] = 0
        edge_mesh, mesh, mark, info_on_pix = recursive_add_edge(edge_mesh, mesh, info_on_pix, renode, mark)

    # ---- (C.3) chains of nodes with two far-apart neighbours --------------
    conn_2_nodes = [xx for xx in first_layer_at(zip(*np.where(mark == 3)))
                    if mesh.degree(xx) == 2]
    sub_mesh = mesh.subgraph(conn_2_nodes).copy()
    ccs = netx.connected_components(sub_mesh)
    for cc in ccs:
        # Chain ends: nodes with a single neighbour inside the chain.
        candidate_nodes = [xx for xx in cc if sub_mesh.degree(xx) == 1]
        for node in candidate_nodes:
            if mesh.has_node(node) is False:
                continue
            ne_node = [xx for xx in mesh.neighbors(node) if xx not in cc][0]
            hx, hy = node[0], node[1]
            eight_nes = set([xx for xx in first_layer(hx, hy, cross + diagonal) if xx not in cc])
            ne_sub_mesh = mesh.subgraph(eight_nes).copy()
            ne_ccs = netx.connected_components(ne_sub_mesh)
            try:
                ne_cc = [ne_cc for ne_cc in ne_ccs if ne_node in ne_cc][0]
            except IndexError:
                # NOTE(review): debugging hook kept from the original; it is
                # hit when no local component contains `ne_node`.
                import pdb; pdb.set_trace()
            largest_cc = [xx for xx in ne_cc if abs(xx[0] - node[0]) + abs(xx[1] - node[1]) == 1]
            mesh.remove_edges_from([(xx, node) for xx in mesh.neighbors(node)])
            total, count = 0, 0
            for cc_node in largest_cc:
                total += cc_node[2]
                count += 1.
                mesh.add_edge(cc_node, node)
            re_depth = total / count if count else node[2]
            renode = (node[0], node[1], re_depth)
            mapping_dict = {node: renode}
            info_on_pix, mesh, edge_mesh = update_info(mapping_dict, info_on_pix, mesh, edge_mesh)
            depth[node[0], node[1]] = abs(re_depth)
            mark[node[0], node[1]] = 0
            edge_mesh, mesh, mark, info_on_pix = recursive_add_edge(edge_mesh, mesh, info_on_pix, renode, mark)
            # Only the first usable chain end is processed per chain.
            break
        if len(cc) == 1:
            node = [node for node in cc][0]
            hx, hy = node[0], node[1]
            nine_nes = set([xx for xx in first_layer(hx, hy, [(0, 0)] + cross + diagonal)
                            if mesh.has_node(xx)])
            ne_sub_mesh = mesh.subgraph(nine_nes).copy()
            ne_ccs = netx.connected_components(ne_sub_mesh)
            for ne_cc in ne_ccs:
                if node in ne_cc:
                    total, count = 0, 0
                    for ne in ne_cc:
                        if abs(ne[0] - node[0]) + abs(ne[1] - node[1]) == 1:
                            mesh.add_edge(node, ne)
                            total += ne[2]
                            count += 1.
                    re_depth = total / count if count else node[2]
                    mapping_dict = {node: (node[0], node[1], re_depth)}
                    info_on_pix, mesh, edge_mesh = update_info(mapping_dict, info_on_pix, mesh, edge_mesh)
                    depth[node[0], node[1]] = abs(re_depth)
                    mark[node[0], node[1]] = 0

    return mesh, info_on_pix, edge_mesh, depth, mark
mesh_nodes[far_node].get('edge_id') is not None: + if edge_group.get(mesh_nodes[far_node]['edge_id']) is None: + edge_group[mesh_nodes[far_node]['edge_id']] = set() + edge_group[mesh_nodes[far_node]['edge_id']].add(far_node) + if len(edge_cc) > 2: + for edge_key in [*edge_group.keys()]: + if len(edge_group[edge_key]) == 1: + context_ccs[edge_id].remove([*edge_group[edge_key]][0]) + for edge_id, edge_cc in enumerate(edge_ccs): + if inpaint_iter != 0: + continue + tmp_intouched_nodes = set() + for edge_node in edge_cc: + raw_intouched_nodes = set(mesh_nodes[edge_node].get('near')) if mesh_nodes[edge_node].get('near') is not None else set() + tmp_intouched_nodes |= set([xx for xx in raw_intouched_nodes if mesh_nodes[xx].get('edge_id') is not None and \ + len(context_ccs[mesh_nodes[xx].get('edge_id')]) > 0]) + intouched_ccs[edge_id] |= tmp_intouched_nodes + tmp_intouched_nodes = None + mask_ccs = copy.deepcopy(edge_ccs) + forbidden_len = 3 + forbidden_map = np.ones((mesh.graph['H'] - forbidden_len, mesh.graph['W'] - forbidden_len)) + forbidden_map = np.pad(forbidden_map, ((forbidden_len, forbidden_len), (forbidden_len, forbidden_len)), mode='constant').astype(bool) + cur_tmp_mask_map = np.zeros_like(forbidden_map).astype(bool) + #passive_background = 10 if 10 is not None else background_thickness + #passive_context = 1 if 1 is not None else context_thickness + passive_background = 10 #if 10 is not None else background_thickness + passive_context = 1 #if 1 is not None else context_thickness + + for edge_id, edge_cc in enumerate(edge_ccs): + cur_mask_cc = None; cur_mask_cc = [] + cur_context_cc = None; cur_context_cc = [] + cur_accomp_near_cc = None; cur_accomp_near_cc = [] + cur_invalid_extend_edge_cc = None; cur_invalid_extend_edge_cc = [] + cur_comp_far_cc = None; cur_comp_far_cc = [] + tmp_erode = [] + if len(context_ccs[edge_id]) == 0 or (len(specific_edge_id) > 0 and edge_id not in specific_edge_id): + continue + for i in range(max(background_thickness, 
context_thickness)): + cur_tmp_mask_map.fill(False) + if i == 0: + tmp_mask_nodes = copy.deepcopy(mask_ccs[edge_id]) + tmp_intersect_nodes = [] + tmp_intersect_context_nodes = [] + mask_map = np.zeros((mesh.graph['H'], mesh.graph['W']), dtype=bool) + context_depth = np.zeros((mesh.graph['H'], mesh.graph['W'])) + comp_cnt_depth = np.zeros((mesh.graph['H'], mesh.graph['W'])) + connect_map = np.zeros((mesh.graph['H'], mesh.graph['W'])) + for node in tmp_mask_nodes: + mask_map[node[0], node[1]] = True + depth_count = 0 + if mesh_nodes[node].get('far') is not None: + for comp_cnt_node in mesh_nodes[node]['far']: + comp_cnt_depth[node[0], node[1]] += abs(comp_cnt_node[2]) + depth_count += 1 + if depth_count > 0: + comp_cnt_depth[node[0], node[1]] = comp_cnt_depth[node[0], node[1]] / depth_count + connect_node = [] + if mesh_nodes[node].get('connect_point_id') is not None: + connect_node.append(mesh_nodes[node]['connect_point_id']) + connect_point_id = np.bincount(connect_node).argmax() if len(connect_node) > 0 else -1 + if connect_point_id > -1 and connect_points_ccs is not None: + for xx in connect_points_ccs[connect_point_id]: + if connect_map[xx[0], xx[1]] == 0: + connect_map[xx[0], xx[1]] = xx[2] + if mesh_nodes[node].get('connect_point_exception') is not None: + for xx in mesh_nodes[node]['connect_point_exception']: + if connect_map[xx[0], xx[1]] == 0: + connect_map[xx[0], xx[1]] = xx[2] + tmp_context_nodes = [*context_ccs[edge_id]] + tmp_erode.append([*context_ccs[edge_id]]) + context_map = np.zeros((mesh.graph['H'], mesh.graph['W']), dtype=bool) + if (context_map.astype(np.uint8) * mask_map.astype(np.uint8)).max() > 0: + import pdb; pdb.set_trace() + for node in tmp_context_nodes: + context_map[node[0], node[1]] = True + context_depth[node[0], node[1]] = node[2] + context_map[mask_map == True] = False + if (context_map.astype(np.uint8) * mask_map.astype(np.uint8)).max() > 0: + import pdb; pdb.set_trace() + tmp_intouched_nodes = [*intouched_ccs[edge_id]] + 
intouched_map = np.zeros((mesh.graph['H'], mesh.graph['W']), dtype=bool) + for node in tmp_intouched_nodes: intouched_map[node[0], node[1]] = True + intouched_map[mask_map == True] = False + tmp_redundant_nodes = set() + tmp_noncont_nodes = set() + noncont_map = np.zeros((mesh.graph['H'], mesh.graph['W']), dtype=bool) + intersect_map = np.zeros((mesh.graph['H'], mesh.graph['W']), dtype=bool) + intersect_context_map = np.zeros((mesh.graph['H'], mesh.graph['W']), dtype=bool) + if i > passive_background and inpaint_iter == 0: + new_tmp_intersect_nodes = None + new_tmp_intersect_nodes = [] + for node in tmp_intersect_nodes: + nes = mesh.neighbors(node) + for ne in nes: + if bool(context_map[ne[0], ne[1]]) is False and \ + bool(mask_map[ne[0], ne[1]]) is False and \ + bool(forbidden_map[ne[0], ne[1]]) is True and \ + bool(intouched_map[ne[0], ne[1]]) is False and\ + bool(intersect_map[ne[0], ne[1]]) is False and\ + bool(intersect_context_map[ne[0], ne[1]]) is False: + break_flag = False + if (i - passive_background) % 2 == 0 and (i - passive_background) % 8 != 0: + four_nes = [xx for xx in[[ne[0] - 1, ne[1]], [ne[0] + 1, ne[1]], [ne[0], ne[1] - 1], [ne[0], ne[1] + 1]] \ + if 0 <= xx[0] < mesh.graph['H'] and 0 <= xx[1] < mesh.graph['W']] + for fne in four_nes: + if bool(mask_map[fne[0], fne[1]]) is True: + break_flag = True + break + if break_flag is True: + continue + intersect_map[ne[0], ne[1]] = True + new_tmp_intersect_nodes.append(ne) + tmp_intersect_nodes = None + tmp_intersect_nodes = new_tmp_intersect_nodes + + if i > passive_context and inpaint_iter == 1: + new_tmp_intersect_context_nodes = None + new_tmp_intersect_context_nodes = [] + for node in tmp_intersect_context_nodes: + nes = mesh.neighbors(node) + for ne in nes: + if bool(context_map[ne[0], ne[1]]) is False and \ + bool(mask_map[ne[0], ne[1]]) is False and \ + bool(forbidden_map[ne[0], ne[1]]) is True and \ + bool(intouched_map[ne[0], ne[1]]) is False and\ + bool(intersect_map[ne[0], ne[1]]) is False 
and \ + bool(intersect_context_map[ne[0], ne[1]]) is False: + intersect_context_map[ne[0], ne[1]] = True + new_tmp_intersect_context_nodes.append(ne) + tmp_intersect_context_nodes = None + tmp_intersect_context_nodes = new_tmp_intersect_context_nodes + + new_tmp_mask_nodes = None + new_tmp_mask_nodes = [] + for node in tmp_mask_nodes: + four_nes = {xx:[] for xx in [(node[0] - 1, node[1]), (node[0] + 1, node[1]), (node[0], node[1] - 1), (node[0], node[1] + 1)] if \ + 0 <= xx[0] < connect_map.shape[0] and 0 <= xx[1] < connect_map.shape[1]} + if inpaint_iter > 0: + for ne in four_nes.keys(): + if connect_map[ne[0], ne[1]] == True: + tmp_context_nodes.append((ne[0], ne[1], connect_map[ne[0], ne[1]])) + context_map[ne[0], ne[1]] = True + nes = mesh.neighbors(node) + if inpaint_iter > 0: + for ne in nes: four_nes[(ne[0], ne[1])].append(ne[2]) + nes = [] + for kfne, vfnes in four_nes.items(): vfnes.sort(key = lambda xx: abs(xx), reverse=True) + for kfne, vfnes in four_nes.items(): + for vfne in vfnes: nes.append((kfne[0], kfne[1], vfne)) + for ne in nes: + if bool(context_map[ne[0], ne[1]]) is False and \ + bool(mask_map[ne[0], ne[1]]) is False and \ + bool(forbidden_map[ne[0], ne[1]]) is True and \ + bool(intouched_map[ne[0], ne[1]]) is False and \ + bool(intersect_map[ne[0], ne[1]]) is False and \ + bool(intersect_context_map[ne[0], ne[1]]) is False: + if i == passive_background and inpaint_iter == 0: + if np.any(context_map[max(ne[0] - 1, 0):min(ne[0] + 2, mesh.graph['H']), max(ne[1] - 1, 0):min(ne[1] + 2, mesh.graph['W'])]) == True: + intersect_map[ne[0], ne[1]] = True + tmp_intersect_nodes.append(ne) + continue + if i < background_thickness: + if inpaint_iter == 0: + cur_mask_cc.append(ne) + elif mesh_nodes[ne].get('inpaint_id') == 1: + cur_mask_cc.append(ne) + else: + continue + mask_ccs[edge_id].add(ne) + if inpaint_iter == 0: + if comp_cnt_depth[node[0], node[1]] > 0 and comp_cnt_depth[ne[0], ne[1]] == 0: + comp_cnt_depth[ne[0], ne[1]] = comp_cnt_depth[node[0], 
node[1]] + if mesh_nodes[ne].get('far') is not None: + for comp_far_node in mesh_nodes[ne]['far']: + cur_comp_far_cc.append(comp_far_node) + cur_accomp_near_cc.append(ne) + cur_invalid_extend_edge_cc.append(comp_far_node) + if mesh_nodes[ne].get('edge_id') is not None and \ + len(context_ccs[mesh_nodes[ne].get('edge_id')]) > 0: + intouched_fars = set(mesh_nodes[ne].get('far')) if mesh_nodes[ne].get('far') is not None else set() + accum_intouched_fars = set(intouched_fars) + for intouched_far in intouched_fars: + accum_intouched_fars |= set([*mesh.neighbors(intouched_far)]) + for intouched_far in accum_intouched_fars: + if bool(mask_map[intouched_far[0], intouched_far[1]]) is True or \ + bool(context_map[intouched_far[0], intouched_far[1]]) is True: + continue + tmp_redundant_nodes.add(intouched_far) + intouched_map[intouched_far[0], intouched_far[1]] = True + if mesh_nodes[ne].get('near') is not None: + intouched_nears = set(mesh_nodes[ne].get('near')) + for intouched_near in intouched_nears: + if bool(mask_map[intouched_near[0], intouched_near[1]]) is True or \ + bool(context_map[intouched_near[0], intouched_near[1]]) is True: + continue + tmp_redundant_nodes.add(intouched_near) + intouched_map[intouched_near[0], intouched_near[1]] = True + if not (mesh_nodes[ne].get('inpaint_id') != 1 and inpaint_iter == 1): + new_tmp_mask_nodes.append(ne) + mask_map[ne[0], ne[1]] = True + tmp_mask_nodes = new_tmp_mask_nodes + + new_tmp_context_nodes = None + new_tmp_context_nodes = [] + for node in tmp_context_nodes: + nes = mesh.neighbors(node) + if inpaint_iter > 0: + four_nes = {(node[0] - 1, node[1]):[], (node[0] + 1, node[1]):[], (node[0], node[1] - 1):[], (node[0], node[1] + 1):[]} + for ne in nes: four_nes[(ne[0], ne[1])].append(ne[2]) + nes = [] + for kfne, vfnes in four_nes.items(): vfnes.sort(key = lambda xx: abs(xx), reverse=True) + for kfne, vfnes in four_nes.items(): + for vfne in vfnes: nes.append((kfne[0], kfne[1], vfne)) + for ne in nes: + mask_flag = 
(bool(mask_map[ne[0], ne[1]]) is False) + if bool(context_map[ne[0], ne[1]]) is False and mask_flag and \ + bool(forbidden_map[ne[0], ne[1]]) is True and bool(noncont_map[ne[0], ne[1]]) is False and \ + bool(intersect_context_map[ne[0], ne[1]]) is False: + if i == passive_context and inpaint_iter == 1: + mnes = mesh.neighbors(ne) + if any([mask_map[mne[0], mne[1]] == True for mne in mnes]) is True: + intersect_context_map[ne[0], ne[1]] = True + tmp_intersect_context_nodes.append(ne) + continue + if False and mesh_nodes[ne].get('near') is not None and mesh_nodes[ne].get('edge_id') != edge_id: + noncont_nears = set(mesh_nodes[ne].get('near')) + for noncont_near in noncont_nears: + if bool(context_map[noncont_near[0], noncont_near[1]]) is False: + tmp_noncont_nodes.add(noncont_near) + noncont_map[noncont_near[0], noncont_near[1]] = True + new_tmp_context_nodes.append(ne) + context_map[ne[0], ne[1]] = True + context_depth[ne[0], ne[1]] = ne[2] + cur_context_cc.extend(new_tmp_context_nodes) + tmp_erode.append(new_tmp_context_nodes) + tmp_context_nodes = None + tmp_context_nodes = new_tmp_context_nodes + new_tmp_intouched_nodes = None; new_tmp_intouched_nodes = [] + + for node in tmp_intouched_nodes: + if bool(context_map[node[0], node[1]]) is True or bool(mask_map[node[0], node[1]]) is True: + continue + nes = mesh.neighbors(node) + + for ne in nes: + if bool(context_map[ne[0], ne[1]]) is False and \ + bool(mask_map[ne[0], ne[1]]) is False and \ + bool(intouched_map[ne[0], ne[1]]) is False and \ + bool(forbidden_map[ne[0], ne[1]]) is True: + new_tmp_intouched_nodes.append(ne) + intouched_map[ne[0], ne[1]] = True + tmp_intouched_nodes = None + tmp_intouched_nodes = set(new_tmp_intouched_nodes) + new_tmp_redundant_nodes = None; new_tmp_redundant_nodes = [] + for node in tmp_redundant_nodes: + if bool(context_map[node[0], node[1]]) is True or \ + bool(mask_map[node[0], node[1]]) is True: + continue + nes = mesh.neighbors(node) + + for ne in nes: + if 
bool(context_map[ne[0], ne[1]]) is False and \ + bool(mask_map[ne[0], ne[1]]) is False and \ + bool(intouched_map[ne[0], ne[1]]) is False and \ + bool(forbidden_map[ne[0], ne[1]]) is True: + new_tmp_redundant_nodes.append(ne) + intouched_map[ne[0], ne[1]] = True + tmp_redundant_nodes = None + tmp_redundant_nodes = set(new_tmp_redundant_nodes) + new_tmp_noncont_nodes = None; new_tmp_noncont_nodes = [] + for node in tmp_noncont_nodes: + if bool(context_map[node[0], node[1]]) is True or \ + bool(mask_map[node[0], node[1]]) is True: + continue + nes = mesh.neighbors(node) + rmv_flag = False + for ne in nes: + if bool(context_map[ne[0], ne[1]]) is False and \ + bool(mask_map[ne[0], ne[1]]) is False and \ + bool(noncont_map[ne[0], ne[1]]) is False and \ + bool(forbidden_map[ne[0], ne[1]]) is True: + patch_context_map = context_map[max(ne[0] - 1, 0):min(ne[0] + 2, context_map.shape[0]), + max(ne[1] - 1, 0):min(ne[1] + 2, context_map.shape[1])] + if bool(np.any(patch_context_map)) is True: + new_tmp_noncont_nodes.append(ne) + noncont_map[ne[0], ne[1]] = True + tmp_noncont_nodes = None + tmp_noncont_nodes = set(new_tmp_noncont_nodes) + if inpaint_iter == 0: + depth_dict = get_depth_from_maps(context_map, mask_map, context_depth, mesh.graph['H'], mesh.graph['W'], log_depth=config['log_depth']) + mask_size = get_valid_size(depth_dict['mask']) + mask_size = dilate_valid_size(mask_size, depth_dict['mask'], dilate=[20, 20]) + context_size = get_valid_size(depth_dict['context']) + context_size = dilate_valid_size(context_size, depth_dict['context'], dilate=[20, 20]) + union_size = size_operation(mask_size, context_size, operation='+') + depth_dict = depth_inpainting(None, None, None, None, mesh, config, union_size, depth_feat_model, None, given_depth_dict=depth_dict, spdb=False) + near_depth_map, raw_near_depth_map = np.zeros((mesh.graph['H'], mesh.graph['W'])), np.zeros((mesh.graph['H'], mesh.graph['W'])) + filtered_comp_far_cc, filtered_accomp_near_cc = set(), set() + for node 
in cur_accomp_near_cc: + near_depth_map[node[0], node[1]] = depth_dict['output'][node[0], node[1]] + raw_near_depth_map[node[0], node[1]] = node[2] + for node in cur_comp_far_cc: + four_nes = [xx for xx in [(node[0] - 1, node[1]), (node[0] + 1, node[1]), (node[0], node[1] - 1), (node[0], node[1] + 1)] \ + if 0 <= xx[0] < mesh.graph['H'] and 0 <= xx[1] < mesh.graph['W'] and \ + near_depth_map[xx[0], xx[1]] != 0 and \ + abs(near_depth_map[xx[0], xx[1]]) < abs(node[2])] + if len(four_nes) > 0: + filtered_comp_far_cc.add(node) + for ne in four_nes: + filtered_accomp_near_cc.add((ne[0], ne[1], -abs(raw_near_depth_map[ne[0], ne[1]]))) + cur_comp_far_cc, cur_accomp_near_cc = filtered_comp_far_cc, filtered_accomp_near_cc + mask_ccs[edge_id] |= set(cur_mask_cc) + context_ccs[edge_id] |= set(cur_context_cc) + accomp_extend_context_ccs[edge_id] |= set(cur_accomp_near_cc).intersection(cur_mask_cc) + extend_edge_ccs[edge_id] |= set(cur_accomp_near_cc).intersection(cur_mask_cc) + extend_context_ccs[edge_id] |= set(cur_comp_far_cc) + invalid_extend_edge_ccs[edge_id] |= set(cur_invalid_extend_edge_cc) + erode_size = [0] + for tmp in tmp_erode: + erode_size.append(len(tmp)) + if len(erode_size) > 1: + erode_size[-1] += erode_size[-2] + if inpaint_iter == 0: + tmp_width = config['depth_edge_dilate'] + else: + tmp_width = 0 + while float(erode_size[tmp_width]) / (erode_size[-1] + 1e-6) > 0.3: + tmp_width = tmp_width - 1 + try: + if tmp_width == 0: + erode_context_ccs[edge_id] = set([]) + else: + erode_context_ccs[edge_id] = set(reduce(lambda x, y : x + y, [] + tmp_erode[:tmp_width])) + except: + import pdb; pdb.set_trace() + erode_context_cc = copy.deepcopy(erode_context_ccs[edge_id]) + for erode_context_node in erode_context_cc: + if (inpaint_iter != 0 and (mesh_nodes[erode_context_node].get('inpaint_id') is None or + mesh_nodes[erode_context_node].get('inpaint_id') == 0)): + erode_context_ccs[edge_id].remove(erode_context_node) + else: + 
context_ccs[edge_id].remove(erode_context_node) + context_map = np.zeros((mesh.graph['H'], mesh.graph['W'])) + for context_node in context_ccs[edge_id]: + context_map[context_node[0], context_node[1]] = 1 + extend_context_ccs[edge_id] = extend_context_ccs[edge_id] - mask_ccs[edge_id] - accomp_extend_context_ccs[edge_id] + if inpaint_iter == 0: + all_ecnt_cc = set() + for ecnt_id, ecnt_cc in enumerate(extend_context_ccs): + constraint_context_ids = set() + constraint_context_cc = set() + constraint_erode_context_cc = set() + tmp_mask_cc = set() + accum_context_cc = None; accum_context_cc = [] + for ecnt_node in accomp_extend_context_ccs[ecnt_id]: + if edge_maps[ecnt_node[0], ecnt_node[1]] > -1: + constraint_context_ids.add(int(round(edge_maps[ecnt_node[0], ecnt_node[1]]))) + constraint_erode_context_cc = erode_context_ccs[ecnt_id] + for constraint_context_id in constraint_context_ids: + constraint_context_cc = constraint_context_cc | context_ccs[constraint_context_id] | erode_context_ccs[constraint_context_id] + constraint_erode_context_cc = constraint_erode_context_cc | erode_context_ccs[constraint_context_id] + for i in range(background_thickness): + if i == 0: + tmp_context_nodes = copy.deepcopy(ecnt_cc) + tmp_invalid_context_nodes = copy.deepcopy(invalid_extend_edge_ccs[ecnt_id]) + tmp_mask_nodes = copy.deepcopy(accomp_extend_context_ccs[ecnt_id]) + tmp_context_map = np.zeros((mesh.graph['H'], mesh.graph['W'])).astype(bool) + tmp_mask_map = np.zeros((mesh.graph['H'], mesh.graph['W'])).astype(bool) + tmp_invalid_context_map = np.zeros((mesh.graph['H'], mesh.graph['W'])).astype(bool) + for node in tmp_mask_nodes: + tmp_mask_map[node[0], node[1]] = True + for node in context_ccs[ecnt_id]: + tmp_context_map[node[0], node[1]] = True + for node in erode_context_ccs[ecnt_id]: + tmp_context_map[node[0], node[1]] = True + for node in extend_context_ccs[ecnt_id]: + tmp_context_map[node[0], node[1]] = True + for node in invalid_extend_edge_ccs[ecnt_id]: + 
tmp_invalid_context_map[node[0], node[1]] = True + init_invalid_context_map = tmp_invalid_context_map.copy() + init_context_map = tmp + if (tmp_mask_map.astype(np.uint8) * tmp_context_map.astype(np.uint8)).max() > 0: + import pdb; pdb.set_trace() + if vis_edge_id is not None and ecnt_id == vis_edge_id: + f, ((ax1, ax2)) = plt.subplots(1, 2, sharex=True, sharey=True) + ax1.imshow(tmp_context_map * 1); ax2.imshow(init_invalid_context_map * 1 + tmp_context_map * 2) + plt.show() + import pdb; pdb.set_trace() + else: + tmp_context_nodes = new_tmp_context_nodes + new_tmp_context_nodes = None + tmp_mask_nodes = new_tmp_mask_nodes + new_tmp_mask_nodes = None + tmp_invalid_context_nodes = new_tmp_invalid_context_nodes + new_tmp_invalid_context_nodes = None + new_tmp_context_nodes = None + new_tmp_context_nodes = [] + new_tmp_invalid_context_nodes = None + new_tmp_invalid_context_nodes = [] + new_tmp_mask_nodes = set([]) + for node in tmp_context_nodes: + for ne in mesh.neighbors(node): + if ne in constraint_context_cc and \ + bool(tmp_mask_map[ne[0], ne[1]]) is False and \ + bool(tmp_context_map[ne[0], ne[1]]) is False and \ + bool(forbidden_map[ne[0], ne[1]]) is True: + new_tmp_context_nodes.append(ne) + tmp_context_map[ne[0], ne[1]] = True + accum_context_cc.extend(new_tmp_context_nodes) + for node in tmp_invalid_context_nodes: + for ne in mesh.neighbors(node): + if bool(tmp_mask_map[ne[0], ne[1]]) is False and \ + bool(tmp_context_map[ne[0], ne[1]]) is False and \ + bool(tmp_invalid_context_map[ne[0], ne[1]]) is False and \ + bool(forbidden_map[ne[0], ne[1]]) is True: + tmp_invalid_context_map[ne[0], ne[1]] = True + new_tmp_invalid_context_nodes.append(ne) + for node in tmp_mask_nodes: + for ne in mesh.neighbors(node): + if bool(tmp_mask_map[ne[0], ne[1]]) is False and \ + bool(tmp_context_map[ne[0], ne[1]]) is False and \ + bool(tmp_invalid_context_map[ne[0], ne[1]]) is False and \ + bool(forbidden_map[ne[0], ne[1]]) is True: + new_tmp_mask_nodes.add(ne) + 
tmp_mask_map[ne[0], ne[1]] = True + init_invalid_context_map[tmp_context_map] = False + _, tmp_label_map = cv2.connectedComponents((init_invalid_context_map | tmp_context_map).astype(np.uint8), connectivity=8) + tmp_label_ids = set(np.unique(tmp_label_map[init_invalid_context_map])) + if (tmp_mask_map.astype(np.uint8) * tmp_context_map.astype(np.uint8)).max() > 0: + import pdb; pdb.set_trace() + if vis_edge_id is not None and ecnt_id == vis_edge_id: + f, ((ax1, ax2)) = plt.subplots(1, 2, sharex=True, sharey=True) + ax1.imshow(tmp_label_map); ax2.imshow(init_invalid_context_map * 1 + tmp_context_map * 2) + plt.show() + import pdb; pdb.set_trace() + extend_context_ccs[ecnt_id] |= set(accum_context_cc) + extend_context_ccs[ecnt_id] = extend_context_ccs[ecnt_id] - mask_ccs[ecnt_id] + extend_erode_context_ccs[ecnt_id] = extend_context_ccs[ecnt_id] & constraint_erode_context_cc + extend_context_ccs[ecnt_id] = extend_context_ccs[ecnt_id] - extend_erode_context_ccs[ecnt_id] - erode_context_ccs[ecnt_id] + tmp_context_cc = context_ccs[ecnt_id] - extend_erode_context_ccs[ecnt_id] - erode_context_ccs[ecnt_id] + if len(tmp_context_cc) > 0: + context_ccs[ecnt_id] = tmp_context_cc + tmp_mask_cc = tmp_mask_cc - context_ccs[ecnt_id] - erode_context_ccs[ecnt_id] + mask_ccs[ecnt_id] = mask_ccs[ecnt_id] | tmp_mask_cc + + return context_ccs, mask_ccs, broken_mask_ccs, edge_ccs, erode_context_ccs, invalid_extend_edge_ccs, edge_maps, extend_context_ccs, extend_edge_ccs, extend_erode_context_ccs + +def DL_inpaint_edge(mesh, + info_on_pix, + config, + image, + depth, + context_ccs, + erode_context_ccs, + extend_context_ccs, + extend_erode_context_ccs, + mask_ccs, + broken_mask_ccs, + edge_ccs, + extend_edge_ccs, + init_mask_connect, + edge_maps, + rgb_model=None, + depth_edge_model=None, + depth_edge_model_init=None, + depth_feat_model=None, + specific_edge_id=-1, + specific_edge_loc=None, + inpaint_iter=0): + + if isinstance(config["gpu_ids"], int) and (config["gpu_ids"] >= 0): + device = 
config["gpu_ids"] + else: + device = "cpu" + + edge_map = np.zeros_like(depth) + new_edge_ccs = [set() for _ in range(len(edge_ccs))] + edge_maps_with_id = edge_maps + edge_condition = lambda x, m: m.nodes[x].get('far') is not None and len(m.nodes[x].get('far')) > 0 + edge_map = get_map_from_ccs(edge_ccs, mesh.graph['H'], mesh.graph['W'], mesh, edge_condition) + np_depth, np_image = depth.copy(), image.copy() + image_c = image.shape[-1] + image = torch.FloatTensor(image.transpose(2, 0, 1)).unsqueeze(0).to(device) + if depth.ndim < 3: + depth = depth[..., None] + depth = torch.FloatTensor(depth.transpose(2, 0, 1)).unsqueeze(0).to(device) + mesh.graph['max_edge_id'] = len(edge_ccs) + connnect_points_ccs = [set() for _ in range(len(edge_ccs))] + gp_time, tmp_mesh_time, bilateral_time = 0, 0, 0 + edges_infos = dict() + edges_in_mask = [set() for _ in range(len(edge_ccs))] + tmp_specific_edge_id = [] + for edge_id, (context_cc, mask_cc, erode_context_cc, extend_context_cc, edge_cc) in enumerate(zip(context_ccs, mask_ccs, erode_context_ccs, extend_context_ccs, edge_ccs)): + if len(specific_edge_id) > 0: + if edge_id not in specific_edge_id: + continue + if len(context_cc) < 1 or len(mask_cc) < 1: + continue + edge_dict = get_edge_from_nodes(context_cc | extend_context_cc, erode_context_cc | extend_erode_context_ccs[edge_id], mask_cc, edge_cc, extend_edge_ccs[edge_id], + mesh.graph['H'], mesh.graph['W'], mesh) + edge_dict['edge'], end_depth_maps, _ = \ + filter_irrelevant_edge_new(edge_dict['self_edge'], edge_dict['comp_edge'], + edge_map, + edge_maps_with_id, + edge_id, + edge_dict['context'], + edge_dict['depth'], mesh, context_cc | erode_context_cc | extend_context_cc | extend_erode_context_ccs[edge_id], spdb=False) + if specific_edge_loc is not None and \ + (specific_edge_loc is not None and edge_dict['mask'][specific_edge_loc[0], specific_edge_loc[1]] == 0): + continue + mask_size = get_valid_size(edge_dict['mask']) + mask_size = dilate_valid_size(mask_size, 
edge_dict['mask'], dilate=[20, 20]) + context_size = get_valid_size(edge_dict['context']) + context_size = dilate_valid_size(context_size, edge_dict['context'], dilate=[20, 20]) + union_size = size_operation(mask_size, context_size, operation='+') + patch_edge_dict = dict() + patch_edge_dict['mask'], patch_edge_dict['context'], patch_edge_dict['rgb'], \ + patch_edge_dict['disp'], patch_edge_dict['edge'] = \ + crop_maps_by_size(union_size, edge_dict['mask'], edge_dict['context'], + edge_dict['rgb'], edge_dict['disp'], edge_dict['edge']) + x_anchor, y_anchor = [union_size['x_min'], union_size['x_max']], [union_size['y_min'], union_size['y_max']] + tensor_edge_dict = convert2tensor(patch_edge_dict) + input_edge_feat = torch.cat((tensor_edge_dict['rgb'], + tensor_edge_dict['disp'], + tensor_edge_dict['edge'], + 1 - tensor_edge_dict['context'], + tensor_edge_dict['mask']), dim=1) + if require_depth_edge(patch_edge_dict['edge'], patch_edge_dict['mask']) and inpaint_iter == 0: + with torch.no_grad(): + depth_edge_output = depth_edge_model.forward_3P(tensor_edge_dict['mask'], + tensor_edge_dict['context'], + tensor_edge_dict['rgb'], + tensor_edge_dict['disp'], + tensor_edge_dict['edge'], + unit_length=128, + cuda=device) + depth_edge_output = depth_edge_output.cpu() + tensor_edge_dict['output'] = (depth_edge_output> config['ext_edge_threshold']).float() * tensor_edge_dict['mask'] + tensor_edge_dict['edge'] + else: + tensor_edge_dict['output'] = tensor_edge_dict['edge'] + depth_edge_output = tensor_edge_dict['edge'] + 0 + patch_edge_dict['output'] = tensor_edge_dict['output'].squeeze().data.cpu().numpy() + edge_dict['output'] = np.zeros((mesh.graph['H'], mesh.graph['W'])) + edge_dict['output'][union_size['x_min']:union_size['x_max'], union_size['y_min']:union_size['y_max']] = \ + patch_edge_dict['output'] + if require_depth_edge(patch_edge_dict['edge'], patch_edge_dict['mask']) and inpaint_iter == 0: + if ((depth_edge_output> config['ext_edge_threshold']).float() * 
tensor_edge_dict['mask']).max() > 0: + try: + edge_dict['fpath_map'], edge_dict['npath_map'], break_flag, npaths, fpaths, invalid_edge_id = \ + clean_far_edge_new(edge_dict['output'], end_depth_maps, edge_dict['mask'], edge_dict['context'], mesh, info_on_pix, edge_dict['self_edge'], inpaint_iter, config) + except: + import pdb; pdb.set_trace() + pre_npath_map = edge_dict['npath_map'].copy() + if config.get('repeat_inpaint_edge') is True: + for _ in range(2): + tmp_input_edge = ((edge_dict['npath_map'] > -1) + edge_dict['edge']).clip(0, 1) + patch_tmp_input_edge = crop_maps_by_size(union_size, tmp_input_edge)[0] + tensor_input_edge = torch.FloatTensor(patch_tmp_input_edge)[None, None, ...] + depth_edge_output = depth_edge_model.forward_3P(tensor_edge_dict['mask'], + tensor_edge_dict['context'], + tensor_edge_dict['rgb'], + tensor_edge_dict['disp'], + tensor_input_edge, + unit_length=128, + cuda=device) + depth_edge_output = depth_edge_output.cpu() + depth_edge_output = (depth_edge_output> config['ext_edge_threshold']).float() * tensor_edge_dict['mask'] + tensor_edge_dict['edge'] + depth_edge_output = depth_edge_output.squeeze().data.cpu().numpy() + full_depth_edge_output = np.zeros((mesh.graph['H'], mesh.graph['W'])) + full_depth_edge_output[union_size['x_min']:union_size['x_max'], union_size['y_min']:union_size['y_max']] = \ + depth_edge_output + edge_dict['fpath_map'], edge_dict['npath_map'], break_flag, npaths, fpaths, invalid_edge_id = \ + clean_far_edge_new(full_depth_edge_output, end_depth_maps, edge_dict['mask'], edge_dict['context'], mesh, info_on_pix, edge_dict['self_edge'], inpaint_iter, config) + for nid in npaths.keys(): + npath, fpath = npaths[nid], fpaths[nid] + start_mx, start_my, end_mx, end_my = -1, -1, -1, -1 + if end_depth_maps[npath[0][0], npath[0][1]] != 0: + start_mx, start_my = npath[0][0], npath[0][1] + if end_depth_maps[npath[-1][0], npath[-1][1]] != 0: + end_mx, end_my = npath[-1][0], npath[-1][1] + if start_mx == -1: + import pdb; 
pdb.set_trace() + valid_end_pt = () if end_mx == -1 else (end_mx, end_my, info_on_pix[(end_mx, end_my)][0]['depth']) + new_edge_info = dict(fpath=fpath, + npath=npath, + cont_end_pts=valid_end_pt, + mask_id=edge_id, + comp_edge_id=nid, + depth=end_depth_maps[start_mx, start_my]) + if edges_infos.get((start_mx, start_my)) is None: + edges_infos[(start_mx, start_my)] = [] + edges_infos[(start_mx, start_my)].append(new_edge_info) + edges_in_mask[edge_id].add((start_mx, start_my)) + if len(valid_end_pt) > 0: + new_edge_info = dict(fpath=fpath[::-1], + npath=npath[::-1], + cont_end_pts=(start_mx, start_my, info_on_pix[(start_mx, start_my)][0]['depth']), + mask_id=edge_id, + comp_edge_id=nid, + depth=end_depth_maps[end_mx, end_my]) + if edges_infos.get((end_mx, end_my)) is None: + edges_infos[(end_mx, end_my)] = [] + edges_infos[(end_mx, end_my)].append(new_edge_info) + edges_in_mask[edge_id].add((end_mx, end_my)) + for edge_id, (context_cc, mask_cc, erode_context_cc, extend_context_cc, edge_cc) in enumerate(zip(context_ccs, mask_ccs, erode_context_ccs, extend_context_ccs, edge_ccs)): + if len(specific_edge_id) > 0: + if edge_id not in specific_edge_id: + continue + if len(context_cc) < 1 or len(mask_cc) < 1: + continue + edge_dict = get_edge_from_nodes(context_cc | extend_context_cc, erode_context_cc | extend_erode_context_ccs[edge_id], mask_cc, edge_cc, extend_edge_ccs[edge_id], + mesh.graph['H'], mesh.graph['W'], mesh) + if specific_edge_loc is not None and \ + (specific_edge_loc is not None and edge_dict['mask'][specific_edge_loc[0], specific_edge_loc[1]] == 0): + continue + else: + tmp_specific_edge_id.append(edge_id) + edge_dict['edge'], end_depth_maps, _ = \ + filter_irrelevant_edge_new(edge_dict['self_edge'], edge_dict['comp_edge'], + edge_map, + edge_maps_with_id, + edge_id, + edge_dict['context'], + edge_dict['depth'], mesh, context_cc | erode_context_cc | extend_context_cc | extend_erode_context_ccs[edge_id], spdb=False) + discard_map = 
np.zeros_like(edge_dict['edge']) + mask_size = get_valid_size(edge_dict['mask']) + mask_size = dilate_valid_size(mask_size, edge_dict['mask'], dilate=[20, 20]) + context_size = get_valid_size(edge_dict['context']) + context_size = dilate_valid_size(context_size, edge_dict['context'], dilate=[20, 20]) + union_size = size_operation(mask_size, context_size, operation='+') + patch_edge_dict = dict() + patch_edge_dict['mask'], patch_edge_dict['context'], patch_edge_dict['rgb'], \ + patch_edge_dict['disp'], patch_edge_dict['edge'] = \ + crop_maps_by_size(union_size, edge_dict['mask'], edge_dict['context'], + edge_dict['rgb'], edge_dict['disp'], edge_dict['edge']) + x_anchor, y_anchor = [union_size['x_min'], union_size['x_max']], [union_size['y_min'], union_size['y_max']] + tensor_edge_dict = convert2tensor(patch_edge_dict) + input_edge_feat = torch.cat((tensor_edge_dict['rgb'], + tensor_edge_dict['disp'], + tensor_edge_dict['edge'], + 1 - tensor_edge_dict['context'], + tensor_edge_dict['mask']), dim=1) + edge_dict['output'] = edge_dict['edge'].copy() + + if require_depth_edge(patch_edge_dict['edge'], patch_edge_dict['mask']) and inpaint_iter == 0: + edge_dict['fpath_map'], edge_dict['npath_map'] = edge_dict['fpath_map'] * 0 - 1, edge_dict['npath_map'] * 0 - 1 + end_pts = edges_in_mask[edge_id] + for end_pt in end_pts: + cur_edge_infos = edges_infos[(end_pt[0], end_pt[1])] + cur_info = [xx for xx in cur_edge_infos if xx['mask_id'] == edge_id][0] + other_infos = [xx for xx in cur_edge_infos if xx['mask_id'] != edge_id and len(xx['cont_end_pts']) > 0] + if len(cur_info['cont_end_pts']) > 0 or (len(cur_info['cont_end_pts']) == 0 and len(other_infos) == 0): + for fnode in cur_info['fpath']: + edge_dict['fpath_map'][fnode[0], fnode[1]] = cur_info['comp_edge_id'] + for fnode in cur_info['npath']: + edge_dict['npath_map'][fnode[0], fnode[1]] = cur_info['comp_edge_id'] + fnmap = edge_dict['fpath_map'] * 1 + fnmap[edge_dict['npath_map'] != -1] = 
edge_dict['npath_map'][edge_dict['npath_map'] != -1] + for end_pt in end_pts: + cur_edge_infos = edges_infos[(end_pt[0], end_pt[1])] + cur_info = [xx for xx in cur_edge_infos if xx['mask_id'] == edge_id][0] + cur_depth = cur_info['depth'] + other_infos = [xx for xx in cur_edge_infos if xx['mask_id'] != edge_id and len(xx['cont_end_pts']) > 0] + comp_edge_id = cur_info['comp_edge_id'] + if len(cur_info['cont_end_pts']) == 0 and len(other_infos) > 0: + other_infos = sorted(other_infos, key=lambda aa: abs(abs(aa['cont_end_pts'][2]) - abs(cur_depth))) + for other_info in other_infos: + tmp_fmap, tmp_nmap = np.zeros((mesh.graph['H'], mesh.graph['W'])) - 1, np.zeros((mesh.graph['H'], mesh.graph['W'])) - 1 + for fnode in other_info['fpath']: + if fnmap[fnode[0], fnode[1]] != -1: + tmp_fmap = tmp_fmap * 0 - 1 + break + else: + tmp_fmap[fnode[0], fnode[1]] = comp_edge_id + if fnmap[fnode[0], fnode[1]] != -1: + continue + for fnode in other_info['npath']: + if fnmap[fnode[0], fnode[1]] != -1: + tmp_nmap = tmp_nmap * 0 - 1 + break + else: + tmp_nmap[fnode[0], fnode[1]] = comp_edge_id + if fnmap[fnode[0], fnode[1]] != -1: + continue + break + if min(tmp_fmap.max(), tmp_nmap.max()) != -1: + edge_dict['fpath_map'] = tmp_fmap + edge_dict['fpath_map'][edge_dict['valid_area'] == 0] = -1 + edge_dict['npath_map'] = tmp_nmap + edge_dict['npath_map'][edge_dict['valid_area'] == 0] = -1 + discard_map = ((tmp_nmap != -1).astype(np.uint8) + (tmp_fmap != -1).astype(np.uint8)) * edge_dict['mask'] + else: + for fnode in cur_info['fpath']: + edge_dict['fpath_map'][fnode[0], fnode[1]] = cur_info['comp_edge_id'] + for fnode in cur_info['npath']: + edge_dict['npath_map'][fnode[0], fnode[1]] = cur_info['comp_edge_id'] + if edge_dict['npath_map'].min() == 0 or edge_dict['fpath_map'].min() == 0: + import pdb; pdb.set_trace() + edge_dict['output'] = (edge_dict['npath_map'] > -1) * edge_dict['mask'] + edge_dict['context'] * edge_dict['edge'] + mesh, _, _, _ = create_placeholder(edge_dict['context'], 
edge_dict['mask'], + edge_dict['depth'], edge_dict['fpath_map'], + edge_dict['npath_map'], mesh, inpaint_iter, + edge_ccs, + extend_edge_ccs[edge_id], + edge_maps_with_id, + edge_id) + + dxs, dys = np.where(discard_map != 0) + for dx, dy in zip(dxs, dys): + mesh.nodes[(dx, dy)]['inpaint_twice'] = False + depth_dict = depth_inpainting(context_cc, extend_context_cc, erode_context_cc | extend_erode_context_ccs[edge_id], mask_cc, mesh, config, union_size, depth_feat_model, edge_dict['output']) + refine_depth_output = depth_dict['output']*depth_dict['mask'] + for near_id in np.unique(edge_dict['npath_map'])[1:]: + refine_depth_output = refine_depth_around_edge(refine_depth_output.copy(), + (edge_dict['fpath_map'] == near_id).astype(np.uint8) * edge_dict['mask'], + (edge_dict['fpath_map'] == near_id).astype(np.uint8), + (edge_dict['npath_map'] == near_id).astype(np.uint8) * edge_dict['mask'], + depth_dict['mask'].copy(), + depth_dict['output'] * depth_dict['context'], + config) + depth_dict['output'][depth_dict['mask'] > 0] = refine_depth_output[depth_dict['mask'] > 0] + rgb_dict = get_rgb_from_nodes(context_cc | extend_context_cc, + erode_context_cc | extend_erode_context_ccs[edge_id], mask_cc, mesh.graph['H'], mesh.graph['W'], mesh) + if np.all(rgb_dict['mask'] == edge_dict['mask']) is False: + import pdb; pdb.set_trace() + rgb_dict['edge'] = edge_dict['output'] + patch_rgb_dict = dict() + patch_rgb_dict['mask'], patch_rgb_dict['context'], patch_rgb_dict['rgb'], \ + patch_rgb_dict['edge'] = crop_maps_by_size(union_size, rgb_dict['mask'], + rgb_dict['context'], rgb_dict['rgb'], + rgb_dict['edge']) + tensor_rgb_dict = convert2tensor(patch_rgb_dict) + resize_rgb_dict = {k: v.clone() for k, v in tensor_rgb_dict.items()} + max_hw = np.array([*patch_rgb_dict['mask'].shape[-2:]]).max() + init_frac = config['largest_size'] / (np.array([*patch_rgb_dict['mask'].shape[-2:]]).prod() ** 0.5) + resize_hw = [patch_rgb_dict['mask'].shape[-2] * init_frac, 
patch_rgb_dict['mask'].shape[-1] * init_frac] + resize_max_hw = max(resize_hw) + frac = (np.floor(resize_max_hw / 128.) * 128.) / max_hw + if frac < 1: + resize_mark = torch.nn.functional.interpolate(torch.cat((resize_rgb_dict['mask'], + resize_rgb_dict['context']), + dim=1), + scale_factor=frac, + mode='area') + resize_rgb_dict['mask'] = (resize_mark[:, 0:1] > 0).float() + resize_rgb_dict['context'] = (resize_mark[:, 1:2] == 1).float() + resize_rgb_dict['context'][resize_rgb_dict['mask'] > 0] = 0 + resize_rgb_dict['rgb'] = torch.nn.functional.interpolate(resize_rgb_dict['rgb'], + scale_factor=frac, + mode='area') + resize_rgb_dict['rgb'] = resize_rgb_dict['rgb'] * resize_rgb_dict['context'] + resize_rgb_dict['edge'] = torch.nn.functional.interpolate(resize_rgb_dict['edge'], + scale_factor=frac, + mode='area') + resize_rgb_dict['edge'] = (resize_rgb_dict['edge'] > 0).float() * 0 + resize_rgb_dict['edge'] = resize_rgb_dict['edge'] * (resize_rgb_dict['context'] + resize_rgb_dict['mask']) + rgb_input_feat = torch.cat((resize_rgb_dict['rgb'], resize_rgb_dict['edge']), dim=1) + rgb_input_feat[:, 3] = 1 - rgb_input_feat[:, 3] + resize_mask = open_small_mask(resize_rgb_dict['mask'], resize_rgb_dict['context'], 3, 41) + specified_hole = resize_mask + with torch.no_grad(): + rgb_output = rgb_model.forward_3P(specified_hole, + resize_rgb_dict['context'], + resize_rgb_dict['rgb'], + resize_rgb_dict['edge'], + unit_length=128, + cuda=device) + rgb_output = rgb_output.cpu() + if config.get('gray_image') is True: + rgb_output = rgb_output.mean(1, keepdim=True).repeat((1,3,1,1)) + rgb_output = rgb_output.cpu() + resize_rgb_dict['output'] = rgb_output * resize_rgb_dict['mask'] + resize_rgb_dict['rgb'] + tensor_rgb_dict['output'] = resize_rgb_dict['output'] + if frac < 1: + tensor_rgb_dict['output'] = torch.nn.functional.interpolate(tensor_rgb_dict['output'], + size=tensor_rgb_dict['mask'].shape[-2:], + mode='bicubic') + tensor_rgb_dict['output'] = tensor_rgb_dict['output'] * \ + 
tensor_rgb_dict['mask'] + (tensor_rgb_dict['rgb'] * tensor_rgb_dict['context']) + patch_rgb_dict['output'] = tensor_rgb_dict['output'].data.cpu().numpy().squeeze().transpose(1,2,0) + rgb_dict['output'] = np.zeros((mesh.graph['H'], mesh.graph['W'], 3)) + rgb_dict['output'][union_size['x_min']:union_size['x_max'], union_size['y_min']:union_size['y_max']] = \ + patch_rgb_dict['output'] + + if require_depth_edge(patch_edge_dict['edge'], patch_edge_dict['mask']) or inpaint_iter > 0: + edge_occlusion = True + else: + edge_occlusion = False + for node in erode_context_cc: + if rgb_dict['mask'][node[0], node[1]] > 0: + for info in info_on_pix[(node[0], node[1])]: + if abs(info['depth']) == abs(node[2]): + info['update_color'] = (rgb_dict['output'][node[0], node[1]] * 255).astype(np.uint8) + if frac < 1.: + depth_edge_dilate_2_color_flag = False + else: + depth_edge_dilate_2_color_flag = True + hxs, hys = np.where((rgb_dict['mask'] > 0) & (rgb_dict['erode'] == 0)) + for hx, hy in zip(hxs, hys): + real_depth = None + if abs(depth_dict['output'][hx, hy]) <= abs(np_depth[hx, hy]): + depth_dict['output'][hx, hy] = np_depth[hx, hy] + 0.01 + node = (hx, hy, -depth_dict['output'][hx, hy]) + if info_on_pix.get((node[0], node[1])) is not None: + for info in info_on_pix.get((node[0], node[1])): + if info.get('inpaint_id') is None or abs(info['inpaint_id'] < mesh.nodes[(hx, hy)]['inpaint_id']): + pre_depth = info['depth'] if info.get('real_depth') is None else info['real_depth'] + if abs(node[2]) < abs(pre_depth): + node = (node[0], node[1], -(abs(pre_depth) + 0.001)) + if mesh.has_node(node): + real_depth = node[2] + while True: + if mesh.has_node(node): + node = (node[0], node[1], -(abs(node[2]) + 0.001)) + else: + break + if real_depth == node[2]: + real_depth = None + cur_disp = 1./node[2] + if not(mesh.has_node(node)): + if not mesh.has_node((node[0], node[1])): + print("2D node not found.") + import pdb; pdb.set_trace() + if inpaint_iter == 1: + paint = (rgb_dict['output'][hx, 
hy] * 255).astype(np.uint8) + else: + paint = (rgb_dict['output'][hx, hy] * 255).astype(np.uint8) + ndict = dict(color=paint, + synthesis=True, + disp=cur_disp, + cc_id=set([edge_id]), + overlap_number=1.0, + refine_depth=False, + edge_occlusion=edge_occlusion, + depth_edge_dilate_2_color_flag=depth_edge_dilate_2_color_flag, + real_depth=real_depth) + mesh, _, _ = refresh_node((node[0], node[1]), mesh.nodes[(node[0], node[1])], node, ndict, mesh, stime=True) + if inpaint_iter == 0 and mesh.degree(node) < 4: + connnect_points_ccs[edge_id].add(node) + if info_on_pix.get((hx, hy)) is None: + info_on_pix[(hx, hy)] = [] + new_info = {'depth':node[2], + 'color': paint, + 'synthesis':True, + 'disp':cur_disp, + 'cc_id':set([edge_id]), + 'inpaint_id':inpaint_iter + 1, + 'edge_occlusion':edge_occlusion, + 'overlap_number':1.0, + 'real_depth': real_depth} + info_on_pix[(hx, hy)].append(new_info) + specific_edge_id = tmp_specific_edge_id + for erode_id, erode_context_cc in enumerate(erode_context_ccs): + if len(specific_edge_id) > 0 and erode_id not in specific_edge_id: + continue + for erode_node in erode_context_cc: + for info in info_on_pix[(erode_node[0], erode_node[1])]: + if info['depth'] == erode_node[2]: + info['color'] = info['update_color'] + mesh.nodes[erode_node]['color'] = info['update_color'] + np_image[(erode_node[0], erode_node[1])] = info['update_color'] + new_edge_ccs = [set() for _ in range(mesh.graph['max_edge_id'] + 1)] + for node in mesh.nodes: + if len(node) == 2: + mesh.remove_node(node) + continue + if mesh.nodes[node].get('edge_id') is not None and mesh.nodes[node].get('inpaint_id') == inpaint_iter + 1: + if mesh.nodes[node].get('inpaint_twice') is False: + continue + try: + new_edge_ccs[mesh.nodes[node].get('edge_id')].add(node) + except: + import pdb; pdb.set_trace() + specific_mask_nodes = None + if inpaint_iter == 0: + mesh, info_on_pix = refine_color_around_edge(mesh, info_on_pix, new_edge_ccs, config, False) + + return mesh, info_on_pix, 
specific_mask_nodes, new_edge_ccs, connnect_points_ccs, np_image + + +def write_mesh(image, + depth, + int_mtx, + ply_name, + config, + rgb_model, + depth_edge_model, + depth_edge_model_init, + depth_feat_model): + + mean_loc_depth = depth[depth.shape[0]//2, depth.shape[1]//2] + + pbar = tqdm.tqdm(total = 7 if config['extrapolate_border'] is True else 6) + pbar.set_description("Creating mesh") + + depth = depth.astype(np.float64) + input_mesh, xy2depth, image, depth = create_mesh(depth, image, int_mtx, config) + + H, W = input_mesh.graph['H'], input_mesh.graph['W'] + input_mesh = tear_edges(input_mesh, config['depth_threshold'], xy2depth) + input_mesh, info_on_pix = generate_init_node(input_mesh, config, min_node_in_cc=200) + edge_ccs, input_mesh, edge_mesh = group_edges(input_mesh, config, image, remove_conflict_ordinal=False) + edge_canvas = np.zeros((H, W)) - 1 + + input_mesh, info_on_pix, depth = reassign_floating_island(input_mesh, info_on_pix, image, depth) + input_mesh = update_status(input_mesh, info_on_pix) + specific_edge_id = [] + edge_ccs, input_mesh, edge_mesh = group_edges(input_mesh, config, image, remove_conflict_ordinal=True) + pre_depth = depth.copy() + input_mesh, info_on_pix, edge_mesh, depth, aft_mark = remove_dangling(input_mesh, edge_ccs, edge_mesh, info_on_pix, image, depth, config) + + input_mesh, depth, info_on_pix = update_status(input_mesh, info_on_pix, depth) + edge_ccs, input_mesh, edge_mesh = group_edges(input_mesh, config, image, remove_conflict_ordinal=True) + edge_canvas = np.zeros((H, W)) - 1 + + mesh, info_on_pix, depth = fill_missing_node(input_mesh, info_on_pix, image, depth) + if config['extrapolate_border'] is True: + pbar.update(1) + pbar.set_description("Extrapolating border") + + pre_depth = depth.copy() + input_mesh, info_on_pix, depth = refresh_bord_depth(input_mesh, info_on_pix, image, depth) + input_mesh = remove_node_feat(input_mesh, 'edge_id') + aft_depth = depth.copy() + input_mesh, info_on_pix, depth, image = 
enlarge_border(input_mesh, info_on_pix, depth, image, config) + noext_H, noext_W = H, W + H, W = image.shape[:2] + input_mesh, info_on_pix = fill_dummy_bord(input_mesh, info_on_pix, image, depth, config) + edge_ccs, input_mesh, edge_mesh = \ + group_edges(input_mesh, config, image, remove_conflict_ordinal=True) + input_mesh = combine_end_node(input_mesh, edge_mesh, edge_ccs, depth) + input_mesh, depth, info_on_pix = update_status(input_mesh, info_on_pix, depth) + edge_ccs, input_mesh, edge_mesh = \ + group_edges(input_mesh, config, image, remove_conflict_ordinal=True, spdb=False) + input_mesh = remove_redundant_edge(input_mesh, edge_mesh, edge_ccs, info_on_pix, config, redundant_number=config['redundant_number'], spdb=False) + input_mesh, depth, info_on_pix = update_status(input_mesh, info_on_pix, depth) + edge_ccs, input_mesh, edge_mesh = group_edges(input_mesh, config, image, remove_conflict_ordinal=True) + input_mesh = combine_end_node(input_mesh, edge_mesh, edge_ccs, depth) + input_mesh = remove_redundant_edge(input_mesh, edge_mesh, edge_ccs, info_on_pix, config, redundant_number=config['redundant_number'], invalid=True, spdb=False) + input_mesh, depth, info_on_pix = update_status(input_mesh, info_on_pix, depth) + edge_ccs, input_mesh, edge_mesh = group_edges(input_mesh, config, image, remove_conflict_ordinal=True) + input_mesh = combine_end_node(input_mesh, edge_mesh, edge_ccs, depth) + input_mesh, depth, info_on_pix = update_status(input_mesh, info_on_pix, depth) + edge_ccs, input_mesh, edge_mesh = group_edges(input_mesh, config, image, remove_conflict_ordinal=True) + edge_condition = lambda x, m: m.nodes[x].get('far') is not None and len(m.nodes[x].get('far')) > 0 + edge_map = get_map_from_ccs(edge_ccs, input_mesh.graph['H'], input_mesh.graph['W'], input_mesh, edge_condition) + other_edge_with_id = get_map_from_ccs(edge_ccs, input_mesh.graph['H'], input_mesh.graph['W'], real_id=True) + info_on_pix, input_mesh, image, depth, edge_ccs = extrapolate(input_mesh, 
info_on_pix, image, depth, other_edge_with_id, edge_map, edge_ccs, + depth_edge_model, depth_feat_model, rgb_model, config, direc="up") + info_on_pix, input_mesh, image, depth, edge_ccs = extrapolate(input_mesh, info_on_pix, image, depth, other_edge_with_id, edge_map, edge_ccs, + depth_edge_model, depth_feat_model, rgb_model, config, direc="left") + info_on_pix, input_mesh, image, depth, edge_ccs = extrapolate(input_mesh, info_on_pix, image, depth, other_edge_with_id, edge_map, edge_ccs, + depth_edge_model, depth_feat_model, rgb_model, config, direc="down") + info_on_pix, input_mesh, image, depth, edge_ccs = extrapolate(input_mesh, info_on_pix, image, depth, other_edge_with_id, edge_map, edge_ccs, + depth_edge_model, depth_feat_model, rgb_model, config, direc="right") + info_on_pix, input_mesh, image, depth, edge_ccs = extrapolate(input_mesh, info_on_pix, image, depth, other_edge_with_id, edge_map, edge_ccs, + depth_edge_model, depth_feat_model, rgb_model, config, direc="right-up") + info_on_pix, input_mesh, image, depth, edge_ccs = extrapolate(input_mesh, info_on_pix, image, depth, other_edge_with_id, edge_map, edge_ccs, + depth_edge_model, depth_feat_model, rgb_model, config, direc="right-down") + info_on_pix, input_mesh, image, depth, edge_ccs = extrapolate(input_mesh, info_on_pix, image, depth, other_edge_with_id, edge_map, edge_ccs, + depth_edge_model, depth_feat_model, rgb_model, config, direc="left-up") + info_on_pix, input_mesh, image, depth, edge_ccs = extrapolate(input_mesh, info_on_pix, image, depth, other_edge_with_id, edge_map, edge_ccs, + depth_edge_model, depth_feat_model, rgb_model, config, direc="left-down") + + pbar.update(1) + pbar.set_description("Context and holes") + + specific_edge_loc = None + specific_edge_id = [] + vis_edge_id = None + context_ccs, mask_ccs, broken_mask_ccs, edge_ccs, erode_context_ccs, \ + init_mask_connect, edge_maps, extend_context_ccs, extend_edge_ccs, extend_erode_context_ccs = \ + context_and_holes(input_mesh, + 
edge_ccs, + config, + specific_edge_id, + specific_edge_loc, + depth_feat_model, + inpaint_iter=0, + vis_edge_id=vis_edge_id) + + pbar.update(1) + pbar.set_description("Inpaint 1") + + edge_canvas = np.zeros((H, W)) + mask = np.zeros((H, W)) + context = np.zeros((H, W)) + vis_edge_ccs = filter_edge(input_mesh, edge_ccs, config) + edge_canvas = np.zeros((input_mesh.graph['H'], input_mesh.graph['W'])) - 1 + specific_edge_loc = None + FG_edge_maps = edge_maps.copy() + edge_canvas = np.zeros((input_mesh.graph['H'], input_mesh.graph['W'])) - 1 + # for cc_id, cc in enumerate(edge_ccs): + # for node in cc: + # edge_canvas[node[0], node[1]] = cc_id + # f, ((ax0, ax1, ax2)) = plt.subplots(1, 3, sharex=True, sharey=True); ax0.imshow(1./depth); ax1.imshow(image); ax2.imshow(edge_canvas); plt.show() + input_mesh, info_on_pix, specific_edge_nodes, new_edge_ccs, connect_points_ccs, image = DL_inpaint_edge(input_mesh, + info_on_pix, + config, + image, + depth, + context_ccs, + erode_context_ccs, + extend_context_ccs, + extend_erode_context_ccs, + mask_ccs, + broken_mask_ccs, + edge_ccs, + extend_edge_ccs, + init_mask_connect, + edge_maps, + rgb_model, + depth_edge_model, + depth_edge_model_init, + depth_feat_model, + specific_edge_id, + specific_edge_loc, + inpaint_iter=0) + + pbar.update(1) + pbar.set_description("Inpaint 2") + + specific_edge_id = [] + edge_canvas = np.zeros((input_mesh.graph['H'], input_mesh.graph['W'])) + connect_points_ccs = [set() for _ in connect_points_ccs] + context_ccs, mask_ccs, broken_mask_ccs, edge_ccs, erode_context_ccs, init_mask_connect, \ + edge_maps, extend_context_ccs, extend_edge_ccs, extend_erode_context_ccs = \ + context_and_holes(input_mesh, new_edge_ccs, config, specific_edge_id, specific_edge_loc, depth_feat_model, connect_points_ccs, inpaint_iter=1) + mask_canvas = np.zeros((input_mesh.graph['H'], input_mesh.graph['W'])) + context_canvas = np.zeros((input_mesh.graph['H'], input_mesh.graph['W'])) + erode_context_ccs_canvas = 
np.zeros((input_mesh.graph['H'], input_mesh.graph['W'])) + edge_canvas = np.zeros((input_mesh.graph['H'], input_mesh.graph['W'])) + # edge_canvas = np.zeros((input_mesh.graph['H'], input_mesh.graph['W'])) - 1 + # for cc_id, cc in enumerate(edge_ccs): + # for node in cc: + # edge_canvas[node[0], node[1]] = cc_id + specific_edge_id = [] + input_mesh, info_on_pix, specific_edge_nodes, new_edge_ccs, _, image = DL_inpaint_edge(input_mesh, + info_on_pix, + config, + image, + depth, + context_ccs, + erode_context_ccs, + extend_context_ccs, + extend_erode_context_ccs, + mask_ccs, + broken_mask_ccs, + edge_ccs, + extend_edge_ccs, + init_mask_connect, + edge_maps, + rgb_model, + depth_edge_model, + depth_edge_model_init, + depth_feat_model, + specific_edge_id, + specific_edge_loc, + inpaint_iter=1) + + pbar.update(1) + pbar.set_description("Reproject mesh") + + vertex_id = 0 + input_mesh.graph['H'], input_mesh.graph['W'] = input_mesh.graph['noext_H'], input_mesh.graph['noext_W'] + background_canvas = np.zeros((input_mesh.graph['H'], + input_mesh.graph['W'], + 3)) + ply_flag = config.get('save_ply') or config.get('save_obj') + if ply_flag is True: + node_str_list = [] + else: + node_str_color = [] + node_str_point = [] + out_fmt = lambda x, x_flag: str(x) if x_flag is True else x + point_time = 0 + hlight_time = 0 + cur_id_time = 0 + node_str_time = 0 + generate_face_time = 0 + point_list = [] + k_00, k_02, k_11, k_12 = \ + input_mesh.graph['cam_param_pix_inv'][0, 0], input_mesh.graph['cam_param_pix_inv'][0, 2], \ + input_mesh.graph['cam_param_pix_inv'][1, 1], input_mesh.graph['cam_param_pix_inv'][1, 2] + w_offset = input_mesh.graph['woffset'] + h_offset = input_mesh.graph['hoffset'] + for pix_xy, pix_list in info_on_pix.items(): + for pix_idx, pix_info in enumerate(pix_list): + pix_depth = pix_info['depth'] if pix_info.get('real_depth') is None else pix_info['real_depth'] + str_pt = [out_fmt(x, ply_flag) for x in reproject_3d_int_detail(pix_xy[0], pix_xy[1], pix_depth, + 
k_00, k_02, k_11, k_12, w_offset, h_offset)] + if input_mesh.has_node((pix_xy[0], pix_xy[1], pix_info['depth'])) is False: + return False + continue + if pix_info.get('overlap_number') is not None: + str_color = [out_fmt(x, ply_flag) for x in (pix_info['color']/pix_info['overlap_number']).astype(np.uint8).tolist()] + else: + str_color = [out_fmt(x, ply_flag) for x in pix_info['color'].tolist()] + if pix_info.get('edge_occlusion') is True: + str_color.append(out_fmt(4, ply_flag)) + else: + if pix_info.get('inpaint_id') is None: + str_color.append(out_fmt(1, ply_flag)) + else: + str_color.append(out_fmt(pix_info.get('inpaint_id') + 1, ply_flag)) + if pix_info.get('modified_border') is True or pix_info.get('ext_pixel') is True: + if len(str_color) == 4: + str_color[-1] = out_fmt(5, ply_flag) + else: + str_color.append(out_fmt(5, ply_flag)) + pix_info['cur_id'] = vertex_id + input_mesh.nodes[(pix_xy[0], pix_xy[1], pix_info['depth'])]['cur_id'] = out_fmt(vertex_id, ply_flag) + vertex_id += 1 + if ply_flag is True: + node_str_list.append(' '.join(str_pt) + ' ' + ' '.join(str_color) + '\n') + else: + node_str_color.append(str_color) + node_str_point.append(str_pt) + + + pbar.update(1) + pbar.set_description("Generating faces") + str_faces = generate_face(input_mesh, info_on_pix, config) + pbar.update(1) + pbar.close() + + if config['save_ply'] is True: + basename = os.path.splitext(ply_name)[0] + ply_name = basename + '.ply' + print("Writing mesh file %s ..." 
% ply_name) + #bty: implement binary ply + if config['ply_fmt'] == "bin": + with open(ply_name, 'wb') as ply_fi: + if 'little' == sys.byteorder: + ply_fi.write(('ply\n' + 'format binary_little_endian 1.0\n').encode('ascii')) + else: + ply_fi.write(('ply\n' + 'format binary_big_endian 1.0\n').encode('ascii')) + ply_fi.write(('comment H ' + str(int(input_mesh.graph['H'])) + '\n').encode('ascii')) + ply_fi.write(('comment W ' + str(int(input_mesh.graph['W'])) + '\n').encode('ascii')) + ply_fi.write(('comment hFov ' + str(float(input_mesh.graph['hFov'])) + '\n').encode('ascii')) + ply_fi.write(('comment vFov ' + str(float(input_mesh.graph['vFov'])) + '\n').encode('ascii')) + ply_fi.write(('comment meanLoc ' + str(float(mean_loc_depth)) + '\n').encode('ascii')) + ply_fi.write(('element vertex ' + str(len(node_str_list)) + '\n').encode('ascii')) + ply_fi.write(('property float x\n' + \ + 'property float y\n' + \ + 'property float z\n' + \ + 'property uchar red\n' + \ + 'property uchar green\n' + \ + 'property uchar blue\n' + \ + 'property uchar alpha\n').encode('ascii')) + ply_fi.write(('element face ' + str(len(str_faces)) + '\n').encode('ascii')) + ply_fi.write(('property list uchar int vertex_index\n').encode('ascii')) + ply_fi.write(('end_header\n').encode('ascii')) + + pbar = tqdm.tqdm(total = len(node_str_list)+len(str_faces)) + pbar.set_description("Saving vertices") + + for v in node_str_list: + x, y, z, r, g, b, a = v.split(' ') + ply_fi.write(struct.pack('fffBBBB', float(x), float(y), float(z), int(r), int(g), int(b), int(a))) + pbar.update(1) + + pbar.set_description("Saving faces") + for f in str_faces: + n, a, b, c = f.split(' ') + ply_fi.write(bytearray([int(n)])) + ply_fi.write(struct.pack('III', int(a), int(b), int(c))) + pbar.update(1) + pbar.close() + ply_fi.close() + + else: + with open(ply_name, 'w') as ply_fi: + ply_fi.write('ply\n' + 'format ascii 1.0\n') + ply_fi.write('comment H ' + str(int(input_mesh.graph['H'])) + '\n') + ply_fi.write('comment W 
' + str(int(input_mesh.graph['W'])) + '\n') + ply_fi.write('comment hFov ' + str(float(input_mesh.graph['hFov'])) + '\n') + ply_fi.write('comment vFov ' + str(float(input_mesh.graph['vFov'])) + '\n') + ply_fi.write('comment meanLoc ' + str(float(mean_loc_depth)) + '\n') + ply_fi.write('element vertex ' + str(len(node_str_list)) + '\n') + ply_fi.write('property float x\n' + \ + 'property float y\n' + \ + 'property float z\n' + \ + 'property uchar red\n' + \ + 'property uchar green\n' + \ + 'property uchar blue\n' + \ + 'property uchar alpha\n') + ply_fi.write('element face ' + str(len(str_faces)) + '\n') + ply_fi.write('property list uchar int vertex_index\n') + ply_fi.write('end_header\n') + ply_fi.writelines(node_str_list) + ply_fi.writelines(str_faces) + ply_fi.close() + + + if config['save_obj'] is True: + basename = os.path.splitext(ply_name)[0] + obj_name = basename + '.obj' + print("Writing mesh file %s ..." % obj_name) + with open(obj_name, 'w') as obj_fi: + obj_fi.write('# depthmap-script\n') + obj_fi.write('# H ' + str(int(input_mesh.graph['H'])) + '\n') + obj_fi.write('# W ' + str(int(input_mesh.graph['W'])) + '\n') + obj_fi.write('# hFov ' + str(float(input_mesh.graph['hFov'])) + '\n') + obj_fi.write('# vFov ' + str(float(input_mesh.graph['vFov'])) + '\n') + obj_fi.write('# meanLoc ' + str(float(mean_loc_depth)) + '\n') + obj_fi.write('# vertices ' + str(len(node_str_list)) + '\n') + obj_fi.write('# faces ' + str(len(str_faces)) + '\n') + obj_fi.write('o depthmap\n') + + pbar = tqdm.tqdm(total = len(node_str_list)+len(str_faces)) + pbar.set_description("Saving vertices") + for v in node_str_list: + x, y, z, r, g, b, a = v.split(' ') + x = float(x) + y = float(y) + z = float(z) + r = float(r) / 255.0 + g = float(g) / 255.0 + b = float(b) / 255.0 + obj_fi.write(f"v {x:.8f} {y:.8f} {z:.8f} {r:.4f} {g:.4f} {b:.4f}\n") + pbar.update(1) + + pbar.set_description("Saving faces") + for face in str_faces: + n, a, b, c = face.split(' ') + a = int(a) + 1 + b = 
int(b) + 1 + c = int(c) + 1 + obj_fi.write(f"f {a} {b} {c}\n") + pbar.update(1) + pbar.close() + obj_fi.close() + + return input_mesh + + if config['save_obj'] is False and config['save_ply'] is False: + H = int(input_mesh.graph['H']) + W = int(input_mesh.graph['W']) + hFov = input_mesh.graph['hFov'] + vFov = input_mesh.graph['vFov'] + node_str_color = np.array(node_str_color).astype(np.float32) + node_str_color[..., :3] = node_str_color[..., :3] / 255. + node_str_point = np.array(node_str_point) + str_faces = np.array(str_faces) + + return node_str_point, node_str_color, str_faces, H, W, hFov, vFov + +def read_mesh(mesh_fi): + ext = os.path.splitext(mesh_fi)[1] + if ext == '.ply': + return read_ply(mesh_fi) + elif ext == '.obj': + return read_obj(mesh_fi) + else: + raise Exception('Unknown file format') + +def read_obj(mesh_fi): + mfile = open(mesh_fi, 'r', encoding="utf8") + Height = None + Width = None + hFov = None + vFov = None + mean_loc_depth = None + + firstline = mfile.readline().split('\n')[0] + if not firstline.startswith('# depthmap-script'): + raise Exception('This requires a 3D inpainted mesh generated by this extension.') + + while True: + line = mfile.readline().split('\n')[0] + if line.startswith('#'): + if line.split(' ')[1] == 'H': + Height = int(line.split(' ')[-1].split('\n')[0]) + elif line.split(' ')[1] == 'W': + Width = int(line.split(' ')[-1].split('\n')[0]) + elif line.split(' ')[1] == 'hFov': + hFov = float(line.split(' ')[-1].split('\n')[0]) + elif line.split(' ')[1] == 'vFov': + vFov = float(line.split(' ')[-1].split('\n')[0]) + elif line.split(' ')[1] == 'meanLoc': + mean_loc_depth = float(line.split(' ')[-1].split('\n')[0]) + elif line.split(' ')[1] == 'vertices': + num_vertex = int(line.split(' ')[-1]) + elif line.split(' ')[1] == 'faces': + num_face = int(line.split(' ')[-1]) + # check for start of object + elif line.startswith('o depthmap'): + break + + contents = mfile.readlines() + mfile.close() + + vertex_infos = 
contents[:num_vertex] + face_infos = contents[num_vertex:] + + verts = [None] * num_vertex + colors = [None] * num_vertex + faces = [None] * num_face + i = 0 + for v_info in vertex_infos: + str_info = [float(v) for v in v_info.split('\n')[0].split(' ')[1:]] + vx, vy, vz, r, g, b = str_info + verts[i] = [vx, vy, vz] + colors[i] = [r, g, b] + i = i + 1 + verts = np.array(verts) + colors = np.array(colors) + + i = 0 + for f_info in face_infos: + v1, v2, v3 = [int(f) for f in f_info.split('\n')[0].split(' ')[1:]] + faces[i] = [v1 - 1, v2 - 1, v3 - 1] + i = i + 1 + faces = np.array(faces) + + return verts, colors, faces, Height, Width, hFov, vFov, mean_loc_depth + +def read_ply(mesh_fi): + #bty: implement binary support (assume same endianness for now) + # read header in text mode + ply_fi = open(mesh_fi, 'r', encoding="utf8", errors='ignore') # required to readline in bin file + Height = None + Width = None + hFov = None + vFov = None + mean_loc_depth = None + isBinary = True + # read ascii header + while True: + line = ply_fi.readline().split('\n')[0] + if line.startswith('element vertex'): + num_vertex = int(line.split(' ')[-1]) + elif line.startswith('element face'): + num_face = int(line.split(' ')[-1]) + elif line.startswith('comment'): + if line.split(' ')[1] == 'H': + Height = int(line.split(' ')[-1].split('\n')[0]) + if line.split(' ')[1] == 'W': + Width = int(line.split(' ')[-1].split('\n')[0]) + if line.split(' ')[1] == 'hFov': + hFov = float(line.split(' ')[-1].split('\n')[0]) + if line.split(' ')[1] == 'vFov': + vFov = float(line.split(' ')[-1].split('\n')[0]) + #bty: this was the only value for which it needed the depthmap, so store it in the ply too + if line.split(' ')[1] == 'meanLoc': + mean_loc_depth = float(line.split(' ')[-1].split('\n')[0]) + # check format + elif line.startswith('format ascii'): + isBinary = False + elif line.startswith('end_header'): + break + + if isBinary: + # grab current file offset and re-open in binary mode + endheader = 
ply_fi.tell() + ply_fi.close() + ply_fi = open(mesh_fi, 'rb') + ply_fi.seek(endheader) + + verts = [None] * num_vertex + colors = [None] * num_vertex + faces = [None] * num_face + + pbar = tqdm.tqdm(total = num_vertex+num_face) + pbar.set_description("Loading vertices") + for i in range(num_vertex): + x, y, z, r, g, b, a = struct.unpack('fffBBBB', ply_fi.read(16)) + verts[i] = [x, y, z] + colors[i] = [float(r), float(g), float(b), float(a)] + pbar.update(1) + verts = np.array(verts) + colors = np.array(colors) + colors[..., :3] = colors[..., :3] / 255. + + pbar.set_description("Loading faces") + for i in range(num_face): + c = int.from_bytes(ply_fi.read(1), "little") + if c == 3: + v1, v2, v3 = struct.unpack('III', ply_fi.read(12)) + faces[i] = [v1, v2, v3] + pbar.update(1) + faces = np.array(faces) + ply_fi.close() + pbar.close() + + else: + # read ascii mode file + contents = ply_fi.readlines() + ply_fi.close() + vertex_infos = contents[:num_vertex] + face_infos = contents[num_vertex:] + #bty: optimize by pre-allocating + verts = [None] * num_vertex + colors = [None] * num_vertex + faces = [None] * num_face + i = 0 + for v_info in vertex_infos: + str_info = [float(v) for v in v_info.split('\n')[0].split(' ')] + if len(str_info) == 6: + vx, vy, vz, r, g, b = str_info + else: + vx, vy, vz, r, g, b, hi = str_info + + verts[i] = [vx, vy, vz] + colors[i] = [r, g, b, hi] + i = i + 1 + verts = np.array(verts) + colors = np.array(colors) + colors[..., :3] = colors[..., :3]/255. 
+ + i = 0 + for f_info in face_infos: + _, v1, v2, v3 = [int(f) for f in f_info.split('\n')[0].split(' ')] + faces[i] = [v1, v2, v3] + i = i + 1 + faces = np.array(faces) + + return verts, colors, faces, Height, Width, hFov, vFov, mean_loc_depth + + +class Canvas_view(): + def __init__(self, + fov, + verts, + faces, + colors, + canvas_size, + factor=1, + bgcolor='gray', + proj='perspective', + ): + self.canvas = scene.SceneCanvas(bgcolor=bgcolor, size=(canvas_size*factor, canvas_size*factor)) + self.view = self.canvas.central_widget.add_view() + self.view.camera = 'perspective' + self.view.camera.fov = fov + self.mesh = visuals.Mesh(shading=None) + self.mesh.attach(Alpha(1.0)) + self.view.add(self.mesh) + self.tr = self.view.camera.transform + self.mesh.set_data(vertices=verts, faces=faces, vertex_colors=colors[:, :3]) + self.translate([0,0,0]) + self.rotate(axis=[1,0,0], angle=180) + self.view_changed() + + def translate(self, trans=[0,0,0]): + self.tr.translate(trans) + + def rotate(self, axis=[1,0,0], angle=0): + self.tr.rotate(axis=axis, angle=angle) + + def view_changed(self): + self.view.camera.view_changed() + + def render(self): + return self.canvas.render() + + def reinit_mesh(self, verts, faces, colors): + self.mesh.set_data(vertices=verts, faces=faces, vertex_colors=colors[:, :3]) + + def reinit_camera(self, fov): + self.view.camera.fov = fov + self.view.camera.view_changed() + + +def output_3d_photo(verts, colors, faces, Height, Width, hFov, vFov, tgt_poses, video_traj_types, ref_pose, + output_dir, ref_image, int_mtx, config, image, videos_poses, video_basename, original_H=None, original_W=None, + border=None, depth=None, normal_canvas=None, all_canvas=None, mean_loc_depth=None, dolly=False, fnExt="mp4"): + + cam_mesh = netx.Graph() + cam_mesh.graph['H'] = Height + cam_mesh.graph['W'] = Width + cam_mesh.graph['original_H'] = original_H + cam_mesh.graph['original_W'] = original_W + int_mtx_real_x = int_mtx[0] * Width + int_mtx_real_y = int_mtx[1] * 
Height + cam_mesh.graph['hFov'] = 2 * np.arctan((1. / 2.) * ((cam_mesh.graph['original_W']) / int_mtx_real_x[0])) + cam_mesh.graph['vFov'] = 2 * np.arctan((1. / 2.) * ((cam_mesh.graph['original_H']) / int_mtx_real_y[1])) + colors = colors[..., :3] + + fov_in_rad = max(cam_mesh.graph['vFov'], cam_mesh.graph['hFov']) + fov = (fov_in_rad * 180 / np.pi) + print("fov: " + str(fov)) + init_factor = 1 + #if config.get('anti_flickering') is True: + # init_factor = 3 + #bty: basically Supersample Anti-Aliasing (SSAA) + init_factor = config['ssaa'] + if (cam_mesh.graph['original_H'] is not None) and (cam_mesh.graph['original_W'] is not None): + canvas_w = cam_mesh.graph['original_W'] + canvas_h = cam_mesh.graph['original_H'] + else: + canvas_w = cam_mesh.graph['W'] + canvas_h = cam_mesh.graph['H'] + canvas_size = max(canvas_h, canvas_w) + if normal_canvas is None: + normal_canvas = Canvas_view(fov, + verts, + faces, + colors, + canvas_size=canvas_size, + factor=init_factor, + bgcolor='gray', + proj='perspective') + else: + normal_canvas.reinit_mesh(verts, faces, colors) + normal_canvas.reinit_camera(fov) + img = normal_canvas.render() + #backup_img, backup_all_img, all_img_wo_bound = img.copy(), img.copy() * 0, img.copy() * 0 + img = cv2.resize(img, (int(img.shape[1] / init_factor), int(img.shape[0] / init_factor)), interpolation=cv2.INTER_AREA) + if border is None: + border = [0, img.shape[0], 0, img.shape[1]] + H, W = cam_mesh.graph['H'], cam_mesh.graph['W'] + if (cam_mesh.graph['original_H'] is not None) and (cam_mesh.graph['original_W'] is not None): + aspect_ratio = cam_mesh.graph['original_H'] / cam_mesh.graph['original_W'] + else: + aspect_ratio = cam_mesh.graph['H'] / cam_mesh.graph['W'] + if aspect_ratio > 1: + img_h_len = cam_mesh.graph['H'] if cam_mesh.graph.get('original_H') is None else cam_mesh.graph['original_H'] + img_w_len = img_h_len / aspect_ratio + anchor = [0, + img.shape[0], + int(max(0, int((img.shape[1])//2 - img_w_len//2))), + 
int(min(int((img.shape[1])//2 + img_w_len//2), (img.shape[1])-1))] + elif aspect_ratio <= 1: + img_w_len = cam_mesh.graph['W'] if cam_mesh.graph.get('original_W') is None else cam_mesh.graph['original_W'] + img_h_len = img_w_len * aspect_ratio + anchor = [int(max(0, int((img.shape[0])//2 - img_h_len//2))), + int(min(int((img.shape[0])//2 + img_h_len//2), (img.shape[0])-1)), + 0, + img.shape[1]] + anchor = np.array(anchor) + plane_width = np.tan(fov_in_rad/2.) * np.abs(mean_loc_depth) + fn_saved = [] + for video_pose, video_traj_type in zip(videos_poses, video_traj_types): + print("\nRendering frames ..") + stereos = [] + #tops = []; buttoms = []; lefts = []; rights = [] + for tp_id, tp in enumerate(video_pose): + rel_pose = np.linalg.inv(np.dot(tp, np.linalg.inv(ref_pose))) + axis, angle = transforms3d.axangles.mat2axangle(rel_pose[0:3, 0:3]) + normal_canvas.rotate(axis=axis, angle=(angle*180)/np.pi) + normal_canvas.translate(rel_pose[:3,3]) + new_mean_loc_depth = mean_loc_depth - float(rel_pose[2, 3]) + #if 'dolly' in video_traj_type: + if dolly or 'dolly' in video_traj_type: + new_fov = float((np.arctan2(plane_width, np.array([np.abs(new_mean_loc_depth)])) * 180. 
/ np.pi) * 2) + normal_canvas.reinit_camera(new_fov) + else: + normal_canvas.reinit_camera(fov) + normal_canvas.view_changed() + img = normal_canvas.render() + img = cv2.GaussianBlur(img,(int(init_factor//2 * 2 + 1), int(init_factor//2 * 2 + 1)), 0) + img = cv2.resize(img, (int(img.shape[1] / init_factor), int(img.shape[0] / init_factor)), interpolation=cv2.INTER_AREA) + img = img[anchor[0]:anchor[1], anchor[2]:anchor[3]] + img = img[int(border[0]):int(border[1]), int(border[2]):int(border[3])] + + if any(np.array(config['crop_border']) > 0.0): + H_c, W_c, _ = img.shape + o_t = int(H_c * config['crop_border'][0]) + o_l = int(W_c * config['crop_border'][1]) + o_b = int(H_c * config['crop_border'][2]) + o_r = int(W_c * config['crop_border'][3]) + img = img[o_t:H_c-o_b, o_l:W_c-o_r] + #bty: fix crop size + #img = cv2.resize(img, (W_c, H_c), interpolation=cv2.INTER_CUBIC) + + """ + img = cv2.resize(img, (int(img.shape[1] / init_factor), int(img.shape[0] / init_factor)), interpolation=cv2.INTER_CUBIC) + img = img[anchor[0]:anchor[1], anchor[2]:anchor[3]] + img = img[int(border[0]):int(border[1]), int(border[2]):int(border[3])] + + if config['crop_border'] is True: + top, buttom, left, right = find_largest_rect(img, bg_color=(128, 128, 128)) + tops.append(top); buttoms.append(buttom); lefts.append(left); rights.append(right) + """ + stereos.append(img[..., :3]) + normal_canvas.translate(-rel_pose[:3,3]) + normal_canvas.rotate(axis=axis, angle=-(angle*180)/np.pi) + normal_canvas.view_changed() + """ + if config['crop_border'] is True: + atop, abuttom = min(max(tops), img.shape[0]//2 - 10), max(min(buttoms), img.shape[0]//2 + 10) + aleft, aright = min(max(lefts), img.shape[1]//2 - 10), max(min(rights), img.shape[1]//2 + 10) + atop -= atop % 2; abuttom -= abuttom % 2; aleft -= aleft % 2; aright -= aright % 2 + else: + atop = 0; abuttom = img.shape[0] - img.shape[0] % 2; aleft = 0; aright = img.shape[1] - img.shape[1] % 2 + """ + atop = 0; abuttom = img.shape[0] - 
def relabel_node(mesh, nodes, cur_node, new_node):
    """Rename ``cur_node`` to ``new_node`` in ``mesh``, moving attributes and edges.

    Args:
        mesh: Graph object exposing ``add_node``, ``add_edge``, ``neighbors``
            and ``remove_node``.
        nodes: Mapping from node to its attribute dict; the attributes of
            ``cur_node`` are copied onto ``nodes[new_node]``.
            NOTE(review): presumably ``mesh.nodes`` -- confirm against callers.
        cur_node: Node to retire.
        new_node: Node that replaces it (created when absent).

    Returns:
        The same ``mesh`` object, modified in place.
    """
    if cur_node == new_node:
        return mesh
    mesh.add_node(new_node)
    nodes[new_node].update(nodes[cur_node])
    # Snapshot the neighbours before adding edges: adding an edge that touches
    # cur_node (a self-loop) would otherwise mutate the adjacency being iterated.
    for ne in list(mesh.neighbors(cur_node)):
        # A self-loop on cur_node becomes a self-loop on new_node.
        mesh.add_edge(new_node, new_node if ne == cur_node else ne)
    # Edges are re-created without attributes, exactly as before.
    mesh.remove_node(cur_node)

    return mesh
def filter_edge(mesh, edge_ccs, config, invalid=False):
    """Keep or blank edge components depending on whether they have 'far' context.

    For every component the ``'far'`` nodes of its members are collected.  When
    the component has more than two nodes, a far node that is the only one
    carrying its ``'edge_id'`` is dropped again.  With
    ``config['context_thickness'] == 0`` nothing is collected at all.

    Args:
        mesh: Object whose ``nodes`` attribute maps node -> attribute dict.
        edge_ccs: Sequence of node collections, one per edge component.
        config: Mapping providing ``'context_thickness'``.
        invalid: Only the literal ``True`` selects components whose collected
            context is empty; any other value selects the non-empty ones.

    Returns:
        A list parallel to ``edge_ccs``: the original component where it is
        selected, an empty ``set()`` otherwise.
    """
    mesh_nodes = mesh.nodes
    context_ccs = [set() for _ in edge_ccs]
    # The thickness test does not depend on the component, so evaluate it once
    # (and only when there is something to look at, as the original did).
    collect_context = bool(edge_ccs) and config['context_thickness'] != 0
    if collect_context:
        for edge_id, edge_cc in enumerate(edge_ccs):
            far_by_edge_id = {}
            for edge_node in edge_cc:
                far_nodes = mesh_nodes[edge_node].get('far')
                if far_nodes is None:
                    continue
                for far_node in far_nodes:
                    context_ccs[edge_id].add(far_node)
                    far_edge_id = mesh_nodes[far_node].get('edge_id')
                    if far_edge_id is not None:
                        far_by_edge_id.setdefault(far_edge_id, set()).add(far_node)
            if len(edge_cc) > 2:
                for group in far_by_edge_id.values():
                    if len(group) == 1:
                        context_ccs[edge_id] -= group

    want_empty_context = invalid is True
    return [
        edge_cc if (len(context_cc) == 0) == want_empty_context else set()
        for edge_cc, context_cc in zip(edge_ccs, context_ccs)
    ]
[min(mask_anchor[0], context_anchor[0]), max(mask_anchor[1], context_anchor[1]), + min(mask_anchor[2], context_anchor[2]), max(mask_anchor[3], context_anchor[3])] + elif "down" in direc.lower() and "-" not in direc.lower(): + all_anchor = [h_off + noext_H - config['context_thickness'], 2 * h_off + noext_H, w_off, w_off + noext_W] + global_shift = [all_anchor[0], all_anchor[2]] + mask_anchor = [h_off + noext_H, 2 * h_off + noext_H, w_off, w_off + noext_W] + context_anchor = [h_off + noext_H - config['context_thickness'], h_off + noext_H, w_off, w_off + noext_W] + valid_line_anchor = [h_off + noext_H - 1, h_off + noext_H, w_off, w_off + noext_W] + valid_anchor = [min(mask_anchor[0], context_anchor[0]), max(mask_anchor[1], context_anchor[1]), + min(mask_anchor[2], context_anchor[2]), max(mask_anchor[3], context_anchor[3])] + elif "left" in direc.lower() and "-" not in direc.lower(): + all_anchor = [h_off, h_off + noext_H, 0, w_off + config['context_thickness']] + global_shift = [all_anchor[0], all_anchor[2]] + mask_anchor = [h_off, h_off + noext_H, 0, w_off] + context_anchor = [h_off, h_off + noext_H, w_off, w_off + config['context_thickness']] + valid_line_anchor = [h_off, h_off + noext_H, w_off, w_off + 1] + valid_anchor = [min(mask_anchor[0], context_anchor[0]), max(mask_anchor[1], context_anchor[1]), + min(mask_anchor[2], context_anchor[2]), max(mask_anchor[3], context_anchor[3])] + elif "right" in direc.lower() and "-" not in direc.lower(): + all_anchor = [h_off, h_off + noext_H, w_off + noext_W - config['context_thickness'], 2 * w_off + noext_W] + global_shift = [all_anchor[0], all_anchor[2]] + mask_anchor = [h_off, h_off + noext_H, w_off + noext_W, 2 * w_off + noext_W] + context_anchor = [h_off, h_off + noext_H, w_off + noext_W - config['context_thickness'], w_off + noext_W] + valid_line_anchor = [h_off, h_off + noext_H, w_off + noext_W - 1, w_off + noext_W] + valid_anchor = [min(mask_anchor[0], context_anchor[0]), max(mask_anchor[1], context_anchor[1]), + 
min(mask_anchor[2], context_anchor[2]), max(mask_anchor[3], context_anchor[3])] + elif "left" in direc.lower() and "up" in direc.lower() and "-" in direc.lower(): + all_anchor = [0, h_off + config['context_thickness'], 0, w_off + config['context_thickness']] + global_shift = [all_anchor[0], all_anchor[2]] + mask_anchor = [0, h_off, 0, w_off] + context_anchor = "inv-mask" + valid_line_anchor = None + valid_anchor = all_anchor + elif "left" in direc.lower() and "down" in direc.lower() and "-" in direc.lower(): + all_anchor = [h_off + noext_H - config['context_thickness'], 2 * h_off + noext_H, 0, w_off + config['context_thickness']] + global_shift = [all_anchor[0], all_anchor[2]] + mask_anchor = [h_off + noext_H, 2 * h_off + noext_H, 0, w_off] + context_anchor = "inv-mask" + valid_line_anchor = None + valid_anchor = all_anchor + elif "right" in direc.lower() and "up" in direc.lower() and "-" in direc.lower(): + all_anchor = [0, h_off + config['context_thickness'], w_off + noext_W - config['context_thickness'], 2 * w_off + noext_W] + global_shift = [all_anchor[0], all_anchor[2]] + mask_anchor = [0, h_off, w_off + noext_W, 2 * w_off + noext_W] + context_anchor = "inv-mask" + valid_line_anchor = None + valid_anchor = all_anchor + elif "right" in direc.lower() and "down" in direc.lower() and "-" in direc.lower(): + all_anchor = [h_off + noext_H - config['context_thickness'], 2 * h_off + noext_H, w_off + noext_W - config['context_thickness'], 2 * w_off + noext_W] + global_shift = [all_anchor[0], all_anchor[2]] + mask_anchor = [h_off + noext_H, 2 * h_off + noext_H, w_off + noext_W, 2 * w_off + noext_W] + context_anchor = "inv-mask" + valid_line_anchor = None + valid_anchor = all_anchor + + global_mask = np.zeros_like(depth) + global_mask[mask_anchor[0]:mask_anchor[1],mask_anchor[2]:mask_anchor[3]] = 1 + mask = global_mask[valid_anchor[0]:valid_anchor[1], valid_anchor[2]:valid_anchor[3]] * 1 + context = 1 - mask + global_context = np.zeros_like(depth) + 
global_context[all_anchor[0]:all_anchor[1],all_anchor[2]:all_anchor[3]] = context + # context = global_context[valid_anchor[0]:valid_anchor[1], valid_anchor[2]:valid_anchor[3]] * 1 + + + + valid_area = mask + context + input_rgb = image[valid_anchor[0]:valid_anchor[1], valid_anchor[2]:valid_anchor[3]] / 255. * context[..., None] + input_depth = depth[valid_anchor[0]:valid_anchor[1], valid_anchor[2]:valid_anchor[3]] * context + log_depth = np.log(input_depth + 1e-8) + log_depth[mask > 0] = 0 + input_mean_depth = np.mean(log_depth[context > 0]) + input_zero_mean_depth = (log_depth - input_mean_depth) * context + input_disp = 1./np.abs(input_depth) + input_disp[mask > 0] = 0 + input_disp = input_disp / input_disp.max() + valid_line = np.zeros_like(depth) + if valid_line_anchor is not None: + valid_line[valid_line_anchor[0]:valid_line_anchor[1], valid_line_anchor[2]:valid_line_anchor[3]] = 1 + valid_line = valid_line[all_anchor[0]:all_anchor[1], all_anchor[2]:all_anchor[3]] + # f, ((ax1, ax2)) = plt.subplots(1, 2, sharex=True, sharey=True); ax1.imshow(global_context * 1 + global_mask * 2); ax2.imshow(image); plt.show() + # f, ((ax1, ax2, ax3)) = plt.subplots(1, 3, sharex=True, sharey=True); ax1.imshow(context * 1 + mask * 2); ax2.imshow(input_rgb); ax3.imshow(valid_line); plt.show() + # import pdb; pdb.set_trace() + # return + input_edge_map = edge_map[all_anchor[0]:all_anchor[1], all_anchor[2]:all_anchor[3]] * context + input_other_edge_with_id = other_edge_with_id[all_anchor[0]:all_anchor[1], all_anchor[2]:all_anchor[3]] + end_depth_maps = ((valid_line * input_edge_map) > 0) * input_depth + + + if isinstance(config["gpu_ids"], int) and (config["gpu_ids"] >= 0): + device = config["gpu_ids"] + else: + device = "cpu" + + valid_edge_ids = sorted(list(input_other_edge_with_id[(valid_line * input_edge_map) > 0])) + valid_edge_ids = valid_edge_ids[1:] if (len(valid_edge_ids) > 0 and valid_edge_ids[0] == -1) else valid_edge_ids + edge = reduce(lambda x, y: (x + 
(input_other_edge_with_id == y).astype(np.uint8)).clip(0, 1), [np.zeros_like(mask)] + list(valid_edge_ids)) + t_edge = torch.FloatTensor(edge).to(device)[None, None, ...] + t_rgb = torch.FloatTensor(input_rgb).to(device).permute(2,0,1).unsqueeze(0) + t_mask = torch.FloatTensor(mask).to(device)[None, None, ...] + t_context = torch.FloatTensor(context).to(device)[None, None, ...] + t_disp = torch.FloatTensor(input_disp).to(device)[None, None, ...] + t_depth_zero_mean_depth = torch.FloatTensor(input_zero_mean_depth).to(device)[None, None, ...] + + depth_edge_output = depth_edge_model.forward_3P(t_mask, t_context, t_rgb, t_disp, t_edge, unit_length=128, + cuda=device) + t_output_edge = (depth_edge_output> config['ext_edge_threshold']).float() * t_mask + t_edge + output_raw_edge = t_output_edge.data.cpu().numpy().squeeze() + # import pdb; pdb.set_trace() + mesh = netx.Graph() + hxs, hys = np.where(output_raw_edge * mask > 0) + valid_map = mask + context + for hx, hy in zip(hxs, hys): + node = (hx, hy) + mesh.add_node((hx, hy)) + eight_nes = [ne for ne in [(hx + 1, hy), (hx - 1, hy), (hx, hy + 1), (hx, hy - 1), \ + (hx + 1, hy + 1), (hx - 1, hy - 1), (hx - 1, hy + 1), (hx + 1, hy - 1)]\ + if 0 <= ne[0] < output_raw_edge.shape[0] and 0 <= ne[1] < output_raw_edge.shape[1] and 0 < output_raw_edge[ne[0], ne[1]]] + for ne in eight_nes: + mesh.add_edge(node, ne, length=np.hypot(ne[0] - hx, ne[1] - hy)) + if end_depth_maps[ne[0], ne[1]] != 0: + mesh.nodes[ne[0], ne[1]]['cnt'] = True + mesh.nodes[ne[0], ne[1]]['depth'] = end_depth_maps[ne[0], ne[1]] + ccs = [*netx.connected_components(mesh)] + end_pts = [] + for cc in ccs: + end_pts.append(set()) + for node in cc: + if mesh.nodes[node].get('cnt') is not None: + end_pts[-1].add((node[0], node[1], mesh.nodes[node]['depth'])) + fpath_map = np.zeros_like(output_raw_edge) - 1 + npath_map = np.zeros_like(output_raw_edge) - 1 + for end_pt, cc in zip(end_pts, ccs): + sorted_end_pt = [] + if len(end_pt) >= 2: + continue + if len(end_pt) 
== 0: + continue + if len(end_pt) == 1: + sub_mesh = mesh.subgraph(list(cc)).copy() + pnodes = netx.periphery(sub_mesh) + ends = [*end_pt] + edge_id = global_mesh.nodes[(ends[0][0] + all_anchor[0], ends[0][1] + all_anchor[2], -ends[0][2])]['edge_id'] + pnodes = sorted(pnodes, + key=lambda x: np.hypot((x[0] - ends[0][0]), (x[1] - ends[0][1])), + reverse=True)[0] + npath = [*netx.shortest_path(sub_mesh, (ends[0][0], ends[0][1]), pnodes, weight='length')] + for np_node in npath: + npath_map[np_node[0], np_node[1]] = edge_id + fpath = [] + if global_mesh.nodes[(ends[0][0] + all_anchor[0], ends[0][1] + all_anchor[2], -ends[0][2])].get('far') is None: + print("None far") + import pdb; pdb.set_trace() + else: + fnodes = global_mesh.nodes[(ends[0][0] + all_anchor[0], ends[0][1] + all_anchor[2], -ends[0][2])].get('far') + fnodes = [(xx[0] - all_anchor[0], xx[1] - all_anchor[2], xx[2]) for xx in fnodes] + dmask = mask + 0 + did = 0 + while True: + did += 1 + dmask = cv2.dilate(dmask, np.ones((3, 3)), iterations=1) + if did > 3: + break + # ffnode = [fnode for fnode in fnodes if (dmask[fnode[0], fnode[1]] > 0)] + ffnode = [fnode for fnode in fnodes if (dmask[fnode[0], fnode[1]] > 0 and mask[fnode[0], fnode[1]] == 0)] + if len(ffnode) > 0: + fnode = ffnode[0] + break + if len(ffnode) == 0: + continue + fpath.append((fnode[0], fnode[1])) + for step in range(0, len(npath) - 1): + parr = (npath[step + 1][0] - npath[step][0], npath[step + 1][1] - npath[step][1]) + new_loc = (fpath[-1][0] + parr[0], fpath[-1][1] + parr[1]) + new_loc_nes = [xx for xx in [(new_loc[0] + 1, new_loc[1]), (new_loc[0] - 1, new_loc[1]), + (new_loc[0], new_loc[1] + 1), (new_loc[0], new_loc[1] - 1)]\ + if xx[0] >= 0 and xx[0] < fpath_map.shape[0] and xx[1] >= 0 and xx[1] < fpath_map.shape[1]] + if np.sum([fpath_map[nlne[0], nlne[1]] for nlne in new_loc_nes]) != -4: + break + if npath_map[new_loc[0], new_loc[1]] != -1: + if npath_map[new_loc[0], new_loc[1]] != edge_id: + break + else: + continue + if 
valid_area[new_loc[0], new_loc[1]] == 0: + break + new_loc_nes_eight = [xx for xx in [(new_loc[0] + 1, new_loc[1]), (new_loc[0] - 1, new_loc[1]), + (new_loc[0], new_loc[1] + 1), (new_loc[0], new_loc[1] - 1), + (new_loc[0] + 1, new_loc[1] + 1), (new_loc[0] + 1, new_loc[1] - 1), + (new_loc[0] - 1, new_loc[1] - 1), (new_loc[0] - 1, new_loc[1] + 1)]\ + if xx[0] >= 0 and xx[0] < fpath_map.shape[0] and xx[1] >= 0 and xx[1] < fpath_map.shape[1]] + if np.sum([int(npath_map[nlne[0], nlne[1]] == edge_id) for nlne in new_loc_nes_eight]) == 0: + break + fpath.append((fpath[-1][0] + parr[0], fpath[-1][1] + parr[1])) + if step != len(npath) - 2: + for xx in npath[step+1:]: + if npath_map[xx[0], xx[1]] == edge_id: + npath_map[xx[0], xx[1]] = -1 + if len(fpath) > 0: + for fp_node in fpath: + fpath_map[fp_node[0], fp_node[1]] = edge_id + # import pdb; pdb.set_trace() + far_edge = (fpath_map > -1).astype(np.uint8) + update_edge = (npath_map > -1) * mask + edge + t_update_edge = torch.FloatTensor(update_edge).to(device)[None, None, ...] 
+ depth_output = depth_feat_model.forward_3P(t_mask, t_context, t_depth_zero_mean_depth, t_update_edge, unit_length=128, + cuda=device) + depth_output = depth_output.cpu().data.numpy().squeeze() + depth_output = np.exp(depth_output + input_mean_depth) * mask # + input_depth * context + # if "right" in direc.lower() and "-" not in direc.lower(): + # plt.imshow(depth_output); plt.show() + # import pdb; pdb.set_trace() + # f, ((ax1, ax2)) = plt.subplots(1, 2, sharex=True, sharey=True); ax1.imshow(depth_output); ax2.imshow(npath_map + fpath_map); plt.show() + for near_id in np.unique(npath_map[npath_map > -1]): + depth_output = refine_depth_around_edge(depth_output.copy(), + (fpath_map == near_id).astype(np.uint8) * mask, # far_edge_map_in_mask, + (fpath_map == near_id).astype(np.uint8), # far_edge_map, + (npath_map == near_id).astype(np.uint8) * mask, + mask.copy(), + np.zeros_like(mask), + config) + # if "right" in direc.lower() and "-" not in direc.lower(): + # plt.imshow(depth_output); plt.show() + # import pdb; pdb.set_trace() + # f, ((ax1, ax2)) = plt.subplots(1, 2, sharex=True, sharey=True); ax1.imshow(depth_output); ax2.imshow(npath_map + fpath_map); plt.show() + rgb_output = rgb_feat_model.forward_3P(t_mask, t_context, t_rgb, t_update_edge, unit_length=128, + cuda=device) + + # rgb_output = rgb_feat_model.forward_3P(t_mask, t_context, t_rgb, t_update_edge, unit_length=128, cuda=config['gpu_ids']) + if config.get('gray_image') is True: + rgb_output = rgb_output.mean(1, keepdim=True).repeat((1,3,1,1)) + rgb_output = ((rgb_output.squeeze().data.cpu().permute(1,2,0).numpy() * mask[..., None] + input_rgb) * 255).astype(np.uint8) + image[all_anchor[0]:all_anchor[1], all_anchor[2]:all_anchor[3]][mask > 0] = rgb_output[mask > 0] # np.array([255,0,0]) # rgb_output[mask > 0] + depth[all_anchor[0]:all_anchor[1], all_anchor[2]:all_anchor[3]][mask > 0] = depth_output[mask > 0] + # nxs, nys = np.where(mask > -1) + # for nx, ny in zip(nxs, nys): + # info_on_pix[(nx, 
ny)][0]['color'] = rgb_output[] + + + nxs, nys = np.where((npath_map > -1)) + for nx, ny in zip(nxs, nys): + n_id = npath_map[nx, ny] + four_nes = [xx for xx in [(nx + 1, ny), (nx - 1, ny), (nx, ny + 1), (nx, ny - 1)]\ + if 0 <= xx[0] < fpath_map.shape[0] and 0 <= xx[1] < fpath_map.shape[1]] + for nex, ney in four_nes: + if fpath_map[nex, ney] == n_id: + na, nb = (nx + all_anchor[0], ny + all_anchor[2], info_on_pix[(nx + all_anchor[0], ny + all_anchor[2])][0]['depth']), \ + (nex + all_anchor[0], ney + all_anchor[2], info_on_pix[(nex + all_anchor[0], ney + all_anchor[2])][0]['depth']) + if global_mesh.has_edge(na, nb): + global_mesh.remove_edge(na, nb) + nxs, nys = np.where((fpath_map > -1)) + for nx, ny in zip(nxs, nys): + n_id = fpath_map[nx, ny] + four_nes = [xx for xx in [(nx + 1, ny), (nx - 1, ny), (nx, ny + 1), (nx, ny - 1)]\ + if 0 <= xx[0] < npath_map.shape[0] and 0 <= xx[1] < npath_map.shape[1]] + for nex, ney in four_nes: + if npath_map[nex, ney] == n_id: + na, nb = (nx + all_anchor[0], ny + all_anchor[2], info_on_pix[(nx + all_anchor[0], ny + all_anchor[2])][0]['depth']), \ + (nex + all_anchor[0], ney + all_anchor[2], info_on_pix[(nex + all_anchor[0], ney + all_anchor[2])][0]['depth']) + if global_mesh.has_edge(na, nb): + global_mesh.remove_edge(na, nb) + nxs, nys = np.where(mask > 0) + for x, y in zip(nxs, nys): + x = x + all_anchor[0] + y = y + all_anchor[2] + cur_node = (x, y, 0) + new_node = (x, y, -abs(depth[x, y])) + disp = 1. 
/ -abs(depth[x, y]) + mapping_dict = {cur_node: new_node} + info_on_pix, global_mesh = update_info(mapping_dict, info_on_pix, global_mesh) + global_mesh.nodes[new_node]['color'] = image[x, y] + global_mesh.nodes[new_node]['old_color'] = image[x, y] + global_mesh.nodes[new_node]['disp'] = disp + info_on_pix[(x, y)][0]['depth'] = -abs(depth[x, y]) + info_on_pix[(x, y)][0]['disp'] = disp + info_on_pix[(x, y)][0]['color'] = image[x, y] + + + nxs, nys = np.where((npath_map > -1)) + for nx, ny in zip(nxs, nys): + self_node = (nx + all_anchor[0], ny + all_anchor[2], info_on_pix[(nx + all_anchor[0], ny + all_anchor[2])][0]['depth']) + if global_mesh.has_node(self_node) is False: + break + n_id = int(round(npath_map[nx, ny])) + four_nes = [xx for xx in [(nx + 1, ny), (nx - 1, ny), (nx, ny + 1), (nx, ny - 1)]\ + if 0 <= xx[0] < fpath_map.shape[0] and 0 <= xx[1] < fpath_map.shape[1]] + for nex, ney in four_nes: + ne_node = (nex + all_anchor[0], ney + all_anchor[2], info_on_pix[(nex + all_anchor[0], ney + all_anchor[2])][0]['depth']) + if global_mesh.has_node(ne_node) is False: + continue + if fpath_map[nex, ney] == n_id: + if global_mesh.nodes[self_node].get('edge_id') is None: + global_mesh.nodes[self_node]['edge_id'] = n_id + edge_ccs[n_id].add(self_node) + info_on_pix[(self_node[0], self_node[1])][0]['edge_id'] = n_id + if global_mesh.has_edge(self_node, ne_node) is True: + global_mesh.remove_edge(self_node, ne_node) + if global_mesh.nodes[self_node].get('far') is None: + global_mesh.nodes[self_node]['far'] = [] + global_mesh.nodes[self_node]['far'].append(ne_node) + + global_fpath_map = np.zeros_like(other_edge_with_id) - 1 + global_fpath_map[all_anchor[0]:all_anchor[1], all_anchor[2]:all_anchor[3]] = fpath_map + fpath_ids = np.unique(global_fpath_map) + fpath_ids = fpath_ids[1:] if fpath_ids.shape[0] > 0 and fpath_ids[0] == -1 else [] + fpath_real_id_map = np.zeros_like(global_fpath_map) - 1 + for fpath_id in fpath_ids: + fpath_real_id = np.unique(((global_fpath_map == 
def get_valid_size(imap):
    """Return the bounding box of the rows/columns of ``imap`` with a positive sum.

    Args:
        imap: Array-like with ``sum(axis)``; rows are summed along axis 1 and
            columns along axis 0.

    Returns:
        Dict with ``'x_min'``/``'x_max'`` (row range) and ``'y_min'``/``'y_max'``
        (column range); the ``*_max`` values are exclusive (last index + 1).

    Raises:
        ValueError: If no row or no column has a positive sum.
    """
    # Each occupancy vector is computed once instead of once per bound.
    rows = np.where(imap.sum(1).squeeze() > 0)[0]
    cols = np.where(imap.sum(0).squeeze() > 0)[0]
    if rows.size == 0 or cols.size == 0:
        # numpy would raise the same exception type from .max()/.min() on an
        # empty array; raise it up front with a message that names the cause.
        raise ValueError("get_valid_size: map has no row/column with a positive sum")
    size_dict = {'x_max': rows.max() + 1, 'y_max': cols.max() + 1,
                 'x_min': rows.min(), 'y_min': cols.min()}

    return size_dict
def dilate_valid_size(isize_dict, imap, dilate=(0, 0)):
    """Grow a size dict by ``dilate`` pixels per axis, clamped to ``imap``'s shape.

    Args:
        isize_dict: Dict with ``'x_min'``, ``'x_max'``, ``'y_min'``, ``'y_max'``;
            it is not modified.
        imap: Object with a ``shape`` whose first two entries bound x and y.
        dilate: ``(dx, dy)`` margins; ``dx`` applies to the x bounds and ``dy``
            to the y bounds.

    Returns:
        A new dict with the enlarged, clamped bounds.
    """
    osize_dict = copy.deepcopy(isize_dict)
    osize_dict['x_min'] = max(0, osize_dict['x_min'] - dilate[0])
    osize_dict['x_max'] = min(imap.shape[0], osize_dict['x_max'] + dilate[0])
    # The y lower bound previously used dilate[0], so the box grew by the x
    # margin on one side and the y margin on the other.
    osize_dict['y_min'] = max(0, osize_dict['y_min'] - dilate[1])
    osize_dict['y_max'] = min(imap.shape[1], osize_dict['y_max'] + dilate[1])

    return osize_dict


def size_operation(size_a, size_b, operation):
    """Combine two size dicts; only the union (``'+'``) is implemented.

    Args:
        size_a: Dict with ``'x_min'``, ``'y_min'``, ``'x_max'``, ``'y_max'``.
        size_b: Dict with the same keys.
        operation: ``'+'`` for the union of both boxes.

    Returns:
        Dict holding the smallest box that contains both inputs.

    Raises:
        AssertionError: For ``'-'`` (undefined) or any other operation.  Raised
            explicitly so the check also holds when Python runs with ``-O``.
    """
    if operation not in ('+', '-'):
        raise AssertionError("Operation must be '+' (union) or '-' (exclude)")
    if operation == '-':
        raise AssertionError("Operation '-' is undefined !")
    osize = {}
    osize['x_min'] = min(size_a['x_min'], size_b['x_min'])
    osize['y_min'] = min(size_a['y_min'], size_b['y_min'])
    osize['x_max'] = max(size_a['x_max'], size_b['x_max'])
    osize['y_max'] = max(size_a['y_max'], size_b['y_max'])

    return osize
def fill_dummy_bord(mesh, info_on_pix, image, depth, config):
    """Add placeholder nodes for every pixel outside the un-extended image area.

    The interior is the ``noext_H`` x ``noext_W`` window at
    (``hoffset``, ``woffset``) read from ``mesh.graph``; every other pixel of a
    ``depth``-shaped grid gets a node with depth 0, disparity 0, black colour
    and ``ext_pixel=True``, linked to the 4-neighbours already present in
    ``info_on_pix``.

    Args:
        mesh: Graph with a ``graph`` dict plus ``add_node`` / ``add_edge``.
        info_on_pix: Dict ``(x, y) -> [info dict, ...]``; updated in place.
        image: Unused; kept for interface compatibility.
        depth: Array whose shape defines the pixel grid.
        config: Unused; kept for interface compatibility.

    Returns:
        ``(mesh, info_on_pix)`` -- the same objects, updated in place.
    """
    h_off, w_off = mesh.graph['hoffset'], mesh.graph['woffset']
    interior = np.zeros_like(depth).astype(np.uint8)
    interior[h_off:h_off + mesh.graph['noext_H'],
             w_off:w_off + mesh.graph['noext_W']] = 1
    height, width = mesh.graph['H'], mesh.graph['W']
    cur_depth = 0
    cur_disp = 0
    xs, ys = np.where(interior == 0)
    for x, y in zip(xs, ys):
        cur_node = (x, y, cur_depth)
        # One colour list per pixel (shared by the node and its info entry)
        # rather than a single list aliased by every border node.
        color = [0, 0, 0]
        mesh.add_node(cur_node, color=color,
                      synthesis=False,
                      disp=cur_disp,
                      cc_id=set(),
                      ext_pixel=True)
        info_on_pix[(x, y)] = [{'depth': cur_depth,
                                'color': color,
                                'synthesis': False,
                                'disp': cur_disp,
                                'ext_pixel': True}]
        for xx, yy in ((x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)):
            # Bounds are checked on the neighbour; the previous code tested
            # the current pixel (x, y), which made the check a no-op.
            if not (0 <= xx < height and 0 <= yy < width):
                continue
            ne_infos = info_on_pix.get((xx, yy))
            if ne_infos is not None:
                mesh.add_edge(cur_node, (xx, yy, ne_infos[0]['depth']))

    return mesh, info_on_pix


def enlarge_border(mesh, info_on_pix, depth, image, config):
    """Record the extrapolation offsets and border box in ``mesh.graph``.

    Sets ``hoffset`` and ``woffset`` to ``config['extrapolation_thickness']``
    and the border box to ``[0, H) x [0, W)``.  No array is padded here:
    ``depth`` and ``image`` are returned untouched.

    Returns:
        ``(mesh, info_on_pix, depth, image)`` -- the objects passed in.
    """
    thickness = config['extrapolation_thickness']
    mesh.graph['hoffset'], mesh.graph['woffset'] = thickness, thickness
    mesh.graph['bord_up'], mesh.graph['bord_left'] = 0, 0
    mesh.graph['bord_down'], mesh.graph['bord_right'] = mesh.graph['H'], mesh.graph['W']

    return mesh, info_on_pix, depth, image
def fill_missing_node(mesh, info_on_pix, image, depth):
    """Create a node for every pixel of the border box that has no info entry.

    The depth of a filled pixel is the mean depth of the first info entry of
    its 4-neighbours; when none has an entry, ``-abs(depth[x, y])`` is used.
    ``depth[x, y]`` is overwritten with the absolute value of the result.
    Pixels are visited row by row, so a filled pixel counts as a neighbour for
    the ones visited after it.

    Args:
        mesh: Graph with a ``graph`` dict (``bord_up/down/left/right``) and
            ``add_node``.
        info_on_pix: Dict ``(x, y) -> [info dict, ...]``; updated in place.
        image: Array indexed ``image[x, y]`` for the node colour.
        depth: Array indexed ``depth[x, y]``; updated in place.

    Returns:
        ``(mesh, info_on_pix, depth)`` -- the same objects, updated in place.
    """
    for x in range(mesh.graph['bord_up'], mesh.graph['bord_down']):
        for y in range(mesh.graph['bord_left'], mesh.graph['bord_right']):
            if info_on_pix.get((x, y)) is not None:
                continue
            print("fill missing node = ", x, y)
            ne_depths = [info_on_pix[ne][0]['depth']
                         for ne in ((x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1))
                         if info_on_pix.get(ne) is not None]
            if ne_depths:
                re_depth = sum(ne_depths) / len(ne_depths)
            else:
                re_depth = -abs(depth[x, y])
            depth[x, y] = abs(re_depth)
            # A zero depth has no finite reciprocal; use the depth-0 / disp-0
            # convention that fill_dummy_bord applies to placeholder pixels.
            re_disp = 1. / re_depth if re_depth != 0 else 0.
            info_on_pix[(x, y)] = [{'depth': re_depth,
                                    'color': image[x, y],
                                    'synthesis': False,
                                    'disp': re_disp}]
            mesh.add_node((x, y, re_depth), color=image[x, y],
                          synthesis=False,
                          disp=re_disp,
                          cc_id=set())
    return mesh, info_on_pix, depth
def refresh_bord_depth(mesh, info_on_pix, image, depth):
    """Re-anchor the pixels on the border box to the depth of their inner neighbours.

    Edge (non-corner) border pixels take the depth of the pixel one step
    inside the box; corner pixels take the mean depth of their in-box
    4-neighbours.  Existing nodes are renamed through ``update_info``; missing
    ones are created.  Finally ``depth`` is refreshed for all border pixels and
    the ``'far'`` / ``'near'`` lists of the edge pixels are rebuilt from the
    in-box 4-neighbours they are not connected to.

    Args:
        mesh: Graph with ``graph['bord_up'|'bord_down'|'bord_left'|'bord_right']``.
        info_on_pix: Dict ``(x, y) -> [info dict, ...]``.
        image: Array indexed ``image[x, y]``; colour source for created pixels.
        depth: Array indexed ``depth[x, y]``; updated in place.

    Returns:
        ``(mesh, info_on_pix, depth)``.

    Raises:
        TypeError: If the inner neighbour of a border pixel has no info entry
            (previously a ``pdb`` breakpoint followed by the same error type).
    """
    up, down = mesh.graph['bord_up'], mesh.graph['bord_down']
    left, right = mesh.graph['bord_left'], mesh.graph['bord_right']
    corner_nodes = [(up, left),
                    (up, right - 1),
                    (down - 1, left),
                    (down - 1, right - 1)]
    bord_nodes = []
    bord_nodes += [(up, xx) for xx in range(left + 1, right - 1)]
    bord_nodes += [(down - 1, xx) for xx in range(left + 1, right - 1)]
    bord_nodes += [(xx, left) for xx in range(up + 1, down - 1)]
    bord_nodes += [(xx, right - 1) for xx in range(up + 1, down - 1)]
    for xy in bord_nodes:
        # Pixel one step inside the box, used as the depth source.
        if xy[0] == up:
            tgt_loc = (xy[0] + 1, xy[1])
        elif xy[0] == down - 1:
            tgt_loc = (xy[0] - 1, xy[1])
        elif xy[1] == left:
            tgt_loc = (xy[0], xy[1] + 1)
        elif xy[1] == right - 1:
            tgt_loc = (xy[0], xy[1] - 1)
        else:
            continue
        ne_infos = info_on_pix.get(tgt_loc)
        if ne_infos is None:
            raise TypeError("refresh_bord_depth: no pixel info at %r (inner neighbour of %r)"
                            % (tgt_loc, xy))
        tgt_depth = ne_infos[0]['depth']
        new_node = (xy[0], xy[1], tgt_depth)
        src_node = (tgt_loc[0], tgt_loc[1], tgt_depth)
        tgt_nes_loc = [(xx[0], xx[1]) for xx in mesh.neighbors(src_node)]
        tgt_nes_loc = [(xx[0] - tgt_loc[0] + xy[0], xx[1] - tgt_loc[1] + xy[1]) for xx in tgt_nes_loc
                       if abs(xx[0] - xy[0]) == 1 and abs(xx[1] - xy[1]) == 1]
        tgt_nes_loc = [xx for xx in tgt_nes_loc if info_on_pix.get(xx) is not None]
        tgt_nes_loc.append(tgt_loc)
        if info_on_pix.get(xy) is not None and len(info_on_pix.get(xy)) > 0:
            old_depth = info_on_pix[xy][0].get('depth')
            old_node = (xy[0], xy[1], old_depth)
            mesh.remove_edges_from([(old_ne, old_node) for old_ne in mesh.neighbors(old_node)])
            mesh.add_edges_from([((zz[0], zz[1], info_on_pix[zz][0]['depth']), old_node) for zz in tgt_nes_loc])
            mapping_dict = {old_node: new_node}
            info_on_pix, mesh = update_info(mapping_dict, info_on_pix, mesh)
        else:
            # Previously this assigned into index 0 of a freshly created empty
            # list (IndexError) and wrote the colours to top-level keys of
            # info_on_pix.  Build a private copy of the neighbour's info.
            new_info = dict(info_on_pix[tgt_loc][0])
            new_info['color'] = image[xy[0], xy[1]]
            new_info['old_color'] = image[xy[0], xy[1]]
            info_on_pix[xy] = [new_info]
            mesh.add_node(new_node)
            mesh.add_edges_from([((zz[0], zz[1], info_on_pix[zz][0]['depth']), new_node) for zz in tgt_nes_loc])
        mesh.nodes[new_node]['far'] = None
        mesh.nodes[new_node]['near'] = None
        # The source node must no longer list this border pixel as far/near.
        for key in ('far', 'near'):
            linked = mesh.nodes[src_node].get(key)
            if linked is not None:
                linked[:] = [ne for ne in linked if (ne[0], ne[1]) != xy]
    for xy in corner_nodes:
        hx, hy = xy
        four_nes = [xx for xx in [(hx + 1, hy), (hx - 1, hy), (hx, hy + 1), (hx, hy - 1)]
                    if up <= xx[0] < down and left <= xx[1] < right]
        ne_nodes = []
        ne_depths = []
        for ne_loc in four_nes:
            if info_on_pix.get(ne_loc) is not None:
                ne_depths.append(info_on_pix[ne_loc][0]['depth'])
                ne_nodes.append((ne_loc[0], ne_loc[1], info_on_pix[ne_loc][0]['depth']))
        new_node = (xy[0], xy[1], float(np.mean(ne_depths)))
        if info_on_pix.get(xy) is not None and len(info_on_pix.get(xy)) > 0:
            old_depth = info_on_pix[xy][0].get('depth')
            old_node = (xy[0], xy[1], old_depth)
            mesh.remove_edges_from([(old_ne, old_node) for old_ne in mesh.neighbors(old_node)])
            mesh.add_edges_from([(zz, old_node) for zz in ne_nodes])
            mapping_dict = {old_node: new_node}
            info_on_pix, mesh = update_info(mapping_dict, info_on_pix, mesh)
        else:
            # Same repair as above; the old code additionally used
            # ``ne_loc[-1]`` (an int) as a pixel key.  Start from the last
            # neighbour that has info and align depth with the new node.
            new_info = dict(info_on_pix[(ne_nodes[-1][0], ne_nodes[-1][1])][0]) if ne_nodes else {}
            new_info['depth'] = new_node[2]
            new_info['disp'] = 1. / new_node[2] if new_node[2] != 0 else 0.
            new_info['color'] = image[xy[0], xy[1]]
            new_info['old_color'] = image[xy[0], xy[1]]
            info_on_pix[xy] = [new_info]
            mesh.add_node(new_node)
            mesh.add_edges_from([(zz, new_node) for zz in ne_nodes])
        mesh.nodes[new_node]['far'] = None
        mesh.nodes[new_node]['near'] = None
    for xy in bord_nodes + corner_nodes:
        depth[xy[0], xy[1]] = abs(info_on_pix[xy][0]['depth'])
    for xy in bord_nodes:
        cur_node = (xy[0], xy[1], info_on_pix[xy][0]['depth'])
        nes = mesh.neighbors(cur_node)
        # In-box 4-neighbour locations that are NOT connected to this node.
        four_nes = set([(xy[0] + 1, xy[1]), (xy[0] - 1, xy[1]), (xy[0], xy[1] + 1), (xy[0], xy[1] - 1)]) - \
            set([(ne[0], ne[1]) for ne in nes])
        four_nes = [ne for ne in four_nes if up <= ne[0] < down and left <= ne[1] < right]
        four_nes = [(ne[0], ne[1], info_on_pix[(ne[0], ne[1])][0]['depth']) for ne in four_nes]
        mesh.nodes[cur_node]['far'] = []
        mesh.nodes[cur_node]['near'] = []
        for ne in four_nes:
            if abs(ne[2]) >= abs(cur_node[2]):
                mesh.nodes[cur_node]['far'].append(ne)
            else:
                mesh.nodes[cur_node]['near'].append(ne)

    return mesh, info_on_pix, depth
def get_union_size(mesh, dilate, *alls_cc):
    """Return the dilated bounding box of all nodes in the given components.

    Args:
        mesh: Object whose ``graph`` dict provides ``'H'`` and ``'W'``.
        dilate: ``(dx, dy)`` margins added on both sides of each axis.
        *alls_cc: Any number of node collections; ``node[0]`` / ``node[1]`` are
            read as the x / y coordinates.

    Returns:
        Dict with ``'x_min'``, ``'x_max'``, ``'y_min'``, ``'y_max'`` clamped to
        ``[0, H]`` and ``[0, W]``; the ``*_max`` values are exclusive.  With no
        nodes the un-dilated box is ``(H, 1)`` / ``(W, 1)``, as before.
    """
    H, W = mesh.graph['H'], mesh.graph['W']
    all_cc = set().union(*alls_cc)
    xs = [node[0] for node in all_cc]
    ys = [node[1] for node in all_cc]
    # H/W and 0 are the starting values of the running min/max, so they take
    # part in the reduction exactly as in the former explicit loop.
    min_x, max_x = min([H] + xs), max([0] + xs) + 1
    min_y, max_y = min([W] + ys), max([0] + ys) + 1
    osize_dict = dict()
    osize_dict['x_min'] = max(0, min_x - dilate[0])
    osize_dict['x_max'] = min(H, max_x + dilate[0])
    osize_dict['y_min'] = max(0, min_y - dilate[1])
    osize_dict['y_max'] = min(W, max_y + dilate[1])

    return osize_dict
(node[0], node[1] + 1)] + for ne in four_nes: + for info in info_on_pix[(ne[0], ne[1])]: + ne_node = (ne[0], ne[1], info['depth']) + if info.get('synthesis') is not True and mesh.has_node(ne_node): + mesh.add_edge(node, ne_node) + break + + return mesh + +def edge_inpainting(edge_id, context_cc, erode_context_cc, mask_cc, edge_cc, extend_edge_cc, + mesh, edge_map, edge_maps_with_id, config, union_size, depth_edge_model, inpaint_iter): + edge_dict = get_edge_from_nodes(context_cc, erode_context_cc, mask_cc, edge_cc, extend_edge_cc, + mesh.graph['H'], mesh.graph['W'], mesh) + edge_dict['edge'], end_depth_maps, _ = \ + filter_irrelevant_edge_new(edge_dict['self_edge'] + edge_dict['comp_edge'], + edge_map, + edge_maps_with_id, + edge_id, + edge_dict['context'], + edge_dict['depth'], mesh, context_cc | erode_context_cc, spdb=True) + patch_edge_dict = dict() + patch_edge_dict['mask'], patch_edge_dict['context'], patch_edge_dict['rgb'], \ + patch_edge_dict['disp'], patch_edge_dict['edge'] = \ + crop_maps_by_size(union_size, edge_dict['mask'], edge_dict['context'], + edge_dict['rgb'], edge_dict['disp'], edge_dict['edge']) + tensor_edge_dict = convert2tensor(patch_edge_dict) + if require_depth_edge(patch_edge_dict['edge'], patch_edge_dict['mask']) and inpaint_iter == 0: + with torch.no_grad(): + device = config["gpu_ids"] if isinstance(config["gpu_ids"], int) and config["gpu_ids"] >= 0 else "cpu" + depth_edge_output = depth_edge_model.forward_3P(tensor_edge_dict['mask'], + tensor_edge_dict['context'], + tensor_edge_dict['rgb'], + tensor_edge_dict['disp'], + tensor_edge_dict['edge'], + unit_length=128, + cuda=device) + depth_edge_output = depth_edge_output.cpu() + tensor_edge_dict['output'] = (depth_edge_output > config['ext_edge_threshold']).float() * tensor_edge_dict['mask'] + tensor_edge_dict['edge'] + else: + tensor_edge_dict['output'] = tensor_edge_dict['edge'] + depth_edge_output = tensor_edge_dict['edge'] + 0 + patch_edge_dict['output'] = 
tensor_edge_dict['output'].squeeze().data.cpu().numpy() + edge_dict['output'] = np.zeros((mesh.graph['H'], mesh.graph['W'])) + edge_dict['output'][union_size['x_min']:union_size['x_max'], union_size['y_min']:union_size['y_max']] = \ + patch_edge_dict['output'] + + return edge_dict, end_depth_maps + +def depth_inpainting(context_cc, extend_context_cc, erode_context_cc, mask_cc, mesh, config, union_size, depth_feat_model, edge_output, given_depth_dict=False, spdb=False): + if given_depth_dict is False: + depth_dict = get_depth_from_nodes(context_cc | extend_context_cc, erode_context_cc, mask_cc, mesh.graph['H'], mesh.graph['W'], mesh, config['log_depth']) + if edge_output is not None: + depth_dict['edge'] = edge_output + else: + depth_dict = given_depth_dict + patch_depth_dict = dict() + patch_depth_dict['mask'], patch_depth_dict['context'], patch_depth_dict['depth'], \ + patch_depth_dict['zero_mean_depth'], patch_depth_dict['edge'] = \ + crop_maps_by_size(union_size, depth_dict['mask'], depth_dict['context'], + depth_dict['real_depth'], depth_dict['zero_mean_depth'], depth_dict['edge']) + tensor_depth_dict = convert2tensor(patch_depth_dict) + resize_mask = open_small_mask(tensor_depth_dict['mask'], tensor_depth_dict['context'], 3, 41) + with torch.no_grad(): + device = config["gpu_ids"] if isinstance(config["gpu_ids"], int) and config["gpu_ids"] >= 0 else "cpu" + depth_output = depth_feat_model.forward_3P(resize_mask, + tensor_depth_dict['context'], + tensor_depth_dict['zero_mean_depth'], + tensor_depth_dict['edge'], + unit_length=128, + cuda=device) + depth_output = depth_output.cpu() + tensor_depth_dict['output'] = torch.exp(depth_output + depth_dict['mean_depth']) * \ + tensor_depth_dict['mask'] + tensor_depth_dict['depth'] + patch_depth_dict['output'] = tensor_depth_dict['output'].data.cpu().numpy().squeeze() + depth_dict['output'] = np.zeros((mesh.graph['H'], mesh.graph['W'])) + depth_dict['output'][union_size['x_min']:union_size['x_max'], 
union_size['y_min']:union_size['y_max']] = \ + patch_depth_dict['output'] + depth_output = depth_dict['output'] * depth_dict['mask'] + depth_dict['depth'] * depth_dict['context'] + depth_output = smooth_cntsyn_gap(depth_dict['output'].copy() * depth_dict['mask'] + depth_dict['depth'] * depth_dict['context'], + depth_dict['mask'], depth_dict['context'], + init_mask_region=depth_dict['mask']) + if spdb is True: + f, ((ax1, ax2)) = plt.subplots(1, 2, sharex=True, sharey=True); + ax1.imshow(depth_output * depth_dict['mask'] + depth_dict['depth']); ax2.imshow(depth_dict['output'] * depth_dict['mask'] + depth_dict['depth']); plt.show() + import pdb; pdb.set_trace() + depth_dict['output'] = depth_output * depth_dict['mask'] + depth_dict['depth'] * depth_dict['context'] + + return depth_dict + +def update_info(mapping_dict, info_on_pix, *meshes): + rt_meshes = [] + for mesh in meshes: + rt_meshes.append(relabel_node(mesh, mesh.nodes, [*mapping_dict.keys()][0], [*mapping_dict.values()][0])) + x, y, _ = [*mapping_dict.keys()][0] + info_on_pix[(x, y)][0]['depth'] = [*mapping_dict.values()][0][2] + + return [info_on_pix] + rt_meshes + +def build_connection(mesh, cur_node, dst_node): + if (abs(cur_node[0] - dst_node[0]) + abs(cur_node[1] - dst_node[1])) < 2: + mesh.add_edge(cur_node, dst_node) + if abs(cur_node[0] - dst_node[0]) > 1 or abs(cur_node[1] - dst_node[1]) > 1: + return mesh + ne_nodes = [*mesh.neighbors(cur_node)].copy() + for ne_node in ne_nodes: + if mesh.has_edge(ne_node, dst_node) or ne_node == dst_node: + continue + else: + mesh = build_connection(mesh, ne_node, dst_node) + + return mesh + +def recursive_add_edge(edge_mesh, mesh, info_on_pix, cur_node, mark): + ne_nodes = [(x[0], x[1]) for x in edge_mesh.neighbors(cur_node)] + for node_xy in ne_nodes: + node = (node_xy[0], node_xy[1], info_on_pix[node_xy][0]['depth']) + if mark[node[0], node[1]] != 3: + continue + else: + mark[node[0], node[1]] = 0 + mesh.remove_edges_from([(xx, node) for xx in 
mesh.neighbors(node)]) + mesh = build_connection(mesh, cur_node, node) + re_info = dict(depth=0, count=0) + for re_ne in mesh.neighbors(node): + re_info['depth'] += re_ne[2] + re_info['count'] += 1. + try: + re_depth = re_info['depth'] / re_info['count'] + except: + re_depth = node[2] + re_node = (node_xy[0], node_xy[1], re_depth) + mapping_dict = {node: re_node} + info_on_pix, edge_mesh, mesh = update_info(mapping_dict, info_on_pix, edge_mesh, mesh) + + edge_mesh, mesh, mark, info_on_pix = recursive_add_edge(edge_mesh, mesh, info_on_pix, re_node, mark) + + return edge_mesh, mesh, mark, info_on_pix + +def resize_for_edge(tensor_dict, largest_size): + resize_dict = {k: v.clone() for k, v in tensor_dict.items()} + frac = largest_size / np.array([*resize_dict['edge'].shape[-2:]]).max() + if frac < 1: + resize_mark = torch.nn.functional.interpolate(torch.cat((resize_dict['mask'], + resize_dict['context']), + dim=1), + scale_factor=frac, + mode='bilinear') + resize_dict['mask'] = (resize_mark[:, 0:1] > 0).float() + resize_dict['context'] = (resize_mark[:, 1:2] == 1).float() + resize_dict['context'][resize_dict['mask'] > 0] = 0 + resize_dict['edge'] = torch.nn.functional.interpolate(resize_dict['edge'], + scale_factor=frac, + mode='bilinear') + resize_dict['edge'] = (resize_dict['edge'] > 0).float() + resize_dict['edge'] = resize_dict['edge'] * resize_dict['context'] + resize_dict['disp'] = torch.nn.functional.interpolate(resize_dict['disp'], + scale_factor=frac, + mode='nearest') + resize_dict['disp'] = resize_dict['disp'] * resize_dict['context'] + resize_dict['rgb'] = torch.nn.functional.interpolate(resize_dict['rgb'], + scale_factor=frac, + mode='bilinear') + resize_dict['rgb'] = resize_dict['rgb'] * resize_dict['context'] + return resize_dict + +def get_map_from_nodes(nodes, height, width): + omap = np.zeros((height, width)) + for n in nodes: + omap[n[0], n[1]] = 1 + + return omap + +def get_map_from_ccs(ccs, height, width, condition_input=None, condition=None, 
real_id=False, id_shift=0): + if condition is None: + condition = lambda x, condition_input: True + + if real_id is True: + omap = np.zeros((height, width)) + (-1) + id_shift + else: + omap = np.zeros((height, width)) + for cc_id, cc in enumerate(ccs): + for n in cc: + if condition(n, condition_input): + if real_id is True: + omap[n[0], n[1]] = cc_id + id_shift + else: + omap[n[0], n[1]] = 1 + return omap + +def revise_map_by_nodes(nodes, imap, operation, limit_constr=None): + assert operation == '+' or operation == '-', "Operation must be '+' (union) or '-' (exclude)" + omap = copy.deepcopy(imap) + revise_flag = True + if operation == '+': + for n in nodes: + omap[n[0], n[1]] = 1 + if limit_constr is not None and omap.sum() > limit_constr: + omap = imap + revise_flag = False + elif operation == '-': + for n in nodes: + omap[n[0], n[1]] = 0 + if limit_constr is not None and omap.sum() < limit_constr: + omap = imap + revise_flag = False + + return omap, revise_flag + +def repaint_info(mesh, cc, x_anchor, y_anchor, source_type): + if source_type == 'rgb': + feat = np.zeros((3, x_anchor[1] - x_anchor[0], y_anchor[1] - y_anchor[0])) + else: + feat = np.zeros((1, x_anchor[1] - x_anchor[0], y_anchor[1] - y_anchor[0])) + for node in cc: + if source_type == 'rgb': + feat[:, node[0] - x_anchor[0], node[1] - y_anchor[0]] = np.array(mesh.nodes[node]['color']) / 255. + elif source_type == 'd': + feat[:, node[0] - x_anchor[0], node[1] - y_anchor[0]] = abs(node[2]) + + return feat + +def get_context_from_nodes(mesh, cc, H, W, source_type=''): + if 'rgb' in source_type or 'color' in source_type: + feat = np.zeros((H, W, 3)) + else: + feat = np.zeros((H, W)) + context = np.zeros((H, W)) + for node in cc: + if 'rgb' in source_type or 'color' in source_type: + feat[node[0], node[1]] = np.array(mesh.nodes[node]['color']) / 255. 
+ context[node[0], node[1]] = 1 + else: + feat[node[0], node[1]] = abs(node[2]) + + return feat, context + +def get_mask_from_nodes(mesh, cc, H, W): + mask = np.zeros((H, W)) + for node in cc: + mask[node[0], node[1]] = abs(node[2]) + + return mask + + +def get_edge_from_nodes(context_cc, erode_context_cc, mask_cc, edge_cc, extend_edge_cc, H, W, mesh): + context = np.zeros((H, W)) + mask = np.zeros((H, W)) + rgb = np.zeros((H, W, 3)) + disp = np.zeros((H, W)) + depth = np.zeros((H, W)) + real_depth = np.zeros((H, W)) + edge = np.zeros((H, W)) + comp_edge = np.zeros((H, W)) + fpath_map = np.zeros((H, W)) - 1 + npath_map = np.zeros((H, W)) - 1 + near_depth = np.zeros((H, W)) + for node in context_cc: + rgb[node[0], node[1]] = np.array(mesh.nodes[node]['color']) + disp[node[0], node[1]] = mesh.nodes[node]['disp'] + depth[node[0], node[1]] = node[2] + context[node[0], node[1]] = 1 + for node in erode_context_cc: + rgb[node[0], node[1]] = np.array(mesh.nodes[node]['color']) + disp[node[0], node[1]] = mesh.nodes[node]['disp'] + depth[node[0], node[1]] = node[2] + context[node[0], node[1]] = 1 + rgb = rgb / 255. 
+ disp = np.abs(disp) + disp = disp / disp.max() + real_depth = depth.copy() + for node in context_cc: + if mesh.nodes[node].get('real_depth') is not None: + real_depth[node[0], node[1]] = mesh.nodes[node]['real_depth'] + for node in erode_context_cc: + if mesh.nodes[node].get('real_depth') is not None: + real_depth[node[0], node[1]] = mesh.nodes[node]['real_depth'] + for node in mask_cc: + mask[node[0], node[1]] = 1 + near_depth[node[0], node[1]] = node[2] + for node in edge_cc: + edge[node[0], node[1]] = 1 + for node in extend_edge_cc: + comp_edge[node[0], node[1]] = 1 + rt_dict = {'rgb': rgb, 'disp': disp, 'depth': depth, 'real_depth': real_depth, 'self_edge': edge, 'context': context, + 'mask': mask, 'fpath_map': fpath_map, 'npath_map': npath_map, 'comp_edge': comp_edge, 'valid_area': context + mask, + 'near_depth': near_depth} + + return rt_dict + +def get_depth_from_maps(context_map, mask_map, depth_map, H, W, log_depth=False): + context = context_map.astype(np.uint8) + mask = mask_map.astype(np.uint8).copy() + depth = np.abs(depth_map) + real_depth = depth.copy() + zero_mean_depth = np.zeros((H, W)) + + if log_depth is True: + log_depth = np.log(real_depth + 1e-8) * context + mean_depth = np.mean(log_depth[context > 0]) + zero_mean_depth = (log_depth - mean_depth) * context + else: + zero_mean_depth = real_depth + mean_depth = 0 + edge = np.zeros_like(depth) + + rt_dict = {'depth': depth, 'real_depth': real_depth, 'context': context, 'mask': mask, + 'mean_depth': mean_depth, 'zero_mean_depth': zero_mean_depth, 'edge': edge} + + return rt_dict + +def get_depth_from_nodes(context_cc, erode_context_cc, mask_cc, H, W, mesh, log_depth=False): + context = np.zeros((H, W)) + mask = np.zeros((H, W)) + depth = np.zeros((H, W)) + real_depth = np.zeros((H, W)) + zero_mean_depth = np.zeros((H, W)) + for node in context_cc: + depth[node[0], node[1]] = node[2] + context[node[0], node[1]] = 1 + for node in erode_context_cc: + depth[node[0], node[1]] = node[2] + 
context[node[0], node[1]] = 1 + depth = np.abs(depth) + real_depth = depth.copy() + for node in context_cc: + if mesh.nodes[node].get('real_depth') is not None: + real_depth[node[0], node[1]] = mesh.nodes[node]['real_depth'] + for node in erode_context_cc: + if mesh.nodes[node].get('real_depth') is not None: + real_depth[node[0], node[1]] = mesh.nodes[node]['real_depth'] + real_depth = np.abs(real_depth) + for node in mask_cc: + mask[node[0], node[1]] = 1 + if log_depth is True: + log_depth = np.log(real_depth + 1e-8) * context + mean_depth = np.mean(log_depth[context > 0]) + zero_mean_depth = (log_depth - mean_depth) * context + else: + zero_mean_depth = real_depth + mean_depth = 0 + + rt_dict = {'depth': depth, 'real_depth': real_depth, 'context': context, 'mask': mask, + 'mean_depth': mean_depth, 'zero_mean_depth': zero_mean_depth} + + return rt_dict + +def get_rgb_from_nodes(context_cc, erode_context_cc, mask_cc, H, W, mesh): + context = np.zeros((H, W)) + mask = np.zeros((H, W)) + rgb = np.zeros((H, W, 3)) + erode_context = np.zeros((H, W)) + for node in context_cc: + rgb[node[0], node[1]] = np.array(mesh.nodes[node]['color']) + context[node[0], node[1]] = 1 + rgb = rgb / 255. + for node in mask_cc: + mask[node[0], node[1]] = 1 + for node in erode_context_cc: + erode_context[node[0], node[1]] = 1 + mask[node[0], node[1]] = 1 + rt_dict = {'rgb': rgb, 'context': context, 'mask': mask, + 'erode': erode_context} + + return rt_dict + +def crop_maps_by_size(size, *imaps): + omaps = [] + for imap in imaps: + omaps.append(imap[size['x_min']:size['x_max'], size['y_min']:size['y_max']].copy()) + + return omaps + +def convert2tensor(input_dict): + rt_dict = {} + for key, value in input_dict.items(): + if 'rgb' in key or 'color' in key: + rt_dict[key] = torch.FloatTensor(value).permute(2, 0, 1)[None, ...] + else: + rt_dict[key] = torch.FloatTensor(value)[None, None, ...] 
+ + return rt_dict diff --git a/inpaint/networks.py b/inpaint/networks.py new file mode 100644 index 0000000000000000000000000000000000000000..358bf17dc9095bd0159ebd29f8cd3851484de68c --- /dev/null +++ b/inpaint/networks.py @@ -0,0 +1,501 @@ +import torch +import torch.nn as nn +import numpy as np +import matplotlib.pyplot as plt +import torch.nn.functional as F + + +class BaseNetwork(nn.Module): + def __init__(self): + super(BaseNetwork, self).__init__() + + def init_weights(self, init_type='normal', gain=0.02): + ''' + initialize network's weights + init_type: normal | xavier | kaiming | orthogonal + https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/9451e70673400885567d08a9e97ade2524c700d0/models/networks.py#L39 + ''' + + def init_func(m): + classname = m.__class__.__name__ + if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1): + if init_type == 'normal': + nn.init.normal_(m.weight.data, 0.0, gain) + elif init_type == 'xavier': + nn.init.xavier_normal_(m.weight.data, gain=gain) + elif init_type == 'kaiming': + nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + elif init_type == 'orthogonal': + nn.init.orthogonal_(m.weight.data, gain=gain) + + if hasattr(m, 'bias') and m.bias is not None: + nn.init.constant_(m.bias.data, 0.0) + + elif classname.find('BatchNorm2d') != -1: + nn.init.normal_(m.weight.data, 1.0, gain) + nn.init.constant_(m.bias.data, 0.0) + + self.apply(init_func) + +def weights_init(init_type='gaussian'): + def init_fun(m): + classname = m.__class__.__name__ + if (classname.find('Conv') == 0 or classname.find( + 'Linear') == 0) and hasattr(m, 'weight'): + if init_type == 'gaussian': + nn.init.normal_(m.weight, 0.0, 0.02) + elif init_type == 'xavier': + nn.init.xavier_normal_(m.weight, gain=math.sqrt(2)) + elif init_type == 'kaiming': + nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') + elif init_type == 'orthogonal': + nn.init.orthogonal_(m.weight, gain=math.sqrt(2)) + elif init_type 
== 'default': + pass + else: + assert 0, "Unsupported initialization: {}".format(init_type) + if hasattr(m, 'bias') and m.bias is not None: + nn.init.constant_(m.bias, 0.0) + + return init_fun + +class PartialConv(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=True): + super().__init__() + self.input_conv = nn.Conv2d(in_channels, out_channels, kernel_size, + stride, padding, dilation, groups, bias) + self.mask_conv = nn.Conv2d(in_channels, out_channels, kernel_size, + stride, padding, dilation, groups, False) + self.input_conv.apply(weights_init('kaiming')) + self.slide_winsize = in_channels * kernel_size * kernel_size + + torch.nn.init.constant_(self.mask_conv.weight, 1.0) + + # mask is not updated + for param in self.mask_conv.parameters(): + param.requires_grad = False + + def forward(self, input, mask): + # http://masc.cs.gmu.edu/wiki/partialconv + # C(X) = W^T * X + b, C(0) = b, D(M) = 1 * M + 0 = sum(M) + # W^T* (M .* X) / sum(M) + b = [C(M .* X) – C(0)] / D(M) + C(0) + output = self.input_conv(input * mask) + if self.input_conv.bias is not None: + output_bias = self.input_conv.bias.view(1, -1, 1, 1).expand_as( + output) + else: + output_bias = torch.zeros_like(output) + + with torch.no_grad(): + output_mask = self.mask_conv(mask) + + no_update_holes = output_mask == 0 + + mask_sum = output_mask.masked_fill_(no_update_holes, 1.0) + + output_pre = ((output - output_bias) * self.slide_winsize) / mask_sum + output_bias + output = output_pre.masked_fill_(no_update_holes, 0.0) + + new_mask = torch.ones_like(output) + new_mask = new_mask.masked_fill_(no_update_holes, 0.0) + + return output, new_mask + + +class PCBActiv(nn.Module): + def __init__(self, in_ch, out_ch, bn=True, sample='none-3', activ='relu', + conv_bias=False): + super().__init__() + if sample == 'down-5': + self.conv = PartialConv(in_ch, out_ch, 5, 2, 2, bias=conv_bias) + elif sample == 'down-7': + self.conv = 
PartialConv(in_ch, out_ch, 7, 2, 3, bias=conv_bias) + elif sample == 'down-3': + self.conv = PartialConv(in_ch, out_ch, 3, 2, 1, bias=conv_bias) + else: + self.conv = PartialConv(in_ch, out_ch, 3, 1, 1, bias=conv_bias) + + if bn: + self.bn = nn.BatchNorm2d(out_ch) + if activ == 'relu': + self.activation = nn.ReLU() + elif activ == 'leaky': + self.activation = nn.LeakyReLU(negative_slope=0.2) + + def forward(self, input, input_mask): + h, h_mask = self.conv(input, input_mask) + if hasattr(self, 'bn'): + h = self.bn(h) + if hasattr(self, 'activation'): + h = self.activation(h) + return h, h_mask + +class Inpaint_Depth_Net(nn.Module): + def __init__(self, layer_size=7, upsampling_mode='nearest'): + super().__init__() + in_channels = 4 + out_channels = 1 + self.freeze_enc_bn = False + self.upsampling_mode = upsampling_mode + self.layer_size = layer_size + self.enc_1 = PCBActiv(in_channels, 64, bn=False, sample='down-7', conv_bias=True) + self.enc_2 = PCBActiv(64, 128, sample='down-5', conv_bias=True) + self.enc_3 = PCBActiv(128, 256, sample='down-5') + self.enc_4 = PCBActiv(256, 512, sample='down-3') + for i in range(4, self.layer_size): + name = 'enc_{:d}'.format(i + 1) + setattr(self, name, PCBActiv(512, 512, sample='down-3')) + + for i in range(4, self.layer_size): + name = 'dec_{:d}'.format(i + 1) + setattr(self, name, PCBActiv(512 + 512, 512, activ='leaky')) + self.dec_4 = PCBActiv(512 + 256, 256, activ='leaky') + self.dec_3 = PCBActiv(256 + 128, 128, activ='leaky') + self.dec_2 = PCBActiv(128 + 64, 64, activ='leaky') + self.dec_1 = PCBActiv(64 + in_channels, out_channels, + bn=False, activ=None, conv_bias=True) + def add_border(self, input, mask_flag, PCONV=True): + with torch.no_grad(): + h = input.shape[-2] + w = input.shape[-1] + require_len_unit = 2 ** self.layer_size + residual_h = int(np.ceil(h / float(require_len_unit)) * require_len_unit - h) # + 2*require_len_unit + residual_w = int(np.ceil(w / float(require_len_unit)) * require_len_unit - w) # + 
2*require_len_unit + enlarge_input = torch.zeros((input.shape[0], input.shape[1], h + residual_h, w + residual_w)).to(input.device) + if mask_flag: + if PCONV is False: + enlarge_input += 1.0 + enlarge_input = enlarge_input.clamp(0.0, 1.0) + else: + enlarge_input[:, 2, ...] = 0.0 + anchor_h = residual_h//2 + anchor_w = residual_w//2 + enlarge_input[..., anchor_h:anchor_h+h, anchor_w:anchor_w+w] = input + + return enlarge_input, [anchor_h, anchor_h+h, anchor_w, anchor_w+w] + + def forward_3P(self, mask, context, depth, edge, unit_length=128, cuda=None): + with torch.no_grad(): + input = torch.cat((depth, edge, context, mask), dim=1) + n, c, h, w = input.shape + residual_h = int(np.ceil(h / float(unit_length)) * unit_length - h) + residual_w = int(np.ceil(w / float(unit_length)) * unit_length - w) + anchor_h = residual_h//2 + anchor_w = residual_w//2 + enlarge_input = torch.zeros((n, c, h + residual_h, w + residual_w)).to(cuda) + enlarge_input[..., anchor_h:anchor_h+h, anchor_w:anchor_w+w] = input + # enlarge_input[:, 3] = 1. 
- enlarge_input[:, 3] + depth_output = self.forward(enlarge_input) + depth_output = depth_output[..., anchor_h:anchor_h+h, anchor_w:anchor_w+w] + # import pdb; pdb.set_trace() + + return depth_output + + def forward(self, input_feat, refine_border=False, sample=False, PCONV=True): + input = input_feat + input_mask = (input_feat[:, -2:-1] + input_feat[:, -1:]).clamp(0, 1).repeat(1, input.shape[1], 1, 1) + + vis_input = input.cpu().data.numpy() + vis_input_mask = input_mask.cpu().data.numpy() + H, W = input.shape[-2:] + if refine_border is True: + input, anchor = self.add_border(input, mask_flag=False) + input_mask, anchor = self.add_border(input_mask, mask_flag=True, PCONV=PCONV) + h_dict = {} # for the output of enc_N + h_mask_dict = {} # for the output of enc_N + h_dict['h_0'], h_mask_dict['h_0'] = input, input_mask + + h_key_prev = 'h_0' + for i in range(1, self.layer_size + 1): + l_key = 'enc_{:d}'.format(i) + h_key = 'h_{:d}'.format(i) + h_dict[h_key], h_mask_dict[h_key] = getattr(self, l_key)( + h_dict[h_key_prev], h_mask_dict[h_key_prev]) + h_key_prev = h_key + + h_key = 'h_{:d}'.format(self.layer_size) + h, h_mask = h_dict[h_key], h_mask_dict[h_key] + + for i in range(self.layer_size, 0, -1): + enc_h_key = 'h_{:d}'.format(i - 1) + dec_l_key = 'dec_{:d}'.format(i) + + h = F.interpolate(h, scale_factor=2, mode=self.upsampling_mode) + h_mask = F.interpolate(h_mask, scale_factor=2, mode='nearest') + + h = torch.cat([h, h_dict[enc_h_key]], dim=1) + h_mask = torch.cat([h_mask, h_mask_dict[enc_h_key]], dim=1) + h, h_mask = getattr(self, dec_l_key)(h, h_mask) + output = h + if refine_border is True: + h_mask = h_mask[..., anchor[0]:anchor[1], anchor[2]:anchor[3]] + output = output[..., anchor[0]:anchor[1], anchor[2]:anchor[3]] + + return output + +class Inpaint_Edge_Net(BaseNetwork): + def __init__(self, residual_blocks=8, init_weights=True): + super(Inpaint_Edge_Net, self).__init__() + in_channels = 7 + out_channels = 1 + self.encoder = [] + # 0 + self.encoder_0 = 
nn.Sequential( + nn.ReflectionPad2d(3), + spectral_norm(nn.Conv2d(in_channels=in_channels, out_channels=64, kernel_size=7, padding=0), True), + nn.InstanceNorm2d(64, track_running_stats=False), + nn.ReLU(True)) + # 1 + self.encoder_1 = nn.Sequential( + spectral_norm(nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1), True), + nn.InstanceNorm2d(128, track_running_stats=False), + nn.ReLU(True)) + # 2 + self.encoder_2 = nn.Sequential( + spectral_norm(nn.Conv2d(in_channels=128, out_channels=256, kernel_size=4, stride=2, padding=1), True), + nn.InstanceNorm2d(256, track_running_stats=False), + nn.ReLU(True)) + # 3 + blocks = [] + for _ in range(residual_blocks): + block = ResnetBlock(256, 2) + blocks.append(block) + + self.middle = nn.Sequential(*blocks) + # + 3 + self.decoder_0 = nn.Sequential( + spectral_norm(nn.ConvTranspose2d(in_channels=256+256, out_channels=128, kernel_size=4, stride=2, padding=1), True), + nn.InstanceNorm2d(128, track_running_stats=False), + nn.ReLU(True)) + # + 2 + self.decoder_1 = nn.Sequential( + spectral_norm(nn.ConvTranspose2d(in_channels=128+128, out_channels=64, kernel_size=4, stride=2, padding=1), True), + nn.InstanceNorm2d(64, track_running_stats=False), + nn.ReLU(True)) + # + 1 + self.decoder_2 = nn.Sequential( + nn.ReflectionPad2d(3), + nn.Conv2d(in_channels=64+64, out_channels=out_channels, kernel_size=7, padding=0), + ) + + if init_weights: + self.init_weights() + + def add_border(self, input, channel_pad_1=None): + h = input.shape[-2] + w = input.shape[-1] + require_len_unit = 16 + residual_h = int(np.ceil(h / float(require_len_unit)) * require_len_unit - h) # + 2*require_len_unit + residual_w = int(np.ceil(w / float(require_len_unit)) * require_len_unit - w) # + 2*require_len_unit + enlarge_input = torch.zeros((input.shape[0], input.shape[1], h + residual_h, w + residual_w)).to(input.device) + if channel_pad_1 is not None: + for channel in channel_pad_1: + enlarge_input[:, channel] = 1 + anchor_h = 
residual_h//2 + anchor_w = residual_w//2 + enlarge_input[..., anchor_h:anchor_h+h, anchor_w:anchor_w+w] = input + + return enlarge_input, [anchor_h, anchor_h+h, anchor_w, anchor_w+w] + + def forward_3P(self, mask, context, rgb, disp, edge, unit_length=128, cuda=None): + with torch.no_grad(): + input = torch.cat((rgb, disp/disp.max(), edge, context, mask), dim=1) + n, c, h, w = input.shape + residual_h = int(np.ceil(h / float(unit_length)) * unit_length - h) + residual_w = int(np.ceil(w / float(unit_length)) * unit_length - w) + anchor_h = residual_h//2 + anchor_w = residual_w//2 + enlarge_input = torch.zeros((n, c, h + residual_h, w + residual_w)).to(cuda) + enlarge_input[..., anchor_h:anchor_h+h, anchor_w:anchor_w+w] = input + edge_output = self.forward(enlarge_input) + edge_output = edge_output[..., anchor_h:anchor_h+h, anchor_w:anchor_w+w] + + return edge_output + + def forward(self, x, refine_border=False): + if refine_border: + x, anchor = self.add_border(x, [5]) + x1 = self.encoder_0(x) + x2 = self.encoder_1(x1) + x3 = self.encoder_2(x2) + x4 = self.middle(x3) + x5 = self.decoder_0(torch.cat((x4, x3), dim=1)) + x6 = self.decoder_1(torch.cat((x5, x2), dim=1)) + x7 = self.decoder_2(torch.cat((x6, x1), dim=1)) + x = torch.sigmoid(x7) + if refine_border: + x = x[..., anchor[0]:anchor[1], anchor[2]:anchor[3]] + + return x + +class Inpaint_Color_Net(nn.Module): + def __init__(self, layer_size=7, upsampling_mode='nearest', add_hole_mask=False, add_two_layer=False, add_border=False): + super().__init__() + self.freeze_enc_bn = False + self.upsampling_mode = upsampling_mode + self.layer_size = layer_size + in_channels = 6 + self.enc_1 = PCBActiv(in_channels, 64, bn=False, sample='down-7') + self.enc_2 = PCBActiv(64, 128, sample='down-5') + self.enc_3 = PCBActiv(128, 256, sample='down-5') + self.enc_4 = PCBActiv(256, 512, sample='down-3') + self.enc_5 = PCBActiv(512, 512, sample='down-3') + self.enc_6 = PCBActiv(512, 512, sample='down-3') + self.enc_7 = PCBActiv(512, 
512, sample='down-3') + + self.dec_7 = PCBActiv(512+512, 512, activ='leaky') + self.dec_6 = PCBActiv(512+512, 512, activ='leaky') + + self.dec_5A = PCBActiv(512 + 512, 512, activ='leaky') + self.dec_4A = PCBActiv(512 + 256, 256, activ='leaky') + self.dec_3A = PCBActiv(256 + 128, 128, activ='leaky') + self.dec_2A = PCBActiv(128 + 64, 64, activ='leaky') + self.dec_1A = PCBActiv(64 + in_channels, 3, bn=False, activ=None, conv_bias=True) + ''' + self.dec_5B = PCBActiv(512 + 512, 512, activ='leaky') + self.dec_4B = PCBActiv(512 + 256, 256, activ='leaky') + self.dec_3B = PCBActiv(256 + 128, 128, activ='leaky') + self.dec_2B = PCBActiv(128 + 64, 64, activ='leaky') + self.dec_1B = PCBActiv(64 + 4, 1, bn=False, activ=None, conv_bias=True) + ''' + def cat(self, A, B): + return torch.cat((A, B), dim=1) + + def upsample(self, feat, mask): + feat = F.interpolate(feat, scale_factor=2, mode=self.upsampling_mode) + mask = F.interpolate(mask, scale_factor=2, mode='nearest') + + return feat, mask + + def forward_3P(self, mask, context, rgb, edge, unit_length=128, cuda=None): + with torch.no_grad(): + input = torch.cat((rgb, edge, context, mask), dim=1) + n, c, h, w = input.shape + residual_h = int(np.ceil(h / float(unit_length)) * unit_length - h) # + 128 + residual_w = int(np.ceil(w / float(unit_length)) * unit_length - w) # + 256 + anchor_h = residual_h//2 + anchor_w = residual_w//2 + enlarge_input = torch.zeros((n, c, h + residual_h, w + residual_w)).to(cuda) + enlarge_input[..., anchor_h:anchor_h+h, anchor_w:anchor_w+w] = input + # enlarge_input[:, 3] = 1. 
- enlarge_input[:, 3] + enlarge_input = enlarge_input.to(cuda) + rgb_output = self.forward(enlarge_input) + rgb_output = rgb_output[..., anchor_h:anchor_h+h, anchor_w:anchor_w+w] + + return rgb_output + + def forward(self, input, add_border=False): + input_mask = (input[:, -2:-1] + input[:, -1:]).clamp(0, 1) + H, W = input.shape[-2:] + f_0, h_0 = input, input_mask.repeat((1,input.shape[1],1,1)) + f_1, h_1 = self.enc_1(f_0, h_0) + f_2, h_2 = self.enc_2(f_1, h_1) + f_3, h_3 = self.enc_3(f_2, h_2) + f_4, h_4 = self.enc_4(f_3, h_3) + f_5, h_5 = self.enc_5(f_4, h_4) + f_6, h_6 = self.enc_6(f_5, h_5) + f_7, h_7 = self.enc_7(f_6, h_6) + + o_7, k_7 = self.upsample(f_7, h_7) + o_6, k_6 = self.dec_7(self.cat(o_7, f_6), self.cat(k_7, h_6)) + o_6, k_6 = self.upsample(o_6, k_6) + o_5, k_5 = self.dec_6(self.cat(o_6, f_5), self.cat(k_6, h_5)) + o_5, k_5 = self.upsample(o_5, k_5) + o_5A, k_5A = o_5, k_5 + o_5B, k_5B = o_5, k_5 + ############### + o_4A, k_4A = self.dec_5A(self.cat(o_5A, f_4), self.cat(k_5A, h_4)) + o_4A, k_4A = self.upsample(o_4A, k_4A) + o_3A, k_3A = self.dec_4A(self.cat(o_4A, f_3), self.cat(k_4A, h_3)) + o_3A, k_3A = self.upsample(o_3A, k_3A) + o_2A, k_2A = self.dec_3A(self.cat(o_3A, f_2), self.cat(k_3A, h_2)) + o_2A, k_2A = self.upsample(o_2A, k_2A) + o_1A, k_1A = self.dec_2A(self.cat(o_2A, f_1), self.cat(k_2A, h_1)) + o_1A, k_1A = self.upsample(o_1A, k_1A) + o_0A, k_0A = self.dec_1A(self.cat(o_1A, f_0), self.cat(k_1A, h_0)) + + return torch.sigmoid(o_0A) + + def train(self, mode=True): + """ + Override the default train() to freeze the BN parameters + """ + super().train(mode) + if self.freeze_enc_bn: + for name, module in self.named_modules(): + if isinstance(module, nn.BatchNorm2d) and 'enc' in name: + module.eval() + +class Discriminator(BaseNetwork): + def __init__(self, use_sigmoid=True, use_spectral_norm=True, init_weights=True, in_channels=None): + super(Discriminator, self).__init__() + self.use_sigmoid = use_sigmoid + self.conv1 = self.features = 
nn.Sequential( + spectral_norm(nn.Conv2d(in_channels=in_channels, out_channels=64, kernel_size=4, stride=2, padding=1, bias=not use_spectral_norm), use_spectral_norm), + nn.LeakyReLU(0.2, inplace=True), + ) + + self.conv2 = nn.Sequential( + spectral_norm(nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1, bias=not use_spectral_norm), use_spectral_norm), + nn.LeakyReLU(0.2, inplace=True), + ) + + self.conv3 = nn.Sequential( + spectral_norm(nn.Conv2d(in_channels=128, out_channels=256, kernel_size=4, stride=2, padding=1, bias=not use_spectral_norm), use_spectral_norm), + nn.LeakyReLU(0.2, inplace=True), + ) + + self.conv4 = nn.Sequential( + spectral_norm(nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=1, padding=1, bias=not use_spectral_norm), use_spectral_norm), + nn.LeakyReLU(0.2, inplace=True), + ) + + self.conv5 = nn.Sequential( + spectral_norm(nn.Conv2d(in_channels=512, out_channels=1, kernel_size=4, stride=1, padding=1, bias=not use_spectral_norm), use_spectral_norm), + ) + + if init_weights: + self.init_weights() + + def forward(self, x): + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + conv3 = self.conv3(conv2) + conv4 = self.conv4(conv3) + conv5 = self.conv5(conv4) + + outputs = conv5 + if self.use_sigmoid: + outputs = torch.sigmoid(conv5) + + return outputs, [conv1, conv2, conv3, conv4, conv5] + +class ResnetBlock(nn.Module): + def __init__(self, dim, dilation=1): + super(ResnetBlock, self).__init__() + self.conv_block = nn.Sequential( + nn.ReflectionPad2d(dilation), + spectral_norm(nn.Conv2d(in_channels=dim, out_channels=dim, kernel_size=3, padding=0, dilation=dilation, bias=not True), True), + nn.InstanceNorm2d(dim, track_running_stats=False), + nn.LeakyReLU(negative_slope=0.2), + + nn.ReflectionPad2d(1), + spectral_norm(nn.Conv2d(in_channels=dim, out_channels=dim, kernel_size=3, padding=0, dilation=1, bias=not True), True), + nn.InstanceNorm2d(dim, track_running_stats=False), + ) + + def forward(self, 
x): + out = x + self.conv_block(x) + + # Remove ReLU at the end of the residual block + # http://torch.ch/blog/2016/02/04/resnets.html + + return out + + +def spectral_norm(module, mode=True): + if mode: + return nn.utils.spectral_norm(module) + + return module diff --git a/inpaint/requirements.txt b/inpaint/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..8e6832821d762bf27e4d5be7081c2dd6331bc485 --- /dev/null +++ b/inpaint/requirements.txt @@ -0,0 +1,7 @@ +opencv-python==4.2.0.32 +vispy==0.6.4 +moviepy==1.0.2 +transforms3d==0.3.1 +networkx==2.3 +cynetworkx +scikit-image diff --git a/inpaint/utils.py b/inpaint/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ab824135ee408628b8ec80f03a83c1149ebca307 --- /dev/null +++ b/inpaint/utils.py @@ -0,0 +1,1416 @@ +import os +import glob +import cv2 +import scipy.misc as misc +from skimage.transform import resize +import numpy as np +from functools import reduce +from operator import mul +import torch +from torch import nn +import matplotlib.pyplot as plt +import re +try: + import cynetworkx as netx +except ImportError: + import networkx as netx +from scipy.ndimage import gaussian_filter +from skimage.feature import canny +import collections +import shutil +import imageio +import copy +from matplotlib import pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import time +from scipy.interpolate import interp1d +from collections import namedtuple + +def path_planning(num_frames, x, y, z, path_type=''): + if path_type == 'straight-line': + corner_points = np.array([[0, 0, 0], [(0 + x) * 0.5, (0 + y) * 0.5, (0 + z) * 0.5], [x, y, z]]) + corner_t = np.linspace(0, 1, len(corner_points)) + t = np.linspace(0, 1, num_frames) + cs = interp1d(corner_t, corner_points, axis=0, kind='quadratic') + spline = cs(t) + xs, ys, zs = [xx.squeeze() for xx in np.split(spline, 3, 1)] + elif path_type == 'double-straight-line': + corner_points = np.array([[-x, -y, -z], [0, 0, 0], 
[x, y, z]]) + corner_t = np.linspace(0, 1, len(corner_points)) + t = np.linspace(0, 1, num_frames) + cs = interp1d(corner_t, corner_points, axis=0, kind='quadratic') + spline = cs(t) + xs, ys, zs = [xx.squeeze() for xx in np.split(spline, 3, 1)] + elif path_type == 'circle': + xs, ys, zs = [], [], [] + for frame_id, bs_shift_val in enumerate(np.arange(-2.0, 2.0, (4./num_frames))): + xs += [np.cos(bs_shift_val * np.pi) * 1 * x] + ys += [np.sin(bs_shift_val * np.pi) * 1 * y] + zs += [np.cos(bs_shift_val * np.pi/2.) * 1 * z] + xs, ys, zs = np.array(xs), np.array(ys), np.array(zs) + + return xs, ys, zs + +def open_small_mask(mask, context, open_iteration, kernel): + np_mask = mask.cpu().data.numpy().squeeze().astype(np.uint8) + raw_mask = np_mask.copy() + np_context = context.cpu().data.numpy().squeeze().astype(np.uint8) + np_input = np_mask + np_context + for _ in range(open_iteration): + np_input = cv2.erode(cv2.dilate(np_input, np.ones((kernel, kernel)), iterations=1), np.ones((kernel,kernel)), iterations=1) + np_mask[(np_input - np_context) > 0] = 1 + out_mask = torch.FloatTensor(np_mask).to(mask)[None, None, ...] 
+ + return out_mask + +def filter_irrelevant_edge_new(self_edge, comp_edge, other_edges, other_edges_with_id, current_edge_id, context, depth, mesh, context_cc, spdb=False): + other_edges = other_edges.squeeze().astype(np.uint8) + other_edges_with_id = other_edges_with_id.squeeze() + self_edge = self_edge.squeeze() + dilate_bevel_self_edge = cv2.dilate((self_edge + comp_edge).astype(np.uint8), np.array([[1,1,1],[1,1,1],[1,1,1]]), iterations=1) + dilate_cross_self_edge = cv2.dilate((self_edge + comp_edge).astype(np.uint8), np.array([[0,1,0],[1,1,1],[0,1,0]]).astype(np.uint8), iterations=1) + edge_ids = np.unique(other_edges_with_id * context + (-1) * (1 - context)).astype(int) + end_depth_maps = np.zeros_like(self_edge) + self_edge_ids = np.sort(np.unique(other_edges_with_id[self_edge > 0]).astype(int)) + self_edge_ids = self_edge_ids[1:] if self_edge_ids.shape[0] > 0 and self_edge_ids[0] == -1 else self_edge_ids + self_comp_ids = np.sort(np.unique(other_edges_with_id[comp_edge > 0]).astype(int)) + self_comp_ids = self_comp_ids[1:] if self_comp_ids.shape[0] > 0 and self_comp_ids[0] == -1 else self_comp_ids + edge_ids = edge_ids[1:] if edge_ids[0] == -1 else edge_ids + other_edges_info = [] + extend_other_edges = np.zeros_like(other_edges) + if spdb is True: + f, ((ax1, ax2, ax3)) = plt.subplots(1, 3, sharex=True, sharey=True); ax1.imshow(self_edge); ax2.imshow(context); ax3.imshow(other_edges_with_id * context + (-1) * (1 - context)); plt.show() + import pdb; pdb.set_trace() + filter_self_edge = np.zeros_like(self_edge) + for self_edge_id in self_edge_ids: + filter_self_edge[other_edges_with_id == self_edge_id] = 1 + dilate_self_comp_edge = cv2.dilate(comp_edge, kernel=np.ones((3, 3)), iterations=2) + valid_self_comp_edge = np.zeros_like(comp_edge) + for self_comp_id in self_comp_ids: + valid_self_comp_edge[self_comp_id == other_edges_with_id] = 1 + self_comp_edge = dilate_self_comp_edge * valid_self_comp_edge + filter_self_edge = (filter_self_edge + 
self_comp_edge).clip(0, 1) + for edge_id in edge_ids: + other_edge_locs = (other_edges_with_id == edge_id).astype(np.uint8) + condition = (other_edge_locs * other_edges * context.astype(np.uint8)) + end_cross_point = dilate_cross_self_edge * condition * (1 - filter_self_edge) + end_bevel_point = dilate_bevel_self_edge * condition * (1 - filter_self_edge) + if end_bevel_point.max() != 0: + end_depth_maps[end_bevel_point != 0] = depth[end_bevel_point != 0] + if end_cross_point.max() == 0: + nxs, nys = np.where(end_bevel_point != 0) + for nx, ny in zip(nxs, nys): + bevel_node = [xx for xx in context_cc if xx[0] == nx and xx[1] == ny][0] + for ne in mesh.neighbors(bevel_node): + if other_edges_with_id[ne[0], ne[1]] > -1 and dilate_cross_self_edge[ne[0], ne[1]] > 0: + extend_other_edges[ne[0], ne[1]] = 1 + break + else: + other_edges[other_edges_with_id == edge_id] = 0 + other_edges = (other_edges + extend_other_edges).clip(0, 1) * context + + return other_edges, end_depth_maps, other_edges_info + +def clean_far_edge_new(input_edge, end_depth_maps, mask, context, global_mesh, info_on_pix, self_edge, inpaint_id, config): + mesh = netx.Graph() + hxs, hys = np.where(input_edge * mask > 0) + valid_near_edge = (input_edge != 0).astype(np.uint8) * context + valid_map = mask + context + invalid_edge_ids = [] + for hx, hy in zip(hxs, hys): + node = (hx ,hy) + mesh.add_node((hx, hy)) + eight_nes = [ne for ne in [(hx + 1, hy), (hx - 1, hy), (hx, hy + 1), (hx, hy - 1), \ + (hx + 1, hy + 1), (hx - 1, hy - 1), (hx - 1, hy + 1), (hx + 1, hy - 1)]\ + if 0 <= ne[0] < input_edge.shape[0] and 0 <= ne[1] < input_edge.shape[1] and 0 < input_edge[ne[0], ne[1]]] # or end_depth_maps[ne[0], ne[1]] != 0] + for ne in eight_nes: + mesh.add_edge(node, ne, length=np.hypot(ne[0] - hx, ne[1] - hy)) + if end_depth_maps[ne[0], ne[1]] != 0: + mesh.nodes[ne[0], ne[1]]['cnt'] = True + if end_depth_maps[ne[0], ne[1]] == 0: + import pdb; pdb.set_trace() + mesh.nodes[ne[0], ne[1]]['depth'] = 
end_depth_maps[ne[0], ne[1]] + elif mask[ne[0], ne[1]] != 1: + four_nes = [nne for nne in [(ne[0] + 1, ne[1]), (ne[0] - 1, ne[1]), (ne[0], ne[1] + 1), (ne[0], ne[1] - 1)]\ + if nne[0] < end_depth_maps.shape[0] and nne[0] >= 0 and nne[1] < end_depth_maps.shape[1] and nne[1] >= 0] + for nne in four_nes: + if end_depth_maps[nne[0], nne[1]] != 0: + mesh.add_edge(nne, ne, length=np.hypot(nne[0] - ne[0], nne[1] - ne[1])) + mesh.nodes[nne[0], nne[1]]['cnt'] = True + mesh.nodes[nne[0], nne[1]]['depth'] = end_depth_maps[nne[0], nne[1]] + ccs = [*netx.connected_components(mesh)] + end_pts = [] + for cc in ccs: + end_pts.append(set()) + for node in cc: + if mesh.nodes[node].get('cnt') is not None: + end_pts[-1].add((node[0], node[1], mesh.nodes[node]['depth'])) + predef_npaths = [None for _ in range(len(ccs))] + fpath_map = np.zeros_like(input_edge) - 1 + npath_map = np.zeros_like(input_edge) - 1 + npaths, fpaths = dict(), dict() + break_flag = False + end_idx = 0 + while end_idx < len(end_pts): + end_pt, cc = [*zip(end_pts, ccs)][end_idx] + end_idx += 1 + sorted_end_pt = [] + fpath = [] + iter_fpath = [] + if len(end_pt) > 2 or len(end_pt) == 0: + if len(end_pt) > 2: + continue + continue + if len(end_pt) == 2: + ravel_end = [*end_pt] + tmp_sub_mesh = mesh.subgraph(list(cc)).copy() + tmp_npath = [*netx.shortest_path(tmp_sub_mesh, (ravel_end[0][0], ravel_end[0][1]), (ravel_end[1][0], ravel_end[1][1]), weight='length')] + fpath_map1, npath_map1, disp_diff1 = plan_path(mesh, info_on_pix, cc, ravel_end[0:1], global_mesh, input_edge, mask, valid_map, inpaint_id, npath_map=None, fpath_map=None, npath=tmp_npath) + fpath_map2, npath_map2, disp_diff2 = plan_path(mesh, info_on_pix, cc, ravel_end[1:2], global_mesh, input_edge, mask, valid_map, inpaint_id, npath_map=None, fpath_map=None, npath=tmp_npath) + tmp_disp_diff = [disp_diff1, disp_diff2] + self_end = [] + edge_len = [] + ds_edge = cv2.dilate(self_edge.astype(np.uint8), np.ones((3, 3)), iterations=1) + if 
ds_edge[ravel_end[0][0], ravel_end[0][1]] > 0: + self_end.append(1) + else: + self_end.append(0) + if ds_edge[ravel_end[1][0], ravel_end[1][1]] > 0: + self_end.append(1) + else: + self_end.append(0) + edge_len = [np.count_nonzero(npath_map1), np.count_nonzero(npath_map2)] + sorted_end_pts = [xx[0] for xx in sorted(zip(ravel_end, self_end, edge_len, [disp_diff1, disp_diff2]), key=lambda x: (x[1], x[2]), reverse=True)] + re_npath_map1, re_fpath_map1 = (npath_map1 != -1).astype(np.uint8), (fpath_map1 != -1).astype(np.uint8) + re_npath_map2, re_fpath_map2 = (npath_map2 != -1).astype(np.uint8), (fpath_map2 != -1).astype(np.uint8) + if np.count_nonzero(re_npath_map1 * re_npath_map2 * mask) / \ + (np.count_nonzero((re_npath_map1 + re_npath_map2) * mask) + 1e-6) > 0.5\ + and np.count_nonzero(re_fpath_map1 * re_fpath_map2 * mask) / \ + (np.count_nonzero((re_fpath_map1 + re_fpath_map2) * mask) + 1e-6) > 0.5\ + and tmp_disp_diff[0] != -1 and tmp_disp_diff[1] != -1: + my_fpath_map, my_npath_map, npath, fpath = \ + plan_path_e2e(mesh, cc, sorted_end_pts, global_mesh, input_edge, mask, valid_map, inpaint_id, npath_map=None, fpath_map=None) + npath_map[my_npath_map != -1] = my_npath_map[my_npath_map != -1] + fpath_map[my_fpath_map != -1] = my_fpath_map[my_fpath_map != -1] + if len(fpath) > 0: + edge_id = global_mesh.nodes[[*sorted_end_pts][0]]['edge_id'] + fpaths[edge_id] = fpath + npaths[edge_id] = npath + invalid_edge_ids.append(edge_id) + else: + if tmp_disp_diff[0] != -1: + ratio_a = tmp_disp_diff[0] / (np.sum(tmp_disp_diff) + 1e-8) + else: + ratio_a = 0 + if tmp_disp_diff[1] != -1: + ratio_b = tmp_disp_diff[1] / (np.sum(tmp_disp_diff) + 1e-8) + else: + ratio_b = 0 + npath_len = len(tmp_npath) + if npath_len > config['depth_edge_dilate_2'] * 2: + npath_len = npath_len - (config['depth_edge_dilate_2'] * 1) + tmp_npath_a = tmp_npath[:int(np.floor(npath_len * ratio_a))] + tmp_npath_b = tmp_npath[::-1][:int(np.floor(npath_len * ratio_b))] + tmp_merge = [] + if len(tmp_npath_a) > 
0 and sorted_end_pts[0][0] == tmp_npath_a[0][0] and sorted_end_pts[0][1] == tmp_npath_a[0][1]: + if len(tmp_npath_a) > 0 and mask[tmp_npath_a[-1][0], tmp_npath_a[-1][1]] > 0: + tmp_merge.append([sorted_end_pts[:1], tmp_npath_a]) + if len(tmp_npath_b) > 0 and mask[tmp_npath_b[-1][0], tmp_npath_b[-1][1]] > 0: + tmp_merge.append([sorted_end_pts[1:2], tmp_npath_b]) + elif len(tmp_npath_b) > 0 and sorted_end_pts[0][0] == tmp_npath_b[0][0] and sorted_end_pts[0][1] == tmp_npath_b[0][1]: + if len(tmp_npath_b) > 0 and mask[tmp_npath_b[-1][0], tmp_npath_b[-1][1]] > 0: + tmp_merge.append([sorted_end_pts[:1], tmp_npath_b]) + if len(tmp_npath_a) > 0 and mask[tmp_npath_a[-1][0], tmp_npath_a[-1][1]] > 0: + tmp_merge.append([sorted_end_pts[1:2], tmp_npath_a]) + for tmp_idx in range(len(tmp_merge)): + if len(tmp_merge[tmp_idx][1]) == 0: + continue + end_pts.append(tmp_merge[tmp_idx][0]) + ccs.append(set(tmp_merge[tmp_idx][1])) + if len(end_pt) == 1: + sub_mesh = mesh.subgraph(list(cc)).copy() + pnodes = netx.periphery(sub_mesh) + if len(end_pt) == 1: + ends = [*end_pt] + elif len(sorted_end_pt) == 1: + ends = [*sorted_end_pt] + else: + import pdb; pdb.set_trace() + try: + edge_id = global_mesh.nodes[ends[0]]['edge_id'] + except: + import pdb; pdb.set_trace() + pnodes = sorted(pnodes, + key=lambda x: np.hypot((x[0] - ends[0][0]), (x[1] - ends[0][1])), + reverse=True)[0] + npath = [*netx.shortest_path(sub_mesh, (ends[0][0], ends[0][1]), pnodes, weight='length')] + for np_node in npath: + npath_map[np_node[0], np_node[1]] = edge_id + fpath = [] + if global_mesh.nodes[ends[0]].get('far') is None: + print("None far") + else: + fnodes = global_mesh.nodes[ends[0]].get('far') + dmask = mask + 0 + did = 0 + while True: + did += 1 + dmask = cv2.dilate(dmask, np.ones((3, 3)), iterations=1) + if did > 3: + break + ffnode = [fnode for fnode in fnodes if (dmask[fnode[0], fnode[1]] > 0 and mask[fnode[0], fnode[1]] == 0 and\ + global_mesh.nodes[fnode].get('inpaint_id') != inpaint_id + 1)] + if 
len(ffnode) > 0: + fnode = ffnode[0] + break + if len(ffnode) == 0: + continue + fpath.append((fnode[0], fnode[1])) + barrel_dir = np.array([[1, 0], [1, 1], [0, 1], [-1, 1], [-1, 0], [-1, -1], [0, -1], [1, -1]]) + n2f_dir = (int(fnode[0] - npath[0][0]), int(fnode[1] - npath[0][1])) + while True: + if barrel_dir[0, 0] == n2f_dir[0] and barrel_dir[0, 1] == n2f_dir[1]: + n2f_barrel = barrel_dir.copy() + break + barrel_dir = np.roll(barrel_dir, 1, axis=0) + for step in range(0, len(npath)): + if step == 0: + continue + elif step == 1: + next_dir = (npath[step][0] - npath[step - 1][0], npath[step][1] - npath[step - 1][1]) + while True: + if barrel_dir[0, 0] == next_dir[0] and barrel_dir[0, 1] == next_dir[1]: + next_barrel = barrel_dir.copy() + break + barrel_dir = np.roll(barrel_dir, 1, axis=0) + barrel_pair = np.stack((n2f_barrel, next_barrel), axis=0) + n2f_dir = (barrel_pair[0, 0, 0], barrel_pair[0, 0, 1]) + elif step > 1: + next_dir = (npath[step][0] - npath[step - 1][0], npath[step][1] - npath[step - 1][1]) + while True: + if barrel_pair[1, 0, 0] == next_dir[0] and barrel_pair[1, 0, 1] == next_dir[1]: + next_barrel = barrel_pair.copy() + break + barrel_pair = np.roll(barrel_pair, 1, axis=1) + n2f_dir = (barrel_pair[0, 0, 0], barrel_pair[0, 0, 1]) + new_locs = [] + if abs(n2f_dir[0]) == 1: + new_locs.append((npath[step][0] + n2f_dir[0], npath[step][1])) + if abs(n2f_dir[1]) == 1: + new_locs.append((npath[step][0], npath[step][1] + n2f_dir[1])) + if len(new_locs) > 1: + new_locs = sorted(new_locs, key=lambda xx: np.hypot((xx[0] - fpath[-1][0]), (xx[1] - fpath[-1][1]))) + break_flag = False + for new_loc in new_locs: + new_loc_nes = [xx for xx in [(new_loc[0] + 1, new_loc[1]), (new_loc[0] - 1, new_loc[1]), + (new_loc[0], new_loc[1] + 1), (new_loc[0], new_loc[1] - 1)]\ + if xx[0] >= 0 and xx[0] < fpath_map.shape[0] and xx[1] >= 0 and xx[1] < fpath_map.shape[1]] + if np.all([(fpath_map[nlne[0], nlne[1]] == -1) for nlne in new_loc_nes]) != True: + break + if 
npath_map[new_loc[0], new_loc[1]] != -1: + if npath_map[new_loc[0], new_loc[1]] != edge_id: + break_flag = True + break + else: + continue + if valid_map[new_loc[0], new_loc[1]] == 0: + break_flag = True + break + fpath.append(new_loc) + if break_flag is True: + break + if step != len(npath) - 1: + for xx in npath[step:]: + if npath_map[xx[0], xx[1]] == edge_id: + npath_map[xx[0], xx[1]] = -1 + npath = npath[:step] + if len(fpath) > 0: + for fp_node in fpath: + fpath_map[fp_node[0], fp_node[1]] = edge_id + fpaths[edge_id] = fpath + npaths[edge_id] = npath + fpath_map[valid_near_edge != 0] = -1 + if len(fpath) > 0: + iter_fpath = copy.deepcopy(fpaths[edge_id]) + for node in iter_fpath: + if valid_near_edge[node[0], node[1]] != 0: + fpaths[edge_id].remove(node) + + return fpath_map, npath_map, False, npaths, fpaths, invalid_edge_ids + +def plan_path_e2e(mesh, cc, end_pts, global_mesh, input_edge, mask, valid_map, inpaint_id, npath_map=None, fpath_map=None): + my_npath_map = np.zeros_like(input_edge) - 1 + my_fpath_map = np.zeros_like(input_edge) - 1 + sub_mesh = mesh.subgraph(list(cc)).copy() + ends_1, ends_2 = end_pts[0], end_pts[1] + edge_id = global_mesh.nodes[ends_1]['edge_id'] + npath = [*netx.shortest_path(sub_mesh, (ends_1[0], ends_1[1]), (ends_2[0], ends_2[1]), weight='length')] + for np_node in npath: + my_npath_map[np_node[0], np_node[1]] = edge_id + fpath = [] + if global_mesh.nodes[ends_1].get('far') is None: + print("None far") + else: + fnodes = global_mesh.nodes[ends_1].get('far') + dmask = mask + 0 + while True: + dmask = cv2.dilate(dmask, np.ones((3, 3)), iterations=1) + ffnode = [fnode for fnode in fnodes if (dmask[fnode[0], fnode[1]] > 0 and mask[fnode[0], fnode[1]] == 0 and\ + global_mesh.nodes[fnode].get('inpaint_id') != inpaint_id + 1)] + if len(ffnode) > 0: + fnode = ffnode[0] + break + e_fnodes = global_mesh.nodes[ends_2].get('far') + dmask = mask + 0 + while True: + dmask = cv2.dilate(dmask, np.ones((3, 3)), iterations=1) + e_ffnode = 
[e_fnode for e_fnode in e_fnodes if (dmask[e_fnode[0], e_fnode[1]] > 0 and mask[e_fnode[0], e_fnode[1]] == 0 and\ + global_mesh.nodes[e_fnode].get('inpaint_id') != inpaint_id + 1)] + if len(e_ffnode) > 0: + e_fnode = e_ffnode[0] + break + fpath.append((fnode[0], fnode[1])) + if len(e_ffnode) == 0 or len(ffnode) == 0: + return my_npath_map, my_fpath_map, [], [] + barrel_dir = np.array([[1, 0], [1, 1], [0, 1], [-1, 1], [-1, 0], [-1, -1], [0, -1], [1, -1]]) + n2f_dir = (int(fnode[0] - npath[0][0]), int(fnode[1] - npath[0][1])) + while True: + if barrel_dir[0, 0] == n2f_dir[0] and barrel_dir[0, 1] == n2f_dir[1]: + n2f_barrel = barrel_dir.copy() + break + barrel_dir = np.roll(barrel_dir, 1, axis=0) + for step in range(0, len(npath)): + if step == 0: + continue + elif step == 1: + next_dir = (npath[step][0] - npath[step - 1][0], npath[step][1] - npath[step - 1][1]) + while True: + if barrel_dir[0, 0] == next_dir[0] and barrel_dir[0, 1] == next_dir[1]: + next_barrel = barrel_dir.copy() + break + barrel_dir = np.roll(barrel_dir, 1, axis=0) + barrel_pair = np.stack((n2f_barrel, next_barrel), axis=0) + n2f_dir = (barrel_pair[0, 0, 0], barrel_pair[0, 0, 1]) + elif step > 1: + next_dir = (npath[step][0] - npath[step - 1][0], npath[step][1] - npath[step - 1][1]) + while True: + if barrel_pair[1, 0, 0] == next_dir[0] and barrel_pair[1, 0, 1] == next_dir[1]: + next_barrel = barrel_pair.copy() + break + barrel_pair = np.roll(barrel_pair, 1, axis=1) + n2f_dir = (barrel_pair[0, 0, 0], barrel_pair[0, 0, 1]) + new_locs = [] + if abs(n2f_dir[0]) == 1: + new_locs.append((npath[step][0] + n2f_dir[0], npath[step][1])) + if abs(n2f_dir[1]) == 1: + new_locs.append((npath[step][0], npath[step][1] + n2f_dir[1])) + if len(new_locs) > 1: + new_locs = sorted(new_locs, key=lambda xx: np.hypot((xx[0] - fpath[-1][0]), (xx[1] - fpath[-1][1]))) + break_flag = False + for new_loc in new_locs: + new_loc_nes = [xx for xx in [(new_loc[0] + 1, new_loc[1]), (new_loc[0] - 1, new_loc[1]), + (new_loc[0], 
new_loc[1] + 1), (new_loc[0], new_loc[1] - 1)]\ + if xx[0] >= 0 and xx[0] < my_fpath_map.shape[0] and xx[1] >= 0 and xx[1] < my_fpath_map.shape[1]] + if fpath_map is not None and np.sum([fpath_map[nlne[0], nlne[1]] for nlne in new_loc_nes]) != 0: + break_flag = True + break + if my_npath_map[new_loc[0], new_loc[1]] != -1: + continue + if npath_map is not None and npath_map[new_loc[0], new_loc[1]] != edge_id: + break_flag = True + break + fpath.append(new_loc) + if break_flag is True: + break + if (e_fnode[0], e_fnode[1]) not in fpath: + fpath.append((e_fnode[0], e_fnode[1])) + if step != len(npath) - 1: + for xx in npath[step:]: + if my_npath_map[xx[0], xx[1]] == edge_id: + my_npath_map[xx[0], xx[1]] = -1 + npath = npath[:step] + if len(fpath) > 0: + for fp_node in fpath: + my_fpath_map[fp_node[0], fp_node[1]] = edge_id + + return my_fpath_map, my_npath_map, npath, fpath + +def plan_path(mesh, info_on_pix, cc, end_pt, global_mesh, input_edge, mask, valid_map, inpaint_id, npath_map=None, fpath_map=None, npath=None): + my_npath_map = np.zeros_like(input_edge) - 1 + my_fpath_map = np.zeros_like(input_edge) - 1 + sub_mesh = mesh.subgraph(list(cc)).copy() + pnodes = netx.periphery(sub_mesh) + ends = [*end_pt] + edge_id = global_mesh.nodes[ends[0]]['edge_id'] + pnodes = sorted(pnodes, + key=lambda x: np.hypot((x[0] - ends[0][0]), (x[1] - ends[0][1])), + reverse=True)[0] + if npath is None: + npath = [*netx.shortest_path(sub_mesh, (ends[0][0], ends[0][1]), pnodes, weight='length')] + else: + if (ends[0][0], ends[0][1]) == npath[0]: + npath = npath + elif (ends[0][0], ends[0][1]) == npath[-1]: + npath = npath[::-1] + else: + import pdb; pdb.set_trace() + for np_node in npath: + my_npath_map[np_node[0], np_node[1]] = edge_id + fpath = [] + if global_mesh.nodes[ends[0]].get('far') is None: + print("None far") + else: + fnodes = global_mesh.nodes[ends[0]].get('far') + dmask = mask + 0 + did = 0 + while True: + did += 1 + if did > 3: + return my_fpath_map, my_npath_map, -1 + 
dmask = cv2.dilate(dmask, np.ones((3, 3)), iterations=1) + ffnode = [fnode for fnode in fnodes if (dmask[fnode[0], fnode[1]] > 0 and mask[fnode[0], fnode[1]] == 0 and\ + global_mesh.nodes[fnode].get('inpaint_id') != inpaint_id + 1)] + if len(ffnode) > 0: + fnode = ffnode[0] + break + + fpath.append((fnode[0], fnode[1])) + disp_diff = 0. + for n_loc in npath: + if mask[n_loc[0], n_loc[1]] != 0: + disp_diff = abs(abs(1. / info_on_pix[(n_loc[0], n_loc[1])][0]['depth']) - abs(1. / ends[0][2])) + break + barrel_dir = np.array([[1, 0], [1, 1], [0, 1], [-1, 1], [-1, 0], [-1, -1], [0, -1], [1, -1]]) + n2f_dir = (int(fnode[0] - npath[0][0]), int(fnode[1] - npath[0][1])) + while True: + if barrel_dir[0, 0] == n2f_dir[0] and barrel_dir[0, 1] == n2f_dir[1]: + n2f_barrel = barrel_dir.copy() + break + barrel_dir = np.roll(barrel_dir, 1, axis=0) + for step in range(0, len(npath)): + if step == 0: + continue + elif step == 1: + next_dir = (npath[step][0] - npath[step - 1][0], npath[step][1] - npath[step - 1][1]) + while True: + if barrel_dir[0, 0] == next_dir[0] and barrel_dir[0, 1] == next_dir[1]: + next_barrel = barrel_dir.copy() + break + barrel_dir = np.roll(barrel_dir, 1, axis=0) + barrel_pair = np.stack((n2f_barrel, next_barrel), axis=0) + n2f_dir = (barrel_pair[0, 0, 0], barrel_pair[0, 0, 1]) + elif step > 1: + next_dir = (npath[step][0] - npath[step - 1][0], npath[step][1] - npath[step - 1][1]) + while True: + if barrel_pair[1, 0, 0] == next_dir[0] and barrel_pair[1, 0, 1] == next_dir[1]: + next_barrel = barrel_pair.copy() + break + barrel_pair = np.roll(barrel_pair, 1, axis=1) + n2f_dir = (barrel_pair[0, 0, 0], barrel_pair[0, 0, 1]) + new_locs = [] + if abs(n2f_dir[0]) == 1: + new_locs.append((npath[step][0] + n2f_dir[0], npath[step][1])) + if abs(n2f_dir[1]) == 1: + new_locs.append((npath[step][0], npath[step][1] + n2f_dir[1])) + if len(new_locs) > 1: + new_locs = sorted(new_locs, key=lambda xx: np.hypot((xx[0] - fpath[-1][0]), (xx[1] - fpath[-1][1]))) + break_flag = 
False + for new_loc in new_locs: + new_loc_nes = [xx for xx in [(new_loc[0] + 1, new_loc[1]), (new_loc[0] - 1, new_loc[1]), + (new_loc[0], new_loc[1] + 1), (new_loc[0], new_loc[1] - 1)]\ + if xx[0] >= 0 and xx[0] < my_fpath_map.shape[0] and xx[1] >= 0 and xx[1] < my_fpath_map.shape[1]] + if fpath_map is not None and np.all([(fpath_map[nlne[0], nlne[1]] == -1) for nlne in new_loc_nes]) != True: + break_flag = True + break + if np.all([(my_fpath_map[nlne[0], nlne[1]] == -1) for nlne in new_loc_nes]) != True: + break_flag = True + break + if my_npath_map[new_loc[0], new_loc[1]] != -1: + continue + if npath_map is not None and npath_map[new_loc[0], new_loc[1]] != edge_id: + break_flag = True + break + if valid_map[new_loc[0], new_loc[1]] == 0: + break_flag = True + break + fpath.append(new_loc) + if break_flag is True: + break + if step != len(npath) - 1: + for xx in npath[step:]: + if my_npath_map[xx[0], xx[1]] == edge_id: + my_npath_map[xx[0], xx[1]] = -1 + npath = npath[:step] + if len(fpath) > 0: + for fp_node in fpath: + my_fpath_map[fp_node[0], fp_node[1]] = edge_id + + return my_fpath_map, my_npath_map, disp_diff + +def refresh_node(old_node, old_feat, new_node, new_feat, mesh, stime=False): + mesh.add_node(new_node) + mesh.nodes[new_node].update(new_feat) + mesh.nodes[new_node].update(old_feat) + for ne in mesh.neighbors(old_node): + mesh.add_edge(new_node, ne) + if mesh.nodes[new_node].get('far') is not None: + tmp_far_nodes = mesh.nodes[new_node]['far'] + for far_node in tmp_far_nodes: + if mesh.has_node(far_node) is False: + mesh.nodes[new_node]['far'].remove(far_node) + continue + if mesh.nodes[far_node].get('near') is not None: + for idx in range(len(mesh.nodes[far_node].get('near'))): + if mesh.nodes[far_node]['near'][idx][0] == new_node[0] and mesh.nodes[far_node]['near'][idx][1] == new_node[1]: + if len(mesh.nodes[far_node]['near'][idx]) == len(old_node): + mesh.nodes[far_node]['near'][idx] = new_node + if mesh.nodes[new_node].get('near') is not None: + 
tmp_near_nodes = mesh.nodes[new_node]['near'] + for near_node in tmp_near_nodes: + if mesh.has_node(near_node) is False: + mesh.nodes[new_node]['near'].remove(near_node) + continue + if mesh.nodes[near_node].get('far') is not None: + for idx in range(len(mesh.nodes[near_node].get('far'))): + if mesh.nodes[near_node]['far'][idx][0] == new_node[0] and mesh.nodes[near_node]['far'][idx][1] == new_node[1]: + if len(mesh.nodes[near_node]['far'][idx]) == len(old_node): + mesh.nodes[near_node]['far'][idx] = new_node + if new_node != old_node: + mesh.remove_node(old_node) + if stime is False: + return mesh + else: + return mesh, None, None + + +def create_placeholder(context, mask, depth, fpath_map, npath_map, mesh, inpaint_id, edge_ccs, extend_edge_cc, all_edge_maps, self_edge_id): + add_node_time = 0 + add_edge_time = 0 + add_far_near_time = 0 + valid_area = context + mask + H, W = mesh.graph['H'], mesh.graph['W'] + edge_cc = edge_ccs[self_edge_id] + num_com = len(edge_cc) + len(extend_edge_cc) + hxs, hys = np.where(mask > 0) + for hx, hy in zip(hxs, hys): + mesh.add_node((hx, hy), inpaint_id=inpaint_id + 1, num_context=num_com) + for hx, hy in zip(hxs, hys): + four_nes = [(x, y) for x, y in [(hx + 1, hy), (hx - 1, hy), (hx, hy + 1), (hx, hy - 1)] if\ + 0 <= x < mesh.graph['H'] and 0 <= y < mesh.graph['W'] and valid_area[x, y] != 0] + for ne in four_nes: + if mask[ne[0], ne[1]] != 0: + if not mesh.has_edge((hx, hy), ne): + mesh.add_edge((hx, hy), ne) + elif depth[ne[0], ne[1]] != 0: + if mesh.has_node((ne[0], ne[1], depth[ne[0], ne[1]])) and\ + not mesh.has_edge((hx, hy), (ne[0], ne[1], depth[ne[0], ne[1]])): + mesh.add_edge((hx, hy), (ne[0], ne[1], depth[ne[0], ne[1]])) + else: + print("Undefined context node.") + import pdb; pdb.set_trace() + near_ids = np.unique(npath_map) + if near_ids[0] == -1: near_ids = near_ids[1:] + for near_id in near_ids: + hxs, hys = np.where((fpath_map == near_id) & (mask > 0)) + if hxs.shape[0] > 0: + mesh.graph['max_edge_id'] = 
mesh.graph['max_edge_id'] + 1 + else: + break + for hx, hy in zip(hxs, hys): + mesh.nodes[(hx, hy)]['edge_id'] = int(round(mesh.graph['max_edge_id'])) + four_nes = [(x, y) for x, y in [(hx + 1, hy), (hx - 1, hy), (hx, hy + 1), (hx, hy - 1)] if\ + x < mesh.graph['H'] and x >= 0 and y < mesh.graph['W'] and y >= 0 and npath_map[x, y] == near_id] + for xx in four_nes: + xx_n = copy.deepcopy(xx) + if not mesh.has_node(xx_n): + if mesh.has_node((xx_n[0], xx_n[1], depth[xx_n[0], xx_n[1]])): + xx_n = (xx_n[0], xx_n[1], depth[xx_n[0], xx_n[1]]) + if mesh.has_edge((hx, hy), xx_n): + # pass + mesh.remove_edge((hx, hy), xx_n) + if mesh.nodes[(hx, hy)].get('near') is None: + mesh.nodes[(hx, hy)]['near'] = [] + mesh.nodes[(hx, hy)]['near'].append(xx_n) + connect_point_exception = set() + hxs, hys = np.where((npath_map == near_id) & (all_edge_maps > -1)) + for hx, hy in zip(hxs, hys): + unknown_id = int(round(all_edge_maps[hx, hy])) + if unknown_id != near_id and unknown_id != self_edge_id: + unknown_node = set([xx for xx in edge_ccs[unknown_id] if xx[0] == hx and xx[1] == hy]) + connect_point_exception |= unknown_node + hxs, hys = np.where((npath_map == near_id) & (mask > 0)) + if hxs.shape[0] > 0: + mesh.graph['max_edge_id'] = mesh.graph['max_edge_id'] + 1 + else: + break + for hx, hy in zip(hxs, hys): + mesh.nodes[(hx, hy)]['edge_id'] = int(round(mesh.graph['max_edge_id'])) + mesh.nodes[(hx, hy)]['connect_point_id'] = int(round(near_id)) + mesh.nodes[(hx, hy)]['connect_point_exception'] = connect_point_exception + four_nes = [(x, y) for x, y in [(hx + 1, hy), (hx - 1, hy), (hx, hy + 1), (hx, hy - 1)] if\ + x < mesh.graph['H'] and x >= 0 and y < mesh.graph['W'] and y >= 0 and fpath_map[x, y] == near_id] + for xx in four_nes: + xx_n = copy.deepcopy(xx) + if not mesh.has_node(xx_n): + if mesh.has_node((xx_n[0], xx_n[1], depth[xx_n[0], xx_n[1]])): + xx_n = (xx_n[0], xx_n[1], depth[xx_n[0], xx_n[1]]) + if mesh.has_edge((hx, hy), xx_n): + mesh.remove_edge((hx, hy), xx_n) + if 
mesh.nodes[(hx, hy)].get('far') is None: + mesh.nodes[(hx, hy)]['far'] = [] + mesh.nodes[(hx, hy)]['far'].append(xx_n) + + return mesh, add_node_time, add_edge_time, add_far_near_time + +def clean_far_edge(mask_edge, mask_edge_with_id, context_edge, mask, info_on_pix, global_mesh, anchor): + if isinstance(mask_edge, torch.Tensor): + if mask_edge.is_cuda: + mask_edge = mask_edge.cpu() + mask_edge = mask_edge.data + mask_edge = mask_edge.numpy() + if isinstance(context_edge, torch.Tensor): + if context_edge.is_cuda: + context_edge = context_edge.cpu() + context_edge = context_edge.data + context_edge = context_edge.numpy() + if isinstance(mask, torch.Tensor): + if mask.is_cuda: + mask = mask.cpu() + mask = mask.data + mask = mask.numpy() + mask = mask.squeeze() + mask_edge = mask_edge.squeeze() + context_edge = context_edge.squeeze() + valid_near_edge = np.zeros_like(mask_edge) + far_edge = np.zeros_like(mask_edge) + far_edge_with_id = np.ones_like(mask_edge) * -1 + near_edge_with_id = np.ones_like(mask_edge) * -1 + uncleaned_far_edge = np.zeros_like(mask_edge) + # Detect if there is any valid pixel mask_edge, if not ==> return default value + if mask_edge.sum() == 0: + return far_edge, uncleaned_far_edge, far_edge_with_id, near_edge_with_id + mask_edge_ids = dict(collections.Counter(mask_edge_with_id.flatten())).keys() + for edge_id in mask_edge_ids: + if edge_id < 0: + continue + specific_edge_map = (mask_edge_with_id == edge_id).astype(np.uint8) + _, sub_specific_edge_maps = cv2.connectedComponents(specific_edge_map.astype(np.uint8), connectivity=8) + for sub_edge_id in range(1, sub_specific_edge_maps.max() + 1): + specific_edge_map = (sub_specific_edge_maps == sub_edge_id).astype(np.uint8) + edge_pxs, edge_pys = np.where(specific_edge_map > 0) + edge_mesh = netx.Graph() + for edge_px, edge_py in zip(edge_pxs, edge_pys): + edge_mesh.add_node((edge_px, edge_py)) + for ex in [edge_px-1, edge_px, edge_px+1]: + for ey in [edge_py-1, edge_py, edge_py+1]: + if edge_px 
== ex and edge_py == ey: + continue + if ex < 0 or ex >= specific_edge_map.shape[0] or ey < 0 or ey >= specific_edge_map.shape[1]: + continue + if specific_edge_map[ex, ey] == 1: + if edge_mesh.has_node((ex, ey)): + edge_mesh.add_edge((ex, ey), (edge_px, edge_py)) + periphery_nodes = netx.periphery(edge_mesh) + path_diameter = netx.diameter(edge_mesh) + start_near_node = None + for node_s in periphery_nodes: + for node_e in periphery_nodes: + if node_s != node_e: + if netx.shortest_path_length(edge_mesh, node_s, node_e) == path_diameter: + if np.any(context_edge[node_s[0]-1:node_s[0]+2, node_s[1]-1:node_s[1]+2].flatten()): + start_near_node = (node_s[0], node_s[1]) + end_near_node = (node_e[0], node_e[1]) + break + if np.any(context_edge[node_e[0]-1:node_e[0]+2, node_e[1]-1:node_e[1]+2].flatten()): + start_near_node = (node_e[0], node_e[1]) + end_near_node = (node_s[0], node_s[1]) + break + if start_near_node is not None: + break + if start_near_node is None: + continue + new_specific_edge_map = np.zeros_like(mask) + for path_node in netx.shortest_path(edge_mesh, start_near_node, end_near_node): + new_specific_edge_map[path_node[0], path_node[1]] = 1 + context_near_pxs, context_near_pys = np.where(context_edge[start_near_node[0]-1:start_near_node[0]+2, start_near_node[1]-1:start_near_node[1]+2] > 0) + distance = np.abs((context_near_pxs - 1)) + np.abs((context_near_pys - 1)) + if (np.where(distance == distance.min())[0].shape[0]) > 1: + closest_pxs = context_near_pxs[np.where(distance == distance.min())[0]] + closest_pys = context_near_pys[np.where(distance == distance.min())[0]] + closest_depths = [] + for closest_px, closest_py in zip(closest_pxs, closest_pys): + if info_on_pix.get((closest_px + start_near_node[0] - 1 + anchor[0], closest_py + start_near_node[1] - 1 + anchor[2])) is not None: + for info in info_on_pix.get((closest_px + start_near_node[0] - 1 + anchor[0], closest_py + start_near_node[1] - 1 + anchor[2])): + if info['synthesis'] is False: + 
closest_depths.append(abs(info['depth'])) + context_near_px, context_near_py = closest_pxs[np.array(closest_depths).argmax()], closest_pys[np.array(closest_depths).argmax()] + else: + context_near_px, context_near_py = context_near_pxs[distance.argmin()], context_near_pys[distance.argmin()] + context_near_node = (start_near_node[0]-1 + context_near_px, start_near_node[1]-1 + context_near_py) + far_node_list = [] + global_context_near_node = (context_near_node[0] + anchor[0], context_near_node[1] + anchor[2]) + if info_on_pix.get(global_context_near_node) is not None: + for info in info_on_pix[global_context_near_node]: + if info['synthesis'] is False: + context_near_node_3d = (global_context_near_node[0], global_context_near_node[1], info['depth']) + if global_mesh.nodes[context_near_node_3d].get('far') is not None: + for far_node in global_mesh.nodes[context_near_node_3d].get('far'): + far_node = (far_node[0] - anchor[0], far_node[1] - anchor[2], far_node[2]) + if mask[far_node[0], far_node[1]] == 0: + far_node_list.append([far_node[0], far_node[1]]) + if len(far_node_list) > 0: + far_nodes_dist = np.sum(np.abs(np.array(far_node_list) - np.array([[edge_px, edge_py]])), axis=1) + context_far_node = tuple(far_node_list[far_nodes_dist.argmin()]) + corresponding_far_edge = np.zeros_like(mask_edge) + corresponding_far_edge[context_far_node[0], context_far_node[1]] = 1 + surround_map = cv2.dilate(new_specific_edge_map.astype(np.uint8), + np.array([[1,1,1],[1,1,1],[1,1,1]]).astype(np.uint8), + iterations=1) + specific_edge_map_wo_end_pt = new_specific_edge_map.copy() + specific_edge_map_wo_end_pt[end_near_node[0], end_near_node[1]] = 0 + surround_map_wo_end_pt = cv2.dilate(specific_edge_map_wo_end_pt.astype(np.uint8), + np.array([[1,1,1],[1,1,1],[1,1,1]]).astype(np.uint8), + iterations=1) + surround_map_wo_end_pt[new_specific_edge_map > 0] = 0 + surround_map_wo_end_pt[context_near_node[0], context_near_node[1]] = 0 + surround_map = surround_map_wo_end_pt.copy() + _, 
far_edge_cc = cv2.connectedComponents(surround_map.astype(np.uint8), connectivity=4) + start_far_node = None + accompany_far_node = None + if surround_map[context_far_node[0], context_far_node[1]] == 1: + start_far_node = context_far_node + else: + four_nes = [(context_far_node[0] - 1, context_far_node[1]), + (context_far_node[0] + 1, context_far_node[1]), + (context_far_node[0], context_far_node[1] - 1), + (context_far_node[0], context_far_node[1] + 1)] + candidate_bevel = [] + for ne in four_nes: + if surround_map[ne[0], ne[1]] == 1: + start_far_node = (ne[0], ne[1]) + break + elif (ne[0] != context_near_node[0] or ne[1] != context_near_node[1]) and \ + (ne[0] != start_near_node[0] or ne[1] != start_near_node[1]): + candidate_bevel.append((ne[0], ne[1])) + if start_far_node is None: + for ne in candidate_bevel: + if ne[0] == context_far_node[0]: + bevel_xys = [[ne[0] + 1, ne[1]], [ne[0] - 1, ne[1]]] + if ne[1] == context_far_node[1]: + bevel_xys = [[ne[0], ne[1] + 1], [ne[0], ne[1] - 1]] + for bevel_x, bevel_y in bevel_xys: + if surround_map[bevel_x, bevel_y] == 1: + start_far_node = (bevel_x, bevel_y) + accompany_far_node = (ne[0], ne[1]) + break + if start_far_node is not None: + break + if start_far_node is not None: + for far_edge_id in range(1, far_edge_cc.max() + 1): + specific_far_edge = (far_edge_cc == far_edge_id).astype(np.uint8) + if specific_far_edge[start_far_node[0], start_far_node[1]] == 1: + if accompany_far_node is not None: + specific_far_edge[accompany_far_node] = 1 + far_edge[specific_far_edge > 0] = 1 + far_edge_with_id[specific_far_edge > 0] = edge_id + end_far_candidates = np.zeros_like(far_edge) + end_far_candidates[end_near_node[0], end_near_node[1]] = 1 + end_far_candidates = cv2.dilate(end_far_candidates.astype(np.uint8), + np.array([[0,1,0],[1,1,1],[0,1,0]]).astype(np.uint8), + iterations=1) + end_far_candidates[end_near_node[0], end_near_node[1]] = 0 + invalid_nodes = (((far_edge_cc != far_edge_id).astype(np.uint8) * \ + (far_edge_cc 
!= 0).astype(np.uint8)).astype(np.uint8) + \ + (new_specific_edge_map).astype(np.uint8) + \ + (mask == 0).astype(np.uint8)).clip(0, 1) + end_far_candidates[invalid_nodes > 0] = 0 + far_edge[end_far_candidates > 0] = 1 + far_edge_with_id[end_far_candidates > 0] = edge_id + + far_edge[context_far_node[0], context_far_node[1]] = 1 + far_edge_with_id[context_far_node[0], context_far_node[1]] = edge_id + near_edge_with_id[(mask_edge_with_id == edge_id) > 0] = edge_id + uncleaned_far_edge = far_edge.copy() + far_edge[mask == 0] = 0 + + return far_edge, uncleaned_far_edge, far_edge_with_id, near_edge_with_id + +def get_MiDaS_samples(image_folder, depth_folder, config, specific=None, aft_certain=None): + lines = [os.path.splitext(os.path.basename(xx))[0] for xx in glob.glob(os.path.join(image_folder, '*' + config['img_format']))] + samples = [] + generic_pose = np.eye(4) + assert len(config['traj_types']) == len(config['x_shift_range']) ==\ + len(config['y_shift_range']) == len(config['z_shift_range']) == len(config['video_postfix']), \ + "The number of elements in 'traj_types', 'x_shift_range', 'y_shift_range', 'z_shift_range' and \ + 'video_postfix' should be equal." + tgt_pose = [[generic_pose * 1]] + tgts_poses = [] + for traj_idx in range(len(config['traj_types'])): + tgt_poses = [] + sx, sy, sz = path_planning(config['num_frames'], config['x_shift_range'][traj_idx], config['y_shift_range'][traj_idx], + config['z_shift_range'][traj_idx], path_type=config['traj_types'][traj_idx]) + for xx, yy, zz in zip(sx, sy, sz): + tgt_poses.append(generic_pose * 1.) 
def get_valid_size(imap):
    """Return the bounding box of the positive region of ``imap``.

    Rows are selected where the sum over axis 1 is positive, columns where
    the sum over axis 0 is positive.

    Returns:
        dict with keys ``'x_max'``, ``'y_max'``, ``'x_min'``, ``'y_min'``;
        ``x`` indexes axis 0 and ``y`` indexes axis 1. The ``*_max`` values
        are exclusive (last hit + 1) so they can be used directly as slice
        ends.

    Raises:
        ValueError: from NumPy when no row/column has a positive sum.
    """
    row_hits = np.where(imap.sum(1).squeeze() > 0)[0]
    col_hits = np.where(imap.sum(0).squeeze() > 0)[0]
    size_dict = {'x_max': row_hits.max() + 1,
                 'y_max': col_hits.max() + 1,
                 'x_min': row_hits.min(),
                 'y_min': col_hits.min()}

    return size_dict


def dilate_valid_size(isize_dict, imap, dilate=(0, 0)):
    """Grow a bounding box by ``dilate`` pixels, clamped to ``imap``'s extent.

    Args:
        isize_dict: box as produced by ``get_valid_size``; it is not modified.
        imap: array whose ``shape[0]`` / ``shape[1]`` bound the x / y range.
        dilate: ``(pad_x, pad_y)`` padding applied on both sides of each axis.

    Returns:
        A new dict with the same keys as ``isize_dict``.
    """
    pad_x, pad_y = dilate[0], dilate[1]
    osize_dict = copy.deepcopy(isize_dict)
    osize_dict['x_min'] = max(0, osize_dict['x_min'] - pad_x)
    osize_dict['x_max'] = min(imap.shape[0], osize_dict['x_max'] + pad_x)
    # The y range must use the y padding on *both* sides; the previous code
    # subtracted dilate[0] here, which skewed boxes for asymmetric padding.
    osize_dict['y_min'] = max(0, osize_dict['y_min'] - pad_y)
    osize_dict['y_max'] = min(imap.shape[1], osize_dict['y_max'] + pad_y)

    return osize_dict


def crop_maps_by_size(size, *imaps):
    """Crop every map in ``imaps`` to the box ``size`` and return copies.

    Args:
        size: dict with ``'x_min'``, ``'x_max'``, ``'y_min'``, ``'y_max'``.
        *imaps: arrays indexable as ``[x, y]`` on their first two axes.

    Returns:
        list of cropped arrays, in the order given; each is an independent
        copy, so writing to a crop never touches the source map.
    """
    rows = slice(size['x_min'], size['x_max'])
    cols = slice(size['y_min'], size['y_max'])

    return [imap[rows, cols].copy() for imap in imaps]


def smooth_cntsyn_gap(init_depth_map, mask_region, context_region, init_mask_region=None):
    """Fill masked depth values with the mean of their known 4-neighbours.

    Two passes are run. In each pass, every pixel receives the average of the
    up/down/left/right depths whose neighbour lies in ``context_region`` or in
    the region filled so far; pixels inside ``mask_region`` with a non-zero
    average are overwritten and then count as filled for the next pass.
    ``np.roll`` is used for the shifts, so neighbours wrap around the borders.

    Args:
        init_depth_map: depth array; it is copied, not modified.
        mask_region: array marking where depth may be written.
        context_region: array marking where depth is trusted.
        init_mask_region: optional array of already-filled pixels.

    Returns:
        The smoothed copy of ``init_depth_map``.
    """
    if init_mask_region is not None:
        curr_mask_region = init_mask_region * 1
    else:
        curr_mask_region = mask_region * 0
    depth_map = init_depth_map.copy()
    # (shift, axis) pairs for the four direct neighbours.
    shifts = ((1, 0), (-1, 0), (1, 1), (-1, 1))
    for _ in range(2):
        cm_mask = context_region + curr_mask_region
        rolled_depths = [np.roll(depth_map, step, axis) for step, axis in shifts]
        rolled_masks = [np.roll(cm_mask, step, axis) for step, axis in shifts]
        weighted_sum = (rolled_depths[0] * rolled_masks[0] + rolled_depths[1] * rolled_masks[1] +
                        rolled_depths[2] * rolled_masks[2] + rolled_depths[3] * rolled_masks[3])
        # 1e-6 keeps the division finite where no neighbour is known.
        fluxin_depths = weighted_sum / \
            ((rolled_masks[0] + rolled_masks[1] + rolled_masks[2] + rolled_masks[3]) + 1e-6)
        fluxin_mask = (fluxin_depths != 0) * mask_region
        init_mask = (fluxin_mask * (curr_mask_region >= 0).astype(np.float32) > 0).astype(np.uint8)
        depth_map[init_mask > 0] = fluxin_depths[init_mask > 0]
        if init_mask.shape[-1] > curr_mask_region.shape[-1]:
            curr_mask_region[init_mask.sum(-1, keepdims=True) > 0] = 1
        else:
            curr_mask_region[init_mask > 0] = 1
        # NOTE(review): the flattened source does not show whether this write
        # sits inside or after the loop; it repeats the init_mask write above
        # whenever curr_mask_region is non-negative - confirm against upstream.
        depth_map[fluxin_mask > 0] = fluxin_depths[fluxin_mask > 0]

    return depth_map


def read_MiDaS_depth(disp_fi, disp_rescale=10., h=None, w=None):
    """Load a disparity map from ``disp_fi`` and convert it to depth.

    Files whose extension contains ``npy`` are read with ``np.load``; anything
    else is read with ``imageio.imread`` and cast to float32. The disparity is
    shifted so its minimum is 0, smoothed with a 3x3 box blur, rescaled so its
    maximum equals ``disp_rescale`` and, when both ``h`` and ``w`` are given,
    resized to ``(h, w)`` with ``order=1``.

    Returns:
        ``1 / max(disparity, 0.05)``, i.e. depth is capped at 20.
    """
    if 'npy' in os.path.splitext(disp_fi)[-1]:
        disp = np.load(disp_fi)
    else:
        disp = imageio.imread(disp_fi).astype(np.float32)
    disp = disp - disp.min()
    # Blur/resize operate on the map normalised by its max; the scale is
    # restored right after each call.
    disp = cv2.blur(disp / disp.max(), ksize=(3, 3)) * disp.max()
    disp = (disp / disp.max()) * disp_rescale
    if h is not None and w is not None:
        disp = resize(disp / disp.max(), (h, w), order=1) * disp.max()
    depth = 1. / np.maximum(disp, 0.05)

    return depth


def follow_image_aspect_ratio(depth, image):
    """Resize ``depth`` so its height/width ratio matches that of ``image``.

    The depth map is only ever enlarged along one axis: when it is relatively
    taller than the image its width grows, otherwise its height grows. The
    target size is truncated to ints and ``resize`` is called with ``order=0``.
    """
    H, W = image.shape[:2]
    image_aspect_ratio = H / W
    dH, dW = depth.shape[:2]
    depth_aspect_ratio = dH / dW
    if depth_aspect_ratio > image_aspect_ratio:
        resize_H = dH
        resize_W = dH / image_aspect_ratio
    else:
        resize_W = dW
        resize_H = dW * image_aspect_ratio
    peak = depth.max()
    depth = resize(depth / peak,
                   (int(resize_H),
                    int(resize_W)),
                   order=0) * peak

    return depth


def depth_resize(depth, origin_size, image_size):
    """Resize ``depth`` to ``origin_size``, or to ``image_size`` as fallback.

    ``image_size`` is used only when ``origin_size[0]`` is 0. The map is
    normalised by its maximum before ``resize`` (``order=1``, ``mode='edge'``)
    and scaled back afterwards.
    """
    target_size = origin_size if origin_size[0] != 0 else image_size
    max_depth = depth.max()
    depth = depth / max_depth
    depth = resize(depth, target_size, order=1, mode='edge')
    depth = depth * max_depth

    return depth
num_end_group == 1: + continue + for end_id in range(1, num_end_group): + end_pxs, end_pys = np.where((end_group == end_id)) + end_px, end_py = end_pxs[0], end_pys[0] + other_edges_info.append({}) + other_edges_info[-1]['edge_id'] = edge_id + # other_edges_info[-1]['near_depth'] = None + other_edges_info[-1]['diff'] = None + other_edges_info[-1]['edge_map'] = np.zeros_like(self_edge) + other_edges_info[-1]['end_point_map'] = np.zeros_like(self_edge) + other_edges_info[-1]['end_point_map'][(end_group == end_id)] = 1 + other_edges_info[-1]['forbidden_point_map'] = np.zeros_like(self_edge) + other_edges_info[-1]['forbidden_point_map'][(end_group != end_id) * (end_group != 0)] = 1 + other_edges_info[-1]['forbidden_point_map'] = cv2.dilate(other_edges_info[-1]['forbidden_point_map'], kernel=np.array([[1,1,1],[1,1,1],[1,1,1]]), iterations=2) + for x in edge_ccs[edge_id]: + nx = x[0] - anchor[0] + ny = x[1] - anchor[1] + if nx == end_px and ny == end_py: + # other_edges_info[-1]['near_depth'] = abs(nx) + if mesh.nodes[x].get('far') is not None and len(mesh.nodes[x].get('far')) == 1: + other_edges_info[-1]['diff'] = abs(1./abs([*mesh.nodes[x].get('far')][0][2]) - 1./abs(x[2])) + else: + other_edges_info[-1]['diff'] = 0 + # if end_group[nx, ny] != end_id and end_group[nx, ny] > 0: + # continue + try: + if isolate_condition[nx, ny] == 1: + other_edges_info[-1]['edge_map'][nx, ny] = 1 + except: + pass + try: + other_edges_info = sorted(other_edges_info, key=lambda x : x['diff'], reverse=True) + except: + import pdb + pdb.set_trace() + # import pdb + # pdb.set_trace() + # other_edges = other_edges[..., None] + for other_edge in other_edges_info: + if other_edge['end_point_map'] is None: + import pdb + pdb.set_trace() + + other_edges = other_edges * context + + return other_edges, other_edges_info + +def require_depth_edge(context_edge, mask): + dilate_mask = cv2.dilate(mask, np.array([[1,1,1],[1,1,1],[1,1,1]]).astype(np.uint8), iterations=1) + if (dilate_mask * 
context_edge).max() == 0: + return False + else: + return True + +def refine_color_around_edge(mesh, info_on_pix, edge_ccs, config, spdb=False): + H, W = mesh.graph['H'], mesh.graph['W'] + tmp_edge_ccs = copy.deepcopy(edge_ccs) + for edge_id, edge_cc in enumerate(edge_ccs): + if len(edge_cc) == 0: + continue + near_maps = np.zeros((H, W)).astype(bool) + far_maps = np.zeros((H, W)).astype(bool) + tmp_far_nodes = set() + far_nodes = set() + near_nodes = set() + end_nodes = set() + for i in range(5): + if i == 0: + for edge_node in edge_cc: + if mesh.nodes[edge_node].get('depth_edge_dilate_2_color_flag') is not True: + break + if mesh.nodes[edge_node].get('inpaint_id') == 1: + near_nodes.add(edge_node) + tmp_node = mesh.nodes[edge_node].get('far') + tmp_node = set(tmp_node) if tmp_node is not None else set() + tmp_far_nodes |= tmp_node + rmv_tmp_far_nodes = set() + for far_node in tmp_far_nodes: + if not(mesh.has_node(far_node) and mesh.nodes[far_node].get('inpaint_id') == 1): + rmv_tmp_far_nodes.add(far_node) + if len(tmp_far_nodes - rmv_tmp_far_nodes) == 0: + break + else: + for near_node in near_nodes: + near_maps[near_node[0], near_node[1]] = True + mesh.nodes[near_node]['refine_rgbd'] = True + mesh.nodes[near_node]['backup_depth'] = near_node[2] \ + if mesh.nodes[near_node].get('real_depth') is None else mesh.nodes[near_node]['real_depth'] + mesh.nodes[near_node]['backup_color'] = mesh.nodes[near_node]['color'] + for far_node in tmp_far_nodes: + if mesh.has_node(far_node) and mesh.nodes[far_node].get('inpaint_id') == 1: + far_nodes.add(far_node) + far_maps[far_node[0], far_node[1]] = True + mesh.nodes[far_node]['refine_rgbd'] = True + mesh.nodes[far_node]['backup_depth'] = far_node[2] \ + if mesh.nodes[far_node].get('real_depth') is None else mesh.nodes[far_node]['real_depth'] + mesh.nodes[far_node]['backup_color'] = mesh.nodes[far_node]['color'] + tmp_far_nodes = far_nodes + tmp_near_nodes = near_nodes + else: + tmp_far_nodes = new_tmp_far_nodes + tmp_near_nodes 
= new_tmp_near_nodes + new_tmp_far_nodes = None + new_tmp_near_nodes = None + new_tmp_far_nodes = set() + new_tmp_near_nodes = set() + for node in tmp_near_nodes: + for ne_node in mesh.neighbors(node): + if far_maps[ne_node[0], ne_node[1]] == False and \ + near_maps[ne_node[0], ne_node[1]] == False: + if mesh.nodes[ne_node].get('inpaint_id') == 1: + new_tmp_near_nodes.add(ne_node) + near_maps[ne_node[0], ne_node[1]] = True + mesh.nodes[ne_node]['refine_rgbd'] = True + mesh.nodes[ne_node]['backup_depth'] = ne_node[2] \ + if mesh.nodes[ne_node].get('real_depth') is None else mesh.nodes[ne_node]['real_depth'] + mesh.nodes[ne_node]['backup_color'] = mesh.nodes[ne_node]['color'] + else: + mesh.nodes[ne_node]['backup_depth'] = ne_node[2] \ + if mesh.nodes[ne_node].get('real_depth') is None else mesh.nodes[ne_node]['real_depth'] + mesh.nodes[ne_node]['backup_color'] = mesh.nodes[ne_node]['color'] + end_nodes.add(node) + near_nodes.update(new_tmp_near_nodes) + for node in tmp_far_nodes: + for ne_node in mesh.neighbors(node): + if far_maps[ne_node[0], ne_node[1]] == False and \ + near_maps[ne_node[0], ne_node[1]] == False: + if mesh.nodes[ne_node].get('inpaint_id') == 1: + new_tmp_far_nodes.add(ne_node) + far_maps[ne_node[0], ne_node[1]] = True + mesh.nodes[ne_node]['refine_rgbd'] = True + mesh.nodes[ne_node]['backup_depth'] = ne_node[2] \ + if mesh.nodes[ne_node].get('real_depth') is None else mesh.nodes[ne_node]['real_depth'] + mesh.nodes[ne_node]['backup_color'] = mesh.nodes[ne_node]['color'] + else: + mesh.nodes[ne_node]['backup_depth'] = ne_node[2] \ + if mesh.nodes[ne_node].get('real_depth') is None else mesh.nodes[ne_node]['real_depth'] + mesh.nodes[ne_node]['backup_color'] = mesh.nodes[ne_node]['color'] + end_nodes.add(node) + far_nodes.update(new_tmp_far_nodes) + if len(far_nodes) == 0: + tmp_edge_ccs[edge_id] = set() + continue + for node in new_tmp_far_nodes | new_tmp_near_nodes: + for ne_node in mesh.neighbors(node): + if far_maps[ne_node[0], ne_node[1]] == 
False and near_maps[ne_node[0], ne_node[1]] == False: + end_nodes.add(node) + mesh.nodes[ne_node]['backup_depth'] = ne_node[2] \ + if mesh.nodes[ne_node].get('real_depth') is None else mesh.nodes[ne_node]['real_depth'] + mesh.nodes[ne_node]['backup_color'] = mesh.nodes[ne_node]['color'] + tmp_end_nodes = end_nodes + + refine_nodes = near_nodes | far_nodes + remain_refine_nodes = copy.deepcopy(refine_nodes) + accum_idx = 0 + while len(remain_refine_nodes) > 0: + accum_idx += 1 + if accum_idx > 100: + break + new_tmp_end_nodes = None + new_tmp_end_nodes = set() + survive_tmp_end_nodes = set() + for node in tmp_end_nodes: + re_depth, re_color, re_count = 0, np.array([0., 0., 0.]), 0 + for ne_node in mesh.neighbors(node): + if mesh.nodes[ne_node].get('refine_rgbd') is True: + if ne_node not in tmp_end_nodes: + new_tmp_end_nodes.add(ne_node) + else: + try: + re_depth += mesh.nodes[ne_node]['backup_depth'] + re_color += mesh.nodes[ne_node]['backup_color'].astype(np.float32) + re_count += 1. + except: + raise #import pdb; pdb.set_trace() #bty + if re_count > 0: + re_depth = re_depth / re_count + re_color = re_color / re_count + mesh.nodes[node]['backup_depth'] = re_depth + mesh.nodes[node]['backup_color'] = re_color + mesh.nodes[node]['refine_rgbd'] = False + else: + survive_tmp_end_nodes.add(node) + for node in tmp_end_nodes - survive_tmp_end_nodes: + if node in remain_refine_nodes: + remain_refine_nodes.remove(node) + tmp_end_nodes = new_tmp_end_nodes + if spdb == True: + bfrd_canvas = np.zeros((H, W)) + bfrc_canvas = np.zeros((H, W, 3)).astype(np.uint8) + aftd_canvas = np.zeros((H, W)) + aftc_canvas = np.zeros((H, W, 3)).astype(np.uint8) + for node in refine_nodes: + bfrd_canvas[node[0], node[1]] = abs(node[2]) + aftd_canvas[node[0], node[1]] = abs(mesh.nodes[node]['backup_depth']) + bfrc_canvas[node[0], node[1]] = mesh.nodes[node]['color'].astype(np.uint8) + aftc_canvas[node[0], node[1]] = mesh.nodes[node]['backup_color'].astype(np.uint8) + f, (ax1, ax2, ax3, ax4) = 
plt.subplots(1, 4, sharex=True, sharey=True); + ax1.imshow(bfrd_canvas); + ax2.imshow(aftd_canvas); + ax3.imshow(bfrc_canvas); + ax4.imshow(aftc_canvas); + plt.show() + import pdb; pdb.set_trace() + for node in refine_nodes: + if mesh.nodes[node].get('refine_rgbd') is not None: + mesh.nodes[node].pop('refine_rgbd') + mesh.nodes[node]['color'] = mesh.nodes[node]['backup_color'] + for info in info_on_pix[(node[0], node[1])]: + if info['depth'] == node[2]: + info['color'] = mesh.nodes[node]['backup_color'] + + return mesh, info_on_pix + +def refine_depth_around_edge(mask_depth, far_edge, uncleaned_far_edge, near_edge, mask, all_depth, config): + if isinstance(mask_depth, torch.Tensor): + if mask_depth.is_cuda: + mask_depth = mask_depth.cpu() + mask_depth = mask_depth.data + mask_depth = mask_depth.numpy() + if isinstance(far_edge, torch.Tensor): + if far_edge.is_cuda: + far_edge = far_edge.cpu() + far_edge = far_edge.data + far_edge = far_edge.numpy() + if isinstance(uncleaned_far_edge, torch.Tensor): + if uncleaned_far_edge.is_cuda: + uncleaned_far_edge = uncleaned_far_edge.cpu() + uncleaned_far_edge = uncleaned_far_edge.data + uncleaned_far_edge = uncleaned_far_edge.numpy() + if isinstance(near_edge, torch.Tensor): + if near_edge.is_cuda: + near_edge = near_edge.cpu() + near_edge = near_edge.data + near_edge = near_edge.numpy() + if isinstance(mask, torch.Tensor): + if mask.is_cuda: + mask = mask.cpu() + mask = mask.data + mask = mask.numpy() + mask = mask.squeeze() + uncleaned_far_edge = uncleaned_far_edge.squeeze() + far_edge = far_edge.squeeze() + near_edge = near_edge.squeeze() + mask_depth = mask_depth.squeeze() + dilate_far_edge = cv2.dilate(uncleaned_far_edge.astype(np.uint8), kernel=np.array([[0,1,0],[1,1,1],[0,1,0]]).astype(np.uint8), iterations=1) + near_edge[dilate_far_edge == 0] = 0 + dilate_near_edge = cv2.dilate(near_edge.astype(np.uint8), kernel=np.array([[0,1,0],[1,1,1],[0,1,0]]).astype(np.uint8), iterations=1) + far_edge[dilate_near_edge == 0] = 0 + 
init_far_edge = far_edge.copy() + init_near_edge = near_edge.copy() + for i in range(config['depth_edge_dilate_2']): + init_far_edge = cv2.dilate(init_far_edge, kernel=np.array([[0,1,0],[1,1,1],[0,1,0]]).astype(np.uint8), iterations=1) + init_far_edge[init_near_edge == 1] = 0 + init_near_edge = cv2.dilate(init_near_edge, kernel=np.array([[0,1,0],[1,1,1],[0,1,0]]).astype(np.uint8), iterations=1) + init_near_edge[init_far_edge == 1] = 0 + init_far_edge[mask == 0] = 0 + init_near_edge[mask == 0] = 0 + hole_far_edge = 1 - init_far_edge + hole_near_edge = 1 - init_near_edge + change = None + while True: + change = False + hole_far_edge[init_near_edge == 1] = 0 + hole_near_edge[init_far_edge == 1] = 0 + far_pxs, far_pys = np.where((hole_far_edge == 0) * (init_far_edge == 1) > 0) + current_hole_far_edge = hole_far_edge.copy() + for far_px, far_py in zip(far_pxs, far_pys): + min_px = max(far_px - 1, 0) + max_px = min(far_px + 2, mask.shape[0]-1) + min_py = max(far_py - 1, 0) + max_py = min(far_py + 2, mask.shape[1]-1) + hole_far = current_hole_far_edge[min_px: max_px, min_py: max_py] + tmp_mask = mask[min_px: max_px, min_py: max_py] + all_depth_patch = all_depth[min_px: max_px, min_py: max_py] * 0 + all_depth_mask = (all_depth_patch != 0).astype(np.uint8) + cross_element = np.array([[0,1,0],[1,1,1],[0,1,0]])[min_px - (far_px - 1): max_px - (far_px - 1), min_py - (far_py - 1): max_py - (far_py - 1)] + combine_mask = (tmp_mask + all_depth_mask).clip(0, 1) * hole_far * cross_element + tmp_patch = combine_mask * (mask_depth[min_px: max_px, min_py: max_py] + all_depth_patch) + number = np.count_nonzero(tmp_patch) + if number > 0: + mask_depth[far_px, far_py] = np.sum(tmp_patch).astype(np.float32) / max(number, 1e-6) + hole_far_edge[far_px, far_py] = 1 + change = True + near_pxs, near_pys = np.where((hole_near_edge == 0) * (init_near_edge == 1) > 0) + current_hole_near_edge = hole_near_edge.copy() + for near_px, near_py in zip(near_pxs, near_pys): + min_px = max(near_px - 1, 0) 
+ max_px = min(near_px + 2, mask.shape[0]-1) + min_py = max(near_py - 1, 0) + max_py = min(near_py + 2, mask.shape[1]-1) + hole_near = current_hole_near_edge[min_px: max_px, min_py: max_py] + tmp_mask = mask[min_px: max_px, min_py: max_py] + all_depth_patch = all_depth[min_px: max_px, min_py: max_py] * 0 + all_depth_mask = (all_depth_patch != 0).astype(np.uint8) + cross_element = np.array([[0,1,0],[1,1,1],[0,1,0]])[min_px - near_px + 1:max_px - near_px + 1, min_py - near_py + 1:max_py - near_py + 1] + combine_mask = (tmp_mask + all_depth_mask).clip(0, 1) * hole_near * cross_element + tmp_patch = combine_mask * (mask_depth[min_px: max_px, min_py: max_py] + all_depth_patch) + number = np.count_nonzero(tmp_patch) + if number > 0: + mask_depth[near_px, near_py] = np.sum(tmp_patch) / max(number, 1e-6) + hole_near_edge[near_px, near_py] = 1 + change = True + if change is False: + break + + return mask_depth + + + +def vis_depth_edge_connectivity(depth, config): + disp = 1./depth + u_diff = (disp[1:, :] - disp[:-1, :])[:-1, 1:-1] + b_diff = (disp[:-1, :] - disp[1:, :])[1:, 1:-1] + l_diff = (disp[:, 1:] - disp[:, :-1])[1:-1, :-1] + r_diff = (disp[:, :-1] - disp[:, 1:])[1:-1, 1:] + u_over = (np.abs(u_diff) > config['depth_threshold']).astype(np.float32) + b_over = (np.abs(b_diff) > config['depth_threshold']).astype(np.float32) + l_over = (np.abs(l_diff) > config['depth_threshold']).astype(np.float32) + r_over = (np.abs(r_diff) > config['depth_threshold']).astype(np.float32) + concat_diff = np.stack([u_diff, b_diff, r_diff, l_diff], axis=-1) + concat_over = np.stack([u_over, b_over, r_over, l_over], axis=-1) + over_diff = concat_diff * concat_over + pos_over = (over_diff > 0).astype(np.float32).sum(-1).clip(0, 1) + neg_over = (over_diff < 0).astype(np.float32).sum(-1).clip(0, 1) + neg_over[(over_diff > 0).astype(np.float32).sum(-1) > 0] = 0 + _, edge_label = cv2.connectedComponents(pos_over.astype(np.uint8), connectivity=8) + T_junction_maps = np.zeros_like(pos_over) + for 
def max_size(mat, value=0):
    """Find the largest axis-aligned rectangle of cells equal to ``value``.

    Each row is turned into a histogram of how many consecutive cells ending
    at that row equal ``value``; the best rectangle under any histogram is the
    answer.

    Args:
        mat: sequence of rows (a list of lists; an empty list or an empty
            first row yields ``(0, 0)``).
        value: the cell value that rectangles must consist of.

    Returns:
        ``(height, width)`` of the largest rectangle, ``(0, 0)`` if none.
    """
    if not (mat and mat[0]):
        return (0, 0)
    rows = iter(mat)
    heights = [(el == value) for el in next(rows)]
    best = max_rectangle_size(heights)
    for row in rows:
        heights = [(1 + h) if el == value else 0 for h, el in zip(heights, row)]
        best = max(best, max_rectangle_size(heights), key=get_area)
    return best


def max_rectangle_size(histogram):
    """Return ``(height, width)`` of the largest rectangle under a histogram.

    A stack of ``(start, height)`` pairs with strictly increasing heights is
    maintained; a bar lower than the top closes the taller runs, whose areas
    are then compared against the best found so far. Ties keep the rectangle
    found first.
    """
    stack = []
    best = (0, 0)  # height, width of the largest rectangle
    pos = 0  # current position in the histogram
    for pos, height in enumerate(histogram):
        start = pos  # position where the rectangle for this bar starts
        # Close every open run that is taller than the current bar; the
        # current bar inherits the start of the last run it closed.
        while stack and height < stack[-1][1]:
            start, closed_height = stack.pop()
            best = max(best, (closed_height, pos - start), key=get_area)
        # Equal heights extend the run already on the stack.
        if not stack or height > stack[-1][1]:
            stack.append((start, height))

    pos += 1
    # Runs still open extend to the end of the histogram.
    for start, height in stack:
        best = max(best, (height, pos - start), key=get_area)

    return best


def get_area(size):
    """Return the product of the entries of ``size`` (height * width)."""
    return reduce(mul, size)


def find_anchors(matrix):
    """Locate the largest all-zero rectangle in ``matrix``.

    Args:
        matrix: 2-D array-like; zero cells are the ones that may be covered.

    Returns:
        ``(top, bottom, left, right)`` with exclusive ``bottom``/``right``,
        for the first window (row-major scan) of the maximal size whose
        largest entry is 0.

    Raises:
        ValueError: if ``matrix`` contains no zero cell.
    """
    matrix = [[*x] for x in matrix]
    mh, mw = max_size(matrix)
    if mh == 0 or mw == 0:
        # Without this guard the scan below fails inside NumPy with an
        # unhelpful "zero-size array" ValueError.
        raise ValueError("find_anchors: matrix contains no zero-valued cell")
    matrix = np.array(matrix)
    for i in range(matrix.shape[0] + 1 - mh):
        for j in range(matrix.shape[1] + 1 - mw):
            if matrix[i:i + mh, j:j + mw].max() == 0:
                return i, i + mh, j, j + mw


def find_largest_rect(dst_img, bg_color=(128, 128, 128)):
    """Find the largest rectangle of ``dst_img`` free of border background.

    Pixels whose first three channels equal ``bg_color`` are background.
    Background components whose bounding box touches the image border are
    marked as forbidden, and the largest rectangle avoiding them is returned.

    Returns:
        ``(top, bottom, left, right)`` as produced by ``find_anchors``.
    """
    valid = np.any(dst_img[..., :3] != bg_color, axis=-1)
    dst_h, dst_w = dst_img.shape[:2]
    num_labels, labels = cv2.connectedComponents(np.uint8(~valid))
    red_mat = np.zeros_like(labels)
    # denoise: only background blobs connected to the border are excluded
    for i in range(1, num_labels):
        component = labels == i
        # boundingRect yields (x, y, w, h) with x horizontal and y vertical,
        # so x + w is checked against the width and y + h against the height.
        x, y, w, h = cv2.boundingRect(np.uint8(component))
        if x == 0 or (x + w) == dst_w or y == 0 or (y + h) == dst_h:
            red_mat[component] = 1
    # crop
    t, b, l, r = find_anchors(red_mat)

    return t, b, l, r
# importlib.metadata is only part of the standard library from Python 3.8 on;
# older interpreters get the PyPI backport bound to the same alias.
if sys.version_info < (3, 8):
    launch.run_pip("install importlib-metadata", "importlib-metadata for depthmap script")
    import importlib_metadata
else:
    import importlib.metadata as importlib_metadata
if not launch.is_installed('packaging'):
    launch.run_pip("install packaging", "packaging requirement for depthmap script")
from packaging.version import Version


def ensure(module_name, min_version=None):
    """Install ``module_name`` via pip unless a suitable version is present.

    Nothing is done when ``launch.is_installed`` reports the module and either
    no ``min_version`` is requested or the installed version is at least
    ``min_version``. Otherwise pip is asked for ``module_name`` (with a
    ``>=min_version`` constraint when one is given).
    """
    if launch.is_installed(module_name):
        if min_version is None or Version(importlib_metadata.version(module_name)) >= Version(min_version):
            return
    requirement = f'{module_name}>={min_version}' if min_version is not None else module_name
    cmd = f'install "{requirement}"'
    msg = f'{requirement} requirement for depthmap script'
    launch.run_pip(cmd, msg)


ensure('timm', '0.9.2')  # For midas, specified just in case

ensure('matplotlib')

ensure('trimesh')

ensure('numba', '0.57.0')
ensure('vispy', '0.13.0')

ensure('rembg', '2.0.50')

if not launch.is_installed("moviepy"):
    launch.run_pip('install "moviepy==1.0.2"', "moviepy requirement for depthmap script")
ensure('transforms3d', '0.4.1')

ensure('diffusers', '0.20.1')  # For Marigold

ensure('imageio')  # 2.4.1
try:  # Dirty hack to not reinstall every time
    importlib_metadata.version('imageio-ffmpeg')
except Exception:
    ensure('imageio-ffmpeg')


if not launch.is_installed("networkx"):
    # The command used to read 'install install "networkx==2.5"', which made
    # pip additionally try to install a distribution literally named "install".
    launch.run_pip('install "networkx==2.5"', "networkx requirement for depthmap script")
if platform.system() == 'Windows':
    ensure('pyqt5')

if platform.system() == 'Darwin':
    ensure('pyqt6')
    ensure('PyOpenGL', '3.1.7')


# Depth Anything
def get_installed_version(package: str):
    """Return the installed version string of ``package``, or None on any error."""
    try:
        # Use the alias chosen above so the backport is honoured as well.
        return importlib_metadata.version(package)
    except Exception:
        return None


def try_install_from_wheel(pkg_name: str, wheel_url: str):
    """Install ``pkg_name`` from ``wheel_url`` unless it is already installed.

    Installation is best-effort: a failure is reported on stdout and
    otherwise ignored.
    """
    if get_installed_version(pkg_name) is not None:
        return
    try:
        launch.run_pip(f"install {wheel_url}", f" {pkg_name} requirement for depthmap script")
    except Exception:
        print('Failed to install wheel for Depth Anything support. It won\'t work.')


try_install_from_wheel(
    "depth_anything",
    "https://github.com/huchenlei/Depth-Anything/releases/download/v1.0.0/depth_anything-2024.1.22.0-py2.py3-none-any.whl")
diff --git a/lib/Resnet.py b/lib/Resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..8d19038d217c7251ae516bf43f66f9e25c4b040c --- /dev/null +++ b/lib/Resnet.py @@ -0,0 +1,199 @@ +import torch.nn as nn +import torch.nn as NN + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = NN.BatchNorm2d(planes) #NN.BatchNorm2d + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = NN.BatchNorm2d(planes) #NN.BatchNorm2d + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = NN.BatchNorm2d(planes) #NN.BatchNorm2d + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + 
padding=1, bias=False) + self.bn2 = NN.BatchNorm2d(planes) #NN.BatchNorm2d + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) + self.bn3 = NN.BatchNorm2d(planes * self.expansion) #NN.BatchNorm2d + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000): + self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = NN.BatchNorm2d(64) #NN.BatchNorm2d + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + #self.avgpool = nn.AvgPool2d(7, stride=1) + #self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + NN.BatchNorm2d(planes * block.expansion), #NN.BatchNorm2d + ) + + layers = [] + 
layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + features = [] + + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + features.append(x) + x = self.layer2(x) + features.append(x) + x = self.layer3(x) + features.append(x) + x = self.layer4(x) + features.append(x) + + return features + + +def resnet18(pretrained=True, **kwargs): + """Constructs a ResNet-18 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + return model + + +def resnet34(pretrained=True, **kwargs): + """Constructs a ResNet-34 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + return model + + +def resnet50(pretrained=True, **kwargs): + """Constructs a ResNet-50 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + + return model + + +def resnet101(pretrained=True, **kwargs): + """Constructs a ResNet-101 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + + return model + + +def resnet152(pretrained=True, **kwargs): + """Constructs a ResNet-152 model. 
+ Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + return model diff --git a/lib/Resnext_torch.py b/lib/Resnext_torch.py new file mode 100644 index 0000000000000000000000000000000000000000..783f350f7cba8a62f9ee1f613336953fb396fc89 --- /dev/null +++ b/lib/Resnext_torch.py @@ -0,0 +1,247 @@ +#!/usr/bin/env python +# coding: utf-8 +import torch.nn as nn + +try: + from urllib import urlretrieve +except ImportError: + from urllib.request import urlretrieve + +__all__ = ['resnext101_32x8d'] + + +model_urls = { + 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', + 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=dilation, groups=groups, bias=False, dilation=dilation) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None): + super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError('BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def 
forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + # while original implementation places the stride at the first 1x1 convolution(self.conv1) + # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + # This variant is also known as ResNet V1.5 and improves accuracy according to + # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. + + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None): + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, + groups=1, width_per_group=64, 
replace_stride_with_dilation=None, + norm_layer=None): + super(ResNet, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError("replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, + dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, + dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, + dilate=replace_stride_with_dilation[2]) + #self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + #self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
+ # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1, dilate=False): + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + norm_layer(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation, norm_layer)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, groups=self.groups, + base_width=self.base_width, dilation=self.dilation, + norm_layer=norm_layer)) + + return nn.Sequential(*layers) + + def _forward_impl(self, x): + # See note [TorchScript super()] + features = [] + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + features.append(x) + + x = self.layer2(x) + features.append(x) + + x = self.layer3(x) + features.append(x) + + x = self.layer4(x) + features.append(x) + + #x = self.avgpool(x) + #x = torch.flatten(x, 1) + #x = self.fc(x) + + return features + + def forward(self, x): + return self._forward_impl(x) + + + +def resnext101_32x8d(pretrained=True, **kwargs): + """Constructs a ResNet-152 model. 
+ Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 8 + + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + return model + + + +if __name__ == '__main__': + import torch + model = resnext101_32x8d(True).cuda() + + rgb = torch.rand((2, 3, 256, 256)).cuda() + out = model(rgb) + print(len(out)) + diff --git a/lib/__init__.py b/lib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d3f5a12faa99758192ecc4ed3fc22c9249232e86 --- /dev/null +++ b/lib/__init__.py @@ -0,0 +1 @@ + diff --git a/lib/__pycache__/Resnet.cpython-310.pyc b/lib/__pycache__/Resnet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ee68ed354886cdb2ffdeb34af452218090f802c5 Binary files /dev/null and b/lib/__pycache__/Resnet.cpython-310.pyc differ diff --git a/lib/__pycache__/Resnet.cpython-311.pyc b/lib/__pycache__/Resnet.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d362ee4cc2fae3d0a1d2b392d44689bb3ef0fbda Binary files /dev/null and b/lib/__pycache__/Resnet.cpython-311.pyc differ diff --git a/lib/__pycache__/Resnet.cpython-312.pyc b/lib/__pycache__/Resnet.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5eec69a45162d6ea045bfb667ea360d2191f40cc Binary files /dev/null and b/lib/__pycache__/Resnet.cpython-312.pyc differ diff --git a/lib/__pycache__/Resnext_torch.cpython-310.pyc b/lib/__pycache__/Resnext_torch.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7881ac6db8345686a519398cafb811fccfea5e67 Binary files /dev/null and b/lib/__pycache__/Resnext_torch.cpython-310.pyc differ diff --git a/lib/__pycache__/Resnext_torch.cpython-311.pyc b/lib/__pycache__/Resnext_torch.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fdf8316063935d2c5fde86f4854ed140c9517260 Binary files /dev/null and 
b/lib/__pycache__/Resnext_torch.cpython-311.pyc differ diff --git a/lib/__pycache__/Resnext_torch.cpython-312.pyc b/lib/__pycache__/Resnext_torch.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..076ea712f201a46ac52f471bebf1290ae2da7d43 Binary files /dev/null and b/lib/__pycache__/Resnext_torch.cpython-312.pyc differ diff --git a/lib/__pycache__/__init__.cpython-310.pyc b/lib/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dbd5893405a040cbaf0f14d0eb9a04ef2af5708c Binary files /dev/null and b/lib/__pycache__/__init__.cpython-310.pyc differ diff --git a/lib/__pycache__/__init__.cpython-311.pyc b/lib/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..942cfac0913e13f6d3fd926bf1dee5632664b5fb Binary files /dev/null and b/lib/__pycache__/__init__.cpython-311.pyc differ diff --git a/lib/__pycache__/__init__.cpython-312.pyc b/lib/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b674ccf1e22ec3727b55e609710fb6eed3dd69fa Binary files /dev/null and b/lib/__pycache__/__init__.cpython-312.pyc differ diff --git a/lib/__pycache__/multi_depth_model_woauxi.cpython-310.pyc b/lib/__pycache__/multi_depth_model_woauxi.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eaff1cfc6f499b203926bdf6d6b98c1094ee5c84 Binary files /dev/null and b/lib/__pycache__/multi_depth_model_woauxi.cpython-310.pyc differ diff --git a/lib/__pycache__/multi_depth_model_woauxi.cpython-311.pyc b/lib/__pycache__/multi_depth_model_woauxi.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..301b70a96c3a135f099851c87de25f4b8078fd50 Binary files /dev/null and b/lib/__pycache__/multi_depth_model_woauxi.cpython-311.pyc differ diff --git a/lib/__pycache__/multi_depth_model_woauxi.cpython-312.pyc b/lib/__pycache__/multi_depth_model_woauxi.cpython-312.pyc 
new file mode 100644 index 0000000000000000000000000000000000000000..8850db1621fb92e696261d0837e1841c9f308658 Binary files /dev/null and b/lib/__pycache__/multi_depth_model_woauxi.cpython-312.pyc differ diff --git a/lib/__pycache__/net_tools.cpython-310.pyc b/lib/__pycache__/net_tools.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e04883703a6eb6d547b307cf3427e31f648cbfd Binary files /dev/null and b/lib/__pycache__/net_tools.cpython-310.pyc differ diff --git a/lib/__pycache__/net_tools.cpython-311.pyc b/lib/__pycache__/net_tools.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..72df87b87f0b78e95cde96ba419b23862a808cf0 Binary files /dev/null and b/lib/__pycache__/net_tools.cpython-311.pyc differ diff --git a/lib/__pycache__/net_tools.cpython-312.pyc b/lib/__pycache__/net_tools.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e744511353aa55f9b3fab8686249902bddb9c8ff Binary files /dev/null and b/lib/__pycache__/net_tools.cpython-312.pyc differ diff --git a/lib/__pycache__/network_auxi.cpython-310.pyc b/lib/__pycache__/network_auxi.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b936141241d5ef545d8618726f39f011c3393ea3 Binary files /dev/null and b/lib/__pycache__/network_auxi.cpython-310.pyc differ diff --git a/lib/__pycache__/network_auxi.cpython-311.pyc b/lib/__pycache__/network_auxi.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..11bf0eb9ace6367ec9aaafc4abbf367aac46d2c9 Binary files /dev/null and b/lib/__pycache__/network_auxi.cpython-311.pyc differ diff --git a/lib/__pycache__/network_auxi.cpython-312.pyc b/lib/__pycache__/network_auxi.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..35cb0c47f97615756cc24cbc13bbe6af70f944d0 Binary files /dev/null and b/lib/__pycache__/network_auxi.cpython-312.pyc differ diff --git a/lib/multi_depth_model_woauxi.py 
b/lib/multi_depth_model_woauxi.py new file mode 100644 index 0000000000000000000000000000000000000000..ab8cba8206d097b17fa2cdae019d4034d8313626 --- /dev/null +++ b/lib/multi_depth_model_woauxi.py @@ -0,0 +1,33 @@ +from lib import network_auxi as network +from lib.net_tools import get_func +import torch +import torch.nn as nn + +class RelDepthModel(nn.Module): + def __init__(self, backbone='resnet50'): + super(RelDepthModel, self).__init__() + if backbone == 'resnet50': + encoder = 'resnet50_stride32' + elif backbone == 'resnext101': + encoder = 'resnext101_stride32x8d' + self.depth_model = DepthModel(encoder) + + def inference(self, rgb): + with torch.no_grad(): + input = rgb.cuda() + depth = self.depth_model(input) + #pred_depth_out = depth - depth.min() + 0.01 + return depth #pred_depth_out + + +class DepthModel(nn.Module): + def __init__(self, encoder): + super(DepthModel, self).__init__() + backbone = network.__name__.split('.')[-1] + '.' + encoder + self.encoder_modules = get_func(backbone)() + self.decoder_modules = network.Decoder() + + def forward(self, x): + lateral_out = self.encoder_modules(x) + out_logit = self.decoder_modules(lateral_out) + return out_logit \ No newline at end of file diff --git a/lib/net_tools.py b/lib/net_tools.py new file mode 100644 index 0000000000000000000000000000000000000000..035a00f51dbc67841adecbf859357eb6de3e177c --- /dev/null +++ b/lib/net_tools.py @@ -0,0 +1,53 @@ +import importlib +import torch +import os +from collections import OrderedDict + + +def get_func(func_name): + """Helper to return a function object by name. func_name must identify a + function in this module or the path to a function relative to the base + 'modeling' module. + """ + if func_name == '': + return None + try: + parts = func_name.split('.') + # Refers to a function in this module + if len(parts) == 1: + return globals()[parts[0]] + # Otherwise, assume we're referencing a module under modeling + module_name = 'lib.' 
+ '.'.join(parts[:-1]) + module = importlib.import_module(module_name) + return getattr(module, parts[-1]) + except Exception: + print('Failed to f1ind function: %s', func_name) + raise + +def load_ckpt(args, depth_model, shift_model, focal_model): + """ + Load checkpoint. + """ + if os.path.isfile(args.load_ckpt): + print("loading checkpoint %s" % args.load_ckpt) + checkpoint = torch.load(args.load_ckpt) + if shift_model is not None: + shift_model.load_state_dict(strip_prefix_if_present(checkpoint['shift_model'], 'module.'), + strict=True) + if focal_model is not None: + focal_model.load_state_dict(strip_prefix_if_present(checkpoint['focal_model'], 'module.'), + strict=True) + depth_model.load_state_dict(strip_prefix_if_present(checkpoint['depth_model'], "module."), + strict=True) + del checkpoint + torch.cuda.empty_cache() + + +def strip_prefix_if_present(state_dict, prefix): + keys = sorted(state_dict.keys()) + if not all(key.startswith(prefix) for key in keys): + return state_dict + stripped_state_dict = OrderedDict() + for key, value in state_dict.items(): + stripped_state_dict[key.replace(prefix, "")] = value + return stripped_state_dict \ No newline at end of file diff --git a/lib/network_auxi.py b/lib/network_auxi.py new file mode 100644 index 0000000000000000000000000000000000000000..b3364bc830ecc47440bb040634848eaf0214b8a4 --- /dev/null +++ b/lib/network_auxi.py @@ -0,0 +1,417 @@ +import torch +import torch.nn as nn +import torch.nn.init as init + +from lib import Resnet, Resnext_torch + + +def resnet50_stride32(): + return DepthNet(backbone='resnet', depth=50, upfactors=[2, 2, 2, 2]) + +def resnext101_stride32x8d(): + return DepthNet(backbone='resnext101_32x8d', depth=101, upfactors=[2, 2, 2, 2]) + + +class Decoder(nn.Module): + def __init__(self): + super(Decoder, self).__init__() + self.inchannels = [256, 512, 1024, 2048] + self.midchannels = [256, 256, 256, 512] + self.upfactors = [2,2,2,2] + self.outchannels = 1 + + self.conv = 
FTB(inchannels=self.inchannels[3], midchannels=self.midchannels[3]) + self.conv1 = nn.Conv2d(in_channels=self.midchannels[3], out_channels=self.midchannels[2], kernel_size=3, padding=1, stride=1, bias=True) + self.upsample = nn.Upsample(scale_factor=self.upfactors[3], mode='bilinear', align_corners=True) + + self.ffm2 = FFM(inchannels=self.inchannels[2], midchannels=self.midchannels[2], outchannels = self.midchannels[2], upfactor=self.upfactors[2]) + self.ffm1 = FFM(inchannels=self.inchannels[1], midchannels=self.midchannels[1], outchannels = self.midchannels[1], upfactor=self.upfactors[1]) + self.ffm0 = FFM(inchannels=self.inchannels[0], midchannels=self.midchannels[0], outchannels = self.midchannels[0], upfactor=self.upfactors[0]) + + self.outconv = AO(inchannels=self.midchannels[0], outchannels=self.outchannels, upfactor=2) + self._init_params() + + def _init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): #NN.BatchNorm2d + init.constant_(m.weight, 1) + init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + + def forward(self, features): + x_32x = self.conv(features[3]) # 1/32 + x_32 = self.conv1(x_32x) + x_16 = self.upsample(x_32) # 1/16 + + x_8 = self.ffm2(features[2], x_16) # 1/8 + x_4 = self.ffm1(features[1], x_8) # 1/4 + x_2 = self.ffm0(features[0], x_4) # 1/2 + #----------------------------------------- + x = self.outconv(x_2) # original size + return x + +class DepthNet(nn.Module): + __factory = { + 18: Resnet.resnet18, + 34: Resnet.resnet34, + 50: Resnet.resnet50, + 101: Resnet.resnet101, + 152: Resnet.resnet152 + } + def __init__(self, + backbone='resnet', + depth=50, + upfactors=[2, 2, 
2, 2]): + super(DepthNet, self).__init__() + self.backbone = backbone + self.depth = depth + self.pretrained = False + self.inchannels = [256, 512, 1024, 2048] + self.midchannels = [256, 256, 256, 512] + self.upfactors = upfactors + self.outchannels = 1 + + # Build model + if self.backbone == 'resnet': + if self.depth not in DepthNet.__factory: + raise KeyError("Unsupported depth:", self.depth) + self.encoder = DepthNet.__factory[depth](pretrained=self.pretrained) + elif self.backbone == 'resnext101_32x8d': + self.encoder = Resnext_torch.resnext101_32x8d(pretrained=self.pretrained) + else: + self.encoder = Resnext_torch.resnext101(pretrained=self.pretrained) + + def forward(self, x): + x = self.encoder(x) # 1/32, 1/16, 1/8, 1/4 + return x + + +class FTB(nn.Module): + def __init__(self, inchannels, midchannels=512): + super(FTB, self).__init__() + self.in1 = inchannels + self.mid = midchannels + self.conv1 = nn.Conv2d(in_channels=self.in1, out_channels=self.mid, kernel_size=3, padding=1, stride=1, + bias=True) + # NN.BatchNorm2d + self.conv_branch = nn.Sequential(nn.ReLU(inplace=True), \ + nn.Conv2d(in_channels=self.mid, out_channels=self.mid, kernel_size=3, + padding=1, stride=1, bias=True), \ + nn.BatchNorm2d(num_features=self.mid), \ + nn.ReLU(inplace=True), \ + nn.Conv2d(in_channels=self.mid, out_channels=self.mid, kernel_size=3, + padding=1, stride=1, bias=True)) + self.relu = nn.ReLU(inplace=True) + + self.init_params() + + def forward(self, x): + x = self.conv1(x) + x = x + self.conv_branch(x) + x = self.relu(x) + + return x + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): # NN.BatchNorm2d 
+ init.constant_(m.weight, 1) + init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + + +class ATA(nn.Module): + def __init__(self, inchannels, reduction=8): + super(ATA, self).__init__() + self.inchannels = inchannels + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential(nn.Linear(self.inchannels * 2, self.inchannels // reduction), + nn.ReLU(inplace=True), + nn.Linear(self.inchannels // reduction, self.inchannels), + nn.Sigmoid()) + self.init_params() + + def forward(self, low_x, high_x): + n, c, _, _ = low_x.size() + x = torch.cat([low_x, high_x], 1) + x = self.avg_pool(x) + x = x.view(n, -1) + x = self.fc(x).view(n, c, 1, 1) + x = low_x * x + high_x + + return x + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + # init.normal(m.weight, std=0.01) + init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + # init.normal_(m.weight, std=0.01) + init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): # NN.BatchNorm2d + init.constant_(m.weight, 1) + init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + + +class FFM(nn.Module): + def __init__(self, inchannels, midchannels, outchannels, upfactor=2): + super(FFM, self).__init__() + self.inchannels = inchannels + self.midchannels = midchannels + self.outchannels = outchannels + self.upfactor = upfactor + + self.ftb1 = FTB(inchannels=self.inchannels, midchannels=self.midchannels) + # self.ata = ATA(inchannels = self.midchannels) + self.ftb2 = FTB(inchannels=self.midchannels, midchannels=self.outchannels) + + self.upsample = 
nn.Upsample(scale_factor=self.upfactor, mode='bilinear', align_corners=True) + + self.init_params() + + def forward(self, low_x, high_x): + x = self.ftb1(low_x) + x = x + high_x + x = self.ftb2(x) + x = self.upsample(x) + + return x + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): # NN.Batchnorm2d + init.constant_(m.weight, 1) + init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + + +class AO(nn.Module): + # Adaptive output module + def __init__(self, inchannels, outchannels, upfactor=2): + super(AO, self).__init__() + self.inchannels = inchannels + self.outchannels = outchannels + self.upfactor = upfactor + + self.adapt_conv = nn.Sequential( + nn.Conv2d(in_channels=self.inchannels, out_channels=self.inchannels // 2, kernel_size=3, padding=1, + stride=1, bias=True), \ + nn.BatchNorm2d(num_features=self.inchannels // 2), \ + nn.ReLU(inplace=True), \ + nn.Conv2d(in_channels=self.inchannels // 2, out_channels=self.outchannels, kernel_size=3, padding=1, + stride=1, bias=True), \ + nn.Upsample(scale_factor=self.upfactor, mode='bilinear', align_corners=True)) + + self.init_params() + + def forward(self, x): + x = self.adapt_conv(x) + return x + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, 
nn.ConvTranspose2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): # NN.Batchnorm2d + init.constant_(m.weight, 1) + init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + + + +# ============================================================================================================== + + +class ResidualConv(nn.Module): + def __init__(self, inchannels): + super(ResidualConv, self).__init__() + # NN.BatchNorm2d + self.conv = nn.Sequential( + # nn.BatchNorm2d(num_features=inchannels), + nn.ReLU(inplace=False), + # nn.Conv2d(in_channels=inchannels, out_channels=inchannels, kernel_size=3, padding=1, stride=1, groups=inchannels,bias=True), + # nn.Conv2d(in_channels=inchannels, out_channels=inchannels, kernel_size=1, padding=0, stride=1, groups=1,bias=True) + nn.Conv2d(in_channels=inchannels, out_channels=inchannels / 2, kernel_size=3, padding=1, stride=1, + bias=False), + nn.BatchNorm2d(num_features=inchannels / 2), + nn.ReLU(inplace=False), + nn.Conv2d(in_channels=inchannels / 2, out_channels=inchannels, kernel_size=3, padding=1, stride=1, + bias=False) + ) + self.init_params() + + def forward(self, x): + x = self.conv(x) + x + return x + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): # NN.BatchNorm2d + init.constant_(m.weight, 1) + init.constant_(m.bias, 0) 
+ elif isinstance(m, nn.Linear): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + + +class FeatureFusion(nn.Module): + def __init__(self, inchannels, outchannels): + super(FeatureFusion, self).__init__() + self.conv = ResidualConv(inchannels=inchannels) + # NN.BatchNorm2d + self.up = nn.Sequential(ResidualConv(inchannels=inchannels), + nn.ConvTranspose2d(in_channels=inchannels, out_channels=outchannels, kernel_size=3, + stride=2, padding=1, output_padding=1), + nn.BatchNorm2d(num_features=outchannels), + nn.ReLU(inplace=True)) + + def forward(self, lowfeat, highfeat): + return self.up(highfeat + self.conv(lowfeat)) + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + # init.kaiming_normal_(m.weight, mode='fan_out') + init.normal_(m.weight, std=0.01) + # init.xavier_normal_(m.weight) + if m.bias is not None: + init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): # NN.BatchNorm2d + init.constant_(m.weight, 1) + init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + init.normal_(m.weight, std=0.01) + if m.bias is not None: + init.constant_(m.bias, 0) + + +class SenceUnderstand(nn.Module): + def __init__(self, channels): + super(SenceUnderstand, self).__init__() + self.channels = channels + self.conv1 = nn.Sequential(nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1), + nn.ReLU(inplace=True)) + self.pool = nn.AdaptiveAvgPool2d(8) + self.fc = nn.Sequential(nn.Linear(512 * 8 * 8, self.channels), + nn.ReLU(inplace=True)) + self.conv2 = nn.Sequential( + nn.Conv2d(in_channels=self.channels, out_channels=self.channels, kernel_size=1, padding=0), + nn.ReLU(inplace=True)) + self.initial_params() + + def forward(self, x): + n, c, h, w = x.size() + x = 
self.conv1(x) + x = self.pool(x) + x = x.view(n, -1) + x = self.fc(x) + x = x.view(n, self.channels, 1, 1) + x = self.conv2(x) + x = x.repeat(1, 1, h, w) + return x + + def initial_params(self, dev=0.01): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # print torch.sum(m.weight) + m.weight.data.normal_(0, dev) + if m.bias is not None: + m.bias.data.fill_(0) + elif isinstance(m, nn.ConvTranspose2d): + # print torch.sum(m.weight) + m.weight.data.normal_(0, dev) + if m.bias is not None: + m.bias.data.fill_(0) + elif isinstance(m, nn.Linear): + m.weight.data.normal_(0, dev) + + +if __name__ == '__main__': + net = DepthNet(depth=50, pretrained=True) + print(net) + inputs = torch.ones(4,3,128,128) + out = net(inputs) + print(out.size()) + diff --git a/lib/spvcnn_classsification.py b/lib/spvcnn_classsification.py new file mode 100644 index 0000000000000000000000000000000000000000..ad2d4d91491b01caed7a468f85f5416b10af8633 --- /dev/null +++ b/lib/spvcnn_classsification.py @@ -0,0 +1,160 @@ +import torch.nn as nn + +import torchsparse.nn as spnn +from torchsparse.point_tensor import PointTensor +from lib.spvcnn_utils import * +__all__ = ['SPVCNN_CLASSIFICATION'] + + + +class BasicConvolutionBlock(nn.Module): + def __init__(self, inc, outc, ks=3, stride=1, dilation=1): + super().__init__() + self.net = nn.Sequential( + spnn.Conv3d(inc, + outc, + kernel_size=ks, + dilation=dilation, + stride=stride), + spnn.BatchNorm(outc), + spnn.ReLU(True)) + + def forward(self, x): + out = self.net(x) + return out + + +class BasicDeconvolutionBlock(nn.Module): + def __init__(self, inc, outc, ks=3, stride=1): + super().__init__() + self.net = nn.Sequential( + spnn.Conv3d(inc, + outc, + kernel_size=ks, + stride=stride, + transpose=True), + spnn.BatchNorm(outc), + spnn.ReLU(True)) + + def forward(self, x): + return self.net(x) + + +class ResidualBlock(nn.Module): + def __init__(self, inc, outc, ks=3, stride=1, dilation=1): + super().__init__() + self.net = nn.Sequential( + 
spnn.Conv3d(inc, + outc, + kernel_size=ks, + dilation=dilation, + stride=stride), spnn.BatchNorm(outc), + spnn.ReLU(True), + spnn.Conv3d(outc, + outc, + kernel_size=ks, + dilation=dilation, + stride=1), + spnn.BatchNorm(outc) + ) + + self.downsample = nn.Sequential() if (inc == outc and stride == 1) else \ + nn.Sequential( + spnn.Conv3d(inc, outc, kernel_size=1, dilation=1, stride=stride), + spnn.BatchNorm(outc) + ) + + self.relu = spnn.ReLU(True) + + def forward(self, x): + out = self.relu(self.net(x) + self.downsample(x)) + return out + + +class SPVCNN_CLASSIFICATION(nn.Module): + def __init__(self, **kwargs): + super().__init__() + + cr = kwargs.get('cr', 1.0) + cs = [32, 32, 64, 128, 256, 256, 128, 96, 96] + cs = [int(cr * x) for x in cs] + + if 'pres' in kwargs and 'vres' in kwargs: + self.pres = kwargs['pres'] + self.vres = kwargs['vres'] + + self.stem = nn.Sequential( + spnn.Conv3d(kwargs['input_channel'], cs[0], kernel_size=3, stride=1), + spnn.BatchNorm(cs[0]), + spnn.ReLU(True), + spnn.Conv3d(cs[0], cs[0], kernel_size=3, stride=1), + spnn.BatchNorm(cs[0]), + spnn.ReLU(True)) + + self.stage1 = nn.Sequential( + BasicConvolutionBlock(cs[0], cs[0], ks=2, stride=2, dilation=1), + ResidualBlock(cs[0], cs[1], ks=3, stride=1, dilation=1), + ResidualBlock(cs[1], cs[1], ks=3, stride=1, dilation=1), + ) + + self.stage2 = nn.Sequential( + BasicConvolutionBlock(cs[1], cs[1], ks=2, stride=2, dilation=1), + ResidualBlock(cs[1], cs[2], ks=3, stride=1, dilation=1), + ResidualBlock(cs[2], cs[2], ks=3, stride=1, dilation=1), + ) + + self.stage3 = nn.Sequential( + BasicConvolutionBlock(cs[2], cs[2], ks=2, stride=2, dilation=1), + ResidualBlock(cs[2], cs[3], ks=3, stride=1, dilation=1), + ResidualBlock(cs[3], cs[3], ks=3, stride=1, dilation=1), + ) + + self.stage4 = nn.Sequential( + BasicConvolutionBlock(cs[3], cs[3], ks=2, stride=2, dilation=1), + ResidualBlock(cs[3], cs[4], ks=3, stride=1, dilation=1), + ResidualBlock(cs[4], cs[4], ks=3, stride=1, dilation=1), + ) + 
self.avg_pool = spnn.GlobalAveragePooling() + self.classifier = nn.Sequential(nn.Linear(cs[4], kwargs['num_classes'])) + self.point_transforms = nn.ModuleList([ + nn.Sequential( + nn.Linear(cs[0], cs[4]), + nn.BatchNorm1d(cs[4]), + nn.ReLU(True), + ), + ]) + + self.weight_initialization() + self.dropout = nn.Dropout(0.3, True) + + def weight_initialization(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def forward(self, x): + # x: SparseTensor z: PointTensor + z = PointTensor(x.F, x.C.float()) + + x0 = initial_voxelize(z, self.pres, self.vres) + + x0 = self.stem(x0) + z0 = voxel_to_point(x0, z, nearest=False) + z0.F = z0.F + + x1 = point_to_voxel(x0, z0) + x1 = self.stage1(x1) + x2 = self.stage2(x1) + x3 = self.stage3(x2) + x4 = self.stage4(x3) + z1 = voxel_to_point(x4, z0) + z1.F = z1.F + self.point_transforms[0](z0.F) + y1 = point_to_voxel(x4, z1) + pool = self.avg_pool(y1) + out = self.classifier(pool) + + + return out + + diff --git a/lib/spvcnn_utils.py b/lib/spvcnn_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..43f16bb3f0cedd470ffb792df95704dc26864b45 --- /dev/null +++ b/lib/spvcnn_utils.py @@ -0,0 +1,105 @@ +import torchsparse.nn.functional as spf +from torchsparse.point_tensor import PointTensor +from torchsparse.utils.kernel_region import * +from torchsparse.utils.helpers import * + + +__all__ = ['initial_voxelize', 'point_to_voxel', 'voxel_to_point'] + + +# z: PointTensor +# return: SparseTensor +def initial_voxelize(z, init_res, after_res): + new_float_coord = torch.cat( + [(z.C[:, :3] * init_res) / after_res, z.C[:, -1].view(-1, 1)], 1) + + pc_hash = spf.sphash(torch.floor(new_float_coord).int()) + sparse_hash = torch.unique(pc_hash) + idx_query = spf.sphashquery(pc_hash, sparse_hash) + counts = spf.spcount(idx_query.int(), len(sparse_hash)) + + inserted_coords = spf.spvoxelize(torch.floor(new_float_coord), idx_query, + counts) + 
inserted_coords = torch.round(inserted_coords).int() + inserted_feat = spf.spvoxelize(z.F, idx_query, counts) + + new_tensor = SparseTensor(inserted_feat, inserted_coords, 1) + new_tensor.check() + z.additional_features['idx_query'][1] = idx_query + z.additional_features['counts'][1] = counts + z.C = new_float_coord + + return new_tensor + + +# x: SparseTensor, z: PointTensor +# return: SparseTensor +def point_to_voxel(x, z): + if z.additional_features is None or z.additional_features.get('idx_query') is None\ + or z.additional_features['idx_query'].get(x.s) is None: + #pc_hash = hash_gpu(torch.floor(z.C).int()) + pc_hash = spf.sphash( + torch.cat([ + torch.floor(z.C[:, :3] / x.s).int() * x.s, + z.C[:, -1].int().view(-1, 1) + ], 1)) + sparse_hash = spf.sphash(x.C) + idx_query = spf.sphashquery(pc_hash, sparse_hash) + counts = spf.spcount(idx_query.int(), x.C.shape[0]) + z.additional_features['idx_query'][x.s] = idx_query + z.additional_features['counts'][x.s] = counts + else: + idx_query = z.additional_features['idx_query'][x.s] + counts = z.additional_features['counts'][x.s] + + inserted_feat = spf.spvoxelize(z.F, idx_query, counts) + new_tensor = SparseTensor(inserted_feat, x.C, x.s) + new_tensor.coord_maps = x.coord_maps + new_tensor.kernel_maps = x.kernel_maps + + return new_tensor + + +# x: SparseTensor, z: PointTensor +# return: PointTensor +def voxel_to_point(x, z, nearest=False): + if z.idx_query is None or z.weights is None or z.idx_query.get( + x.s) is None or z.weights.get(x.s) is None: + kr = KernelRegion(2, x.s, 1) + off = kr.get_kernel_offset().to(z.F.device) + #old_hash = kernel_hash_gpu(torch.floor(z.C).int(), off) + old_hash = spf.sphash( + torch.cat([ + torch.floor(z.C[:, :3] / x.s).int() * x.s, + z.C[:, -1].int().view(-1, 1) + ], 1), off) + pc_hash = spf.sphash(x.C.to(z.F.device)) + idx_query = spf.sphashquery(old_hash, pc_hash) + weights = spf.calc_ti_weights(z.C, idx_query, + scale=x.s).transpose(0, 1).contiguous() + idx_query = 
idx_query.transpose(0, 1).contiguous() + if nearest: + weights[:, 1:] = 0. + idx_query[:, 1:] = -1 + new_feat = spf.spdevoxelize(x.F, idx_query, weights) + new_tensor = PointTensor(new_feat, + z.C, + idx_query=z.idx_query, + weights=z.weights) + new_tensor.additional_features = z.additional_features + new_tensor.idx_query[x.s] = idx_query + new_tensor.weights[x.s] = weights + z.idx_query[x.s] = idx_query + z.weights[x.s] = weights + + else: + new_feat = spf.spdevoxelize(x.F, z.idx_query.get(x.s), z.weights.get(x.s)) + new_tensor = PointTensor(new_feat, + z.C, + idx_query=z.idx_query, + weights=z.weights) + new_tensor.additional_features = z.additional_features + + return new_tensor + + diff --git a/lib/test_utils.py b/lib/test_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9b820bf4b057f1378a42bfbe338263e59668fb90 --- /dev/null +++ b/lib/test_utils.py @@ -0,0 +1,242 @@ +import torch +import numpy as np +from torchsparse import SparseTensor +from torchsparse.utils import sparse_collate_fn, sparse_quantize +from plyfile import PlyData, PlyElement +import os + +def init_image_coor(height, width, u0=None, v0=None): + u0 = width / 2.0 if u0 is None else u0 + v0 = height / 2.0 if v0 is None else v0 + + x_row = np.arange(0, width) + x = np.tile(x_row, (height, 1)) + x = x.astype(np.float32) + u_u0 = x - u0 + + y_col = np.arange(0, height) + y = np.tile(y_col, (width, 1)).T + y = y.astype(np.float32) + v_v0 = y - v0 + return u_u0, v_v0 + +def depth_to_pcd(depth, u_u0, v_v0, f, invalid_value=0): + mask_invalid = depth <= invalid_value + depth[mask_invalid] = 0.0 + x = u_u0 / f * depth + y = v_v0 / f * depth + z = depth + pcd = np.stack([x, y, z], axis=2) + return pcd, ~mask_invalid + +def pcd_to_sparsetensor(pcd, mask_valid, voxel_size=0.01, num_points=100000): + pcd_valid = pcd[mask_valid] + block_ = pcd_valid + block = np.zeros_like(block_) + block[:, :3] = block_[:, :3] + + pc_ = np.round(block_[:, :3] / voxel_size) + pc_ -= pc_.min(0, 
keepdims=1) + feat_ = block + + # transfer point cloud to voxels + inds = sparse_quantize(pc_, + feat_, + return_index=True, + return_invs=False) + if len(inds) > num_points: + inds = np.random.choice(inds, num_points, replace=False) + + pc = pc_[inds] + feat = feat_[inds] + lidar = SparseTensor(feat, pc) + feed_dict = [{'lidar': lidar}] + inputs = sparse_collate_fn(feed_dict) + return inputs + +def pcd_uv_to_sparsetensor(pcd, u_u0, v_v0, mask_valid, f= 500.0, voxel_size=0.01, mask_side=None, num_points=100000): + if mask_side is not None: + mask_valid = mask_valid & mask_side + pcd_valid = pcd[mask_valid] + u_u0_valid = u_u0[mask_valid][:, np.newaxis] / f + v_v0_valid = v_v0[mask_valid][:, np.newaxis] / f + + block_ = np.concatenate([pcd_valid, u_u0_valid, v_v0_valid], axis=1) + block = np.zeros_like(block_) + block[:, :] = block_[:, :] + + + pc_ = np.round(block_[:, :3] / voxel_size) + pc_ -= pc_.min(0, keepdims=1) + feat_ = block + + # transfer point cloud to voxels + inds = sparse_quantize(pc_, + feat_, + return_index=True, + return_invs=False) + if len(inds) > num_points: + inds = np.random.choice(inds, num_points, replace=False) + + pc = pc_[inds] + feat = feat_[inds] + lidar = SparseTensor(feat, pc) + feed_dict = [{'lidar': lidar}] + inputs = sparse_collate_fn(feed_dict) + return inputs + + +def refine_focal_one_step(depth, focal, model, u0, v0): + # reconstruct PCD from depth + u_u0, v_v0 = init_image_coor(depth.shape[0], depth.shape[1], u0=u0, v0=v0) + pcd, mask_valid = depth_to_pcd(depth, u_u0, v_v0, f=focal, invalid_value=0) + # input for the voxelnet + feed_dict = pcd_uv_to_sparsetensor(pcd, u_u0, v_v0, mask_valid, f=focal, voxel_size=0.005, mask_side=None) + inputs = feed_dict['lidar'].cuda() + + outputs = model(inputs) + return outputs + +def refine_shift_one_step(depth_wshift, model, focal, u0, v0): + # reconstruct PCD from depth + u_u0, v_v0 = init_image_coor(depth_wshift.shape[0], depth_wshift.shape[1], u0=u0, v0=v0) + pcd_wshift, mask_valid = 
depth_to_pcd(depth_wshift, u_u0, v_v0, f=focal, invalid_value=0) + # input for the voxelnet + feed_dict = pcd_to_sparsetensor(pcd_wshift, mask_valid, voxel_size=0.01) + inputs = feed_dict['lidar'].cuda() + + outputs = model(inputs) + return outputs + +def refine_focal(depth, focal, model, u0, v0): + last_scale = 1 + focal_tmp = np.copy(focal) + for i in range(1): + scale = refine_focal_one_step(depth, focal_tmp, model, u0, v0) + focal_tmp = focal_tmp / scale.item() + last_scale = last_scale * scale + return torch.tensor([[last_scale]]) + +def refine_shift(depth_wshift, model, focal, u0, v0): + depth_wshift_tmp = np.copy(depth_wshift) + last_shift = 0 + for i in range(1): + shift = refine_shift_one_step(depth_wshift_tmp, model, focal, u0, v0) + shift = shift if shift.item() < 0.7 else torch.tensor([[0.7]]) + depth_wshift_tmp -= shift.item() + last_shift += shift.item() + return torch.tensor([[last_shift]]) + +def reconstruct_3D(depth, f): + """ + Reconstruct depth to 3D pointcloud with the provided focal length. + Return: + pcd: N X 3 array, point cloud + """ + cu = depth.shape[1] / 2 + cv = depth.shape[0] / 2 + width = depth.shape[1] + height = depth.shape[0] + row = np.arange(0, width, 1) + u = np.array([row for i in np.arange(height)]) + col = np.arange(0, height, 1) + v = np.array([col for i in np.arange(width)]) + v = v.transpose(1, 0) + + if f > 1e5: + print('Infinit focal length!!!') + x = u - cu + y = v - cv + z = depth / depth.max() * x.max() + else: + x = (u - cu) * depth / f + y = (v - cv) * depth / f + z = depth + + x = np.reshape(x, (width * height, 1)).astype(float) + y = np.reshape(y, (width * height, 1)).astype(float) + z = np.reshape(z, (width * height, 1)).astype(float) + pcd = np.concatenate((x, y, z), axis=1) + pcd = pcd.astype(int) + return pcd + +def save_point_cloud(pcd, rgb, filename, binary=True): + """Save an RGB point cloud as a PLY file. 
+ + :paras + @pcd: Nx3 matrix, the XYZ coordinates + @rgb: NX3 matrix, the rgb colors for each 3D point + """ + assert pcd.shape[0] == rgb.shape[0] + + if rgb is None: + gray_concat = np.tile(np.array([128], dtype=np.uint8), (pcd.shape[0], 3)) + points_3d = np.hstack((pcd, gray_concat)) + else: + points_3d = np.hstack((pcd, rgb)) + python_types = (float, float, float, int, int, int) + npy_types = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('red', 'u1'), ('green', 'u1'), + ('blue', 'u1')] + if binary is True: + # Format into NumPy structured array + vertices = [] + for row_idx in range(points_3d.shape[0]): + cur_point = points_3d[row_idx] + vertices.append(tuple(dtype(point) for dtype, point in zip(python_types, cur_point))) + vertices_array = np.array(vertices, dtype=npy_types) + el = PlyElement.describe(vertices_array, 'vertex') + + # Write + PlyData([el]).write(filename) + else: + x = np.squeeze(points_3d[:, 0]) + y = np.squeeze(points_3d[:, 1]) + z = np.squeeze(points_3d[:, 2]) + r = np.squeeze(points_3d[:, 3]) + g = np.squeeze(points_3d[:, 4]) + b = np.squeeze(points_3d[:, 5]) + + ply_head = 'ply\n' \ + 'format ascii 1.0\n' \ + 'element vertex %d\n' \ + 'property float x\n' \ + 'property float y\n' \ + 'property float z\n' \ + 'property uchar red\n' \ + 'property uchar green\n' \ + 'property uchar blue\n' \ + 'end_header' % r.shape[0] + # ---- Save ply data to disk + np.savetxt(filename, np.column_stack((x, y, z, r, g, b)), fmt="%d %d %d %d %d %d", header=ply_head, comments='') + +def reconstruct_depth(depth, rgb, dir, pcd_name, focal): + """ + para disp: disparity, [h, w] + para rgb: rgb image, [h, w, 3], in rgb format + """ + rgb = np.squeeze(rgb) + depth = np.squeeze(depth) + + mask = depth < 1e-8 + depth[mask] = 0 + depth = depth / depth.max() * 10000 + + pcd = reconstruct_3D(depth, f=focal) + rgb_n = np.reshape(rgb, (-1, 3)) + save_point_cloud(pcd, rgb_n, os.path.join(dir, pcd_name + '.ply')) + + +def recover_metric_depth(pred, gt): + if type(pred).__module__ 
== torch.__name__: + pred = pred.cpu().numpy() + if type(gt).__module__ == torch.__name__: + gt = gt.cpu().numpy() + gt = gt.squeeze() + pred = pred.squeeze() + mask = (gt > 1e-8) & (pred > 1e-8) + + gt_mask = gt[mask] + pred_mask = pred[mask] + a, b = np.polyfit(pred_mask, gt_mask, deg=1) + pred_metric = a * pred + b + return pred_metric diff --git a/main.py b/main.py new file mode 100644 index 0000000000000000000000000000000000000000..b539fdbe2a59394dcb9679f6c92dfae6d321150d --- /dev/null +++ b/main.py @@ -0,0 +1,40 @@ +# This launches DepthMap without the AUTOMATIC1111/stable-diffusion-webui + +import argparse +import os +import pathlib + +import src.misc + + +def maybe_chdir(): + """Detects if DepthMap was installed as a stable-diffusion-webui script, but run without current directory set to + the stable-diffusion-webui root. Changes current directory if needed. + This is to avoid re-downloading models and putting results into a wrong folder.""" + try: + file_path = pathlib.Path(__file__) + path = file_path.parts + while len(path) > 0 and path[-1] != src.misc.REPOSITORY_NAME: + path = path[:-1] + if len(path) >= 2 and path[-1] == src.misc.REPOSITORY_NAME and path[-2] == "extensions": + path = path[:-2] + listdir = os.listdir(str(pathlib.Path(*path))) + if 'launch.py' in listdir and 'webui.py': + os.chdir(str(pathlib.Path(*path))) + except: + pass + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--share", help="Create public link", action='store_true') + parser.add_argument("--listen", help="Create public link", action='store_true') + parser.add_argument("--no_chdir", help="Do not try to use the root of stable-diffusion-webui", action='store_true') + args = parser.parse_args() + + print(f"{src.misc.SCRIPT_FULL_NAME} running in standalone mode!") + if not args.no_chdir: + maybe_chdir() + server_name = "0.0.0.0" if args.listen else None + import src.common_ui + src.common_ui.on_ui_tabs().launch(share=args.share, 
server_name=server_name) diff --git a/models/depth_anything_v2/depth_anything_v2_vitb.pth b/models/depth_anything_v2/depth_anything_v2_vitb.pth new file mode 100644 index 0000000000000000000000000000000000000000..b75f06d1059551eee7972956eb6ca18d30887057 --- /dev/null +++ b/models/depth_anything_v2/depth_anything_v2_vitb.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d2b7002e62d39d655571c371333340bd88f67ab95050c03591555aa05645328 +size 389961218 diff --git a/models/depth_anything_v2/depth_anything_v2_vitl.pth b/models/depth_anything_v2/depth_anything_v2_vitl.pth new file mode 100644 index 0000000000000000000000000000000000000000..241600d95e155ae70a693000db395d140359352a --- /dev/null +++ b/models/depth_anything_v2/depth_anything_v2_vitl.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7ea19fa0ed99244e67b624c72b8580b7e9553043245905be58796a608eb9345 +size 1341395338 diff --git a/models/depth_anything_v2/depth_anything_v2_vits.pth b/models/depth_anything_v2/depth_anything_v2_vits.pth new file mode 100644 index 0000000000000000000000000000000000000000..c82b5fd8eb774158e8ab9e3387f6fb035922337b --- /dev/null +++ b/models/depth_anything_v2/depth_anything_v2_vits.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:715fade13be8f229f8a70cc02066f656f2423a59effd0579197bbf57860e1378 +size 99218434 diff --git a/models/pix2pix/latest_net_G.pth b/models/pix2pix/latest_net_G.pth new file mode 100644 index 0000000000000000000000000000000000000000..863672ed308575a80038085d7b0bac9f9e7a2f82 --- /dev/null +++ b/models/pix2pix/latest_net_G.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50ec735d74ed6499562d898f41b49343e521808b8dae589aa3c2f5c9ac9f7462 +size 318268048 diff --git a/models/rem_bg/u2net.onnx b/models/rem_bg/u2net.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d5e2c4d942dc1e3d0a5cc5b194516e9ddd70a3ed --- /dev/null +++ 
b/models/rem_bg/u2net.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d10d2f3bb75ae3b6d527c77944fc5e7dcd94b29809d47a739a7a728a912b491 +size 175997641 diff --git a/options.png b/options.png new file mode 100644 index 0000000000000000000000000000000000000000..c429cfbedede86778499404a054b68a35b7109d0 Binary files /dev/null and b/options.png differ diff --git a/outputs/depthmap-17278951300001.png b/outputs/depthmap-17278951300001.png new file mode 100644 index 0000000000000000000000000000000000000000..a0760dbfa327c290916f4a0782d7d502dcb320cf Binary files /dev/null and b/outputs/depthmap-17278951300001.png differ diff --git a/outputs/depthmap-17278951300002-left-right.png b/outputs/depthmap-17278951300002-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..ce9778325b8f4925e52ef2d16ee94ac5736d3cc9 Binary files /dev/null and b/outputs/depthmap-17278951300002-left-right.png differ diff --git a/outputs/depthmap-17278951300003-left-right.png b/outputs/depthmap-17278951300003-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..65a11499baf6bfd426f29985869892a9dedf9a2a Binary files /dev/null and b/outputs/depthmap-17278951300003-left-right.png differ diff --git a/outputs/depthmap-17278951300004-left-right.png b/outputs/depthmap-17278951300004-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..25378927c5b9d8fa2f7c0e220a2f9e68f048dc09 Binary files /dev/null and b/outputs/depthmap-17278951300004-left-right.png differ diff --git a/outputs/depthmap-17278951300005-left-right.png b/outputs/depthmap-17278951300005-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..2734c2a3ed00c43a6668da74c842688772e32e71 --- /dev/null +++ b/outputs/depthmap-17278951300005-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:949f4f20c97f919c7f307c48b116a5e319c9eb4a5430e794eac909bb849ed00d +size 1803845 diff --git 
a/outputs/depthmap-17278951300006-left-right.png b/outputs/depthmap-17278951300006-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..e49422cb85a569fe05c35788eb78b09a0753bce8 --- /dev/null +++ b/outputs/depthmap-17278951300006-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f99863ad9805b269386057b60d240c88d646694bbc66d5ed6fe10bb5cf8ce8b +size 1592157 diff --git a/outputs/depthmap-17278951300007-left-right.png b/outputs/depthmap-17278951300007-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f64c69ce8e95a82e2281bf2ba0ef987be29b0e72 --- /dev/null +++ b/outputs/depthmap-17278951300007-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79fb86cbba68c9d41e9e2d6db53941c6399285829a8b9eb03180106ffe23212d +size 2028992 diff --git a/outputs/depthmap-17278951300008-left-right.png b/outputs/depthmap-17278951300008-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..3d30eb35f67b0222dc7706cd3a898b063050576e --- /dev/null +++ b/outputs/depthmap-17278951300008-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffc995b30ae8a52613f13fbe2f950b2a37c9853a32f69f091e5927a30a8ecdaf +size 1952699 diff --git a/outputs/depthmap-17278951300009-left-right.png b/outputs/depthmap-17278951300009-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..db5b61d4d8446a615c95bfd2e04d070e4b86f9e5 --- /dev/null +++ b/outputs/depthmap-17278951300009-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f129fdbdbf78216e858bccf90f3625d005c7f704c6d166c5eeaf44b95253083 +size 1998075 diff --git a/outputs/depthmap-17278951300010-left-right.png b/outputs/depthmap-17278951300010-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..5c18d6141fe34ffc505ca4ae018b0b4e572744ef --- /dev/null +++ 
b/outputs/depthmap-17278951300010-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d5ef1973c2e0de667887b807676d26ba7100f95d559513b87ad006fb4e49496 +size 1924145 diff --git a/outputs/depthmap-17278951300011-left-right.png b/outputs/depthmap-17278951300011-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..753ca29a048e975f125392775df488c6803fc9de --- /dev/null +++ b/outputs/depthmap-17278951300011-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc93c9ddb39a7c634ce1d5a280d589584173d326694f57fd3b1a55527f3f9217 +size 2114647 diff --git a/outputs/depthmap-17278951300012-left-right.png b/outputs/depthmap-17278951300012-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..08ca2f79b68fdaba84c2a0f9f8332f764a9736c3 --- /dev/null +++ b/outputs/depthmap-17278951300012-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2132320f105ee88eae5f5af93bd7d24bb79e20811d243e01218d5f8af577fe +size 1963183 diff --git a/outputs/depthmap-17278951300013-left-right.png b/outputs/depthmap-17278951300013-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..4d84582878b7d148b7111844c081114e800c13ce --- /dev/null +++ b/outputs/depthmap-17278951300013-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5f1d54c6f3cdd7f850f3bb6e051e190ddcbc4cfe9fffebb7f54a1318f0c9acd +size 1944774 diff --git a/outputs/depthmap-17278951300014-left-right.png b/outputs/depthmap-17278951300014-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..8c724cde733faf64d6bad3192462ce90114221d7 --- /dev/null +++ b/outputs/depthmap-17278951300014-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd967edc1659c20d372d344c76e8ca7782b55a2255065755722649300bc6e899 +size 1680042 diff --git 
a/outputs/depthmap-17278951300015-left-right.png b/outputs/depthmap-17278951300015-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..3292c1140cd422cce2503ed36f3d8c7c47ebc2ba --- /dev/null +++ b/outputs/depthmap-17278951300015-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cd00ac1e56cab23a0ad9a6ff4d5201bb9f9da54314fdbe4e544a78624485558 +size 1511219 diff --git a/outputs/depthmap-17278951300016-left-right.png b/outputs/depthmap-17278951300016-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..c8394d325a3f8e1ce02919dd26d5aa0ede201e17 --- /dev/null +++ b/outputs/depthmap-17278951300016-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e419e744c6b2b0c2035323cd555c6dadce051e86a7b17bf9d026b9a9c82db6f +size 1605657 diff --git a/outputs/depthmap-17278951300017-left-right.png b/outputs/depthmap-17278951300017-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..21a5e0d277067339fc54a3960c838121e1374ea8 --- /dev/null +++ b/outputs/depthmap-17278951300017-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9def6f19bd81e065d1f1e45b390531204a042cd86c8be7478665fd3238f47da +size 1610305 diff --git a/outputs/depthmap-17278951300018-left-right.png b/outputs/depthmap-17278951300018-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..4934033658e63ae4aa58362782719949f4a86da7 --- /dev/null +++ b/outputs/depthmap-17278951300018-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fe375fd9c8a481b656961bffa3d370290bf93476b8d17865832a19a66d0f79d +size 1970309 diff --git a/outputs/depthmap-17278951300019-left-right.png b/outputs/depthmap-17278951300019-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..d84f6f75a339dabc2f4f748c9fba79f2acfbdc5c --- /dev/null +++ 
b/outputs/depthmap-17278951300019-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d126433e0005a34086f3ad7e0dd457544bb6fcc5454be4df4f6de3a45868d95 +size 1835762 diff --git a/outputs/depthmap-17278951300020-left-right.png b/outputs/depthmap-17278951300020-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..5f327075d26acd3d8c58c3b396e05f56651cacbb --- /dev/null +++ b/outputs/depthmap-17278951300020-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:967c13f6c0bcfad1c1ed35884e22cf78c0cb934dda417bea2f67b33ec6b99f91 +size 2069470 diff --git a/outputs/depthmap-17278951300021-left-right.png b/outputs/depthmap-17278951300021-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..5f327075d26acd3d8c58c3b396e05f56651cacbb --- /dev/null +++ b/outputs/depthmap-17278951300021-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:967c13f6c0bcfad1c1ed35884e22cf78c0cb934dda417bea2f67b33ec6b99f91 +size 2069470 diff --git a/outputs/depthmap-17278951300022-left-right.png b/outputs/depthmap-17278951300022-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..60da5896a99e228123e5332d9bc6a313b726b602 --- /dev/null +++ b/outputs/depthmap-17278951300022-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85f052237800cdaf73d4db78d5fa9227c6076fd91bbc915ce201b2139233dd6c +size 2117644 diff --git a/outputs/depthmap-17278951300023-left-right.png b/outputs/depthmap-17278951300023-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..ff323f289a70bb7ac343c9ef75c06324e15a5603 --- /dev/null +++ b/outputs/depthmap-17278951300023-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bcd22d390ffce07751a7264319f103cf6719953073a8161e29fa3ded31eaabb +size 2054050 diff --git 
a/outputs/depthmap-17278951300024-left-right.png b/outputs/depthmap-17278951300024-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..e8b29a940e951014b745f63dfa1df43ad63df96a --- /dev/null +++ b/outputs/depthmap-17278951300024-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99954b72931e32582577b39f54e41de47073002fe7c12239018d28c1b8c341e1 +size 1833762 diff --git a/outputs/depthmap-17278951300025-left-right.png b/outputs/depthmap-17278951300025-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..57f949b56ca1155d6435bcb6e281494f7f151eaf --- /dev/null +++ b/outputs/depthmap-17278951300025-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d16aa368887217b3dc992ec788b2c85c5ef9f8fe8052c9a1cba708a2b99f509b +size 1986523 diff --git a/outputs/depthmap-17278951300026-left-right.png b/outputs/depthmap-17278951300026-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..3cb15cd61a7d3eb69e20c50ebef5ee2d2c5b9be0 --- /dev/null +++ b/outputs/depthmap-17278951300026-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdd8e082c4bce8373e1812ea5db0dc828e6010f7cac75d5f6dd96112bc58eab4 +size 2029606 diff --git a/outputs/depthmap-17278951300027-left-right.png b/outputs/depthmap-17278951300027-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..4dba1731b5a4815d0b702d068762371db6194857 --- /dev/null +++ b/outputs/depthmap-17278951300027-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8321049df78fd38c2b220cc20851f58f4320768c99700c8d629a44e891faeeca +size 1696041 diff --git a/outputs/depthmap-17278951300028-left-right.png b/outputs/depthmap-17278951300028-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..4d4c11e5d1263419fa660f897ffbe087afa49711 --- /dev/null +++ 
b/outputs/depthmap-17278951300028-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:205feeb8f9db18b58f735aa40303cda546c026267adf3b572fe059155ccb4c87 +size 2360143 diff --git a/outputs/depthmap-17278951300029-left-right.png b/outputs/depthmap-17278951300029-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..d570b17cc5481a115fbdee23f4185ed72c7d0389 --- /dev/null +++ b/outputs/depthmap-17278951300029-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eea6308caa75016857aae17f0d08689dda99baf19ff782c34f07b637487ef87 +size 2425585 diff --git a/outputs/depthmap-17278951300030-left-right.png b/outputs/depthmap-17278951300030-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..fb0558af7cbcb8b2cb747182993968c503bcbd23 --- /dev/null +++ b/outputs/depthmap-17278951300030-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7899886b60f63c45f8710b60b9cbb152282e067614cc044d4929508a2664b6cb +size 1980555 diff --git a/outputs/depthmap-17278951300031-left-right.png b/outputs/depthmap-17278951300031-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..028f0d67aa4bc3394efbf3d96b7e105dab322d3a --- /dev/null +++ b/outputs/depthmap-17278951300031-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca909a9a7930f768822b98f36e5abb467bcfdacd8edd1a327d147415372c38d9 +size 2056437 diff --git a/outputs/depthmap-17278951300032.png b/outputs/depthmap-17278951300032.png new file mode 100644 index 0000000000000000000000000000000000000000..145b5ebb662cf89eec764e5d68cfeb6c0e1f4308 Binary files /dev/null and b/outputs/depthmap-17278951300032.png differ diff --git a/outputs/depthmap-17278951300033-left-right.png b/outputs/depthmap-17278951300033-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..172ef3508efeafb64a9aee911a9eb09b2671b201 
--- /dev/null +++ b/outputs/depthmap-17278951300033-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dcff5b492fdf9e5e69c096274b1176dc6d541f1461cd6cacb47c601eb17d8b8 +size 2212207 diff --git a/outputs/depthmap-17278951300034.png b/outputs/depthmap-17278951300034.png new file mode 100644 index 0000000000000000000000000000000000000000..03559cb05b94c0b5fb84c1d826c72bf13a844983 --- /dev/null +++ b/outputs/depthmap-17278951300034.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b3c0d6c2eed68f8a0c20569fa36a32789b0c8a0ec350653e74dae79f37ab9d +size 1035216 diff --git a/outputs/depthmap-17278951300035-left-right.png b/outputs/depthmap-17278951300035-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..69ba1d3d6798adc1e1e85f509996ca0e93b55fb5 --- /dev/null +++ b/outputs/depthmap-17278951300035-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aaea7eb376f740ae95f7319029718eadff8201ca7be068a537208abe20e25da +size 1468740 diff --git a/outputs/depthmap-17278951300036.png b/outputs/depthmap-17278951300036.png new file mode 100644 index 0000000000000000000000000000000000000000..769a3d0f5efaa60bf4d5e6a4f398019db27bcbc4 --- /dev/null +++ b/outputs/depthmap-17278951300036.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ebdc78f9c4f821c89491e28f9de4fd12b8568e3d45ccc5326f5afd9e3540828 +size 1029540 diff --git a/outputs/depthmap-17278951300037-left-right.png b/outputs/depthmap-17278951300037-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..d83373817838ebcda12538593c759d7904615aa5 --- /dev/null +++ b/outputs/depthmap-17278951300037-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1649c672d1d356cdc76a4cdacc6a14ea9a091d0e61691f206f0f39adfec652a +size 1841637 diff --git a/outputs/depthmap-17278951300038.png b/outputs/depthmap-17278951300038.png new file 
mode 100644 index 0000000000000000000000000000000000000000..f2fa9ed9152ea9c91ed16b49d8eaed0d202fd1a8 Binary files /dev/null and b/outputs/depthmap-17278951300038.png differ diff --git a/outputs/depthmap-17278951300039-left-right.png b/outputs/depthmap-17278951300039-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..ae745d99b7daa25c5ee619ed8c1fc1dde5ad4974 --- /dev/null +++ b/outputs/depthmap-17278951300039-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9512bb23c32ddc5a76c9cf2ef69424d7fd579941515332a78f560360f6d8e46 +size 2070319 diff --git a/outputs/depthmap-17278951300040.png b/outputs/depthmap-17278951300040.png new file mode 100644 index 0000000000000000000000000000000000000000..9c0b6196d371f5fdca5547603748230d41881c63 --- /dev/null +++ b/outputs/depthmap-17278951300040.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3bc0db84d30b097658fa97d87e94394e8c63253a9a0ea8f7b6b09d3f5d43bd5 +size 1040121 diff --git a/outputs/depthmap-17278951300041-left-right.png b/outputs/depthmap-17278951300041-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..9368f9e596b8a429349070037826c667099a577c --- /dev/null +++ b/outputs/depthmap-17278951300041-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bb1b309fb60464986334c0f5d85c2c870922fdd9a0856fa1a450d9aa8ca7e4c +size 1575869 diff --git a/outputs/depthmap-17278951300042.png b/outputs/depthmap-17278951300042.png new file mode 100644 index 0000000000000000000000000000000000000000..2ea093985999f6d9a7167a702698400782bcba60 Binary files /dev/null and b/outputs/depthmap-17278951300042.png differ diff --git a/outputs/depthmap-17278951300043-left-right.png b/outputs/depthmap-17278951300043-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..ee72c8b0ab77332e74a2863aff602696d94396bc --- /dev/null +++ 
b/outputs/depthmap-17278951300043-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27570359ce94bbf17b7f6c096d7c14ffd03d7796c313c19a3bcdb617b40adee3 +size 1437470 diff --git a/outputs/depthmap-17278951300044.png b/outputs/depthmap-17278951300044.png new file mode 100644 index 0000000000000000000000000000000000000000..402de3bec8f7dcfb6e0688bc77ca43363fea1cf5 Binary files /dev/null and b/outputs/depthmap-17278951300044.png differ diff --git a/outputs/depthmap-17278951300045-left-right.png b/outputs/depthmap-17278951300045-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..45075d188c5883b40888682261a470cbc2732571 --- /dev/null +++ b/outputs/depthmap-17278951300045-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d37c42f54b6019b60daa9a435b606befc2d3419ffcbbd7b12a6215a44416ba6 +size 1457431 diff --git a/outputs/depthmap-17278951300046.png b/outputs/depthmap-17278951300046.png new file mode 100644 index 0000000000000000000000000000000000000000..a29642ef2c83d628b13fba4745e28c7bcd9f8a97 Binary files /dev/null and b/outputs/depthmap-17278951300046.png differ diff --git a/outputs/depthmap-17278951300047-left-right.png b/outputs/depthmap-17278951300047-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..fad4c308656cfd93623604245d8b03db96cf85bb Binary files /dev/null and b/outputs/depthmap-17278951300047-left-right.png differ diff --git a/outputs/depthmap-17278951300048.png b/outputs/depthmap-17278951300048.png new file mode 100644 index 0000000000000000000000000000000000000000..6b40b41ef03b715dd60ba9c2e9e7b4020e3a065c Binary files /dev/null and b/outputs/depthmap-17278951300048.png differ diff --git a/outputs/depthmap-17278951300049-left-right.png b/outputs/depthmap-17278951300049-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..d16b10bb54b6f00d76c97a6d206c60589ce799b6 Binary files /dev/null and 
b/outputs/depthmap-17278951300049-left-right.png differ diff --git a/outputs/depthmap-17278951300050.png b/outputs/depthmap-17278951300050.png new file mode 100644 index 0000000000000000000000000000000000000000..f34598744f19f7a3e60bf81ea4828d45d044b0f7 Binary files /dev/null and b/outputs/depthmap-17278951300050.png differ diff --git a/outputs/depthmap-17278951300051-left-right.png b/outputs/depthmap-17278951300051-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..09df8b583045c8c07b61b7ce9ba97dedabedf507 Binary files /dev/null and b/outputs/depthmap-17278951300051-left-right.png differ diff --git a/outputs/depthmap-17278951300052.png b/outputs/depthmap-17278951300052.png new file mode 100644 index 0000000000000000000000000000000000000000..ef8affe5a84a8bff3a2b3530b3272281e0955477 Binary files /dev/null and b/outputs/depthmap-17278951300052.png differ diff --git a/outputs/depthmap-17278951300053-left-right.png b/outputs/depthmap-17278951300053-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..25378927c5b9d8fa2f7c0e220a2f9e68f048dc09 Binary files /dev/null and b/outputs/depthmap-17278951300053-left-right.png differ diff --git a/outputs/depthmap-17278951300053-simple.obj b/outputs/depthmap-17278951300053-simple.obj new file mode 100644 index 0000000000000000000000000000000000000000..b1929094a2038716ae62d0c9534090d03c623a31 --- /dev/null +++ b/outputs/depthmap-17278951300053-simple.obj @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ee235a48e57dd4b74c4db4b2e6edca36f080569b6c921db894abe212d79eef +size 20637978 diff --git a/outputs/depthmap-17278951300055.png b/outputs/depthmap-17278951300055.png new file mode 100644 index 0000000000000000000000000000000000000000..ef8affe5a84a8bff3a2b3530b3272281e0955477 Binary files /dev/null and b/outputs/depthmap-17278951300055.png differ diff --git a/outputs/depthmap-17278951300056-left-right.png 
b/outputs/depthmap-17278951300056-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..25378927c5b9d8fa2f7c0e220a2f9e68f048dc09 Binary files /dev/null and b/outputs/depthmap-17278951300056-left-right.png differ diff --git a/outputs/depthmap-17278951300056-simple.obj b/outputs/depthmap-17278951300056-simple.obj new file mode 100644 index 0000000000000000000000000000000000000000..914072afab0c0ad3a7b91915cdf8cc0ee2e0c9c3 --- /dev/null +++ b/outputs/depthmap-17278951300056-simple.obj @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:222b52e4a90c391d3e53d89df269078e7c1e3128b04c79cbc8ac0ca7f4281a1f +size 22584536 diff --git a/outputs/depthmap-17278951300058.png b/outputs/depthmap-17278951300058.png new file mode 100644 index 0000000000000000000000000000000000000000..ef8affe5a84a8bff3a2b3530b3272281e0955477 Binary files /dev/null and b/outputs/depthmap-17278951300058.png differ diff --git a/outputs/depthmap-17278951300059-left-right.png b/outputs/depthmap-17278951300059-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..013ffc5e6d7379a05369822511c17fef06e8b343 Binary files /dev/null and b/outputs/depthmap-17278951300059-left-right.png differ diff --git a/outputs/depthmap-17278951300060-concat_depth.png b/outputs/depthmap-17278951300060-concat_depth.png new file mode 100644 index 0000000000000000000000000000000000000000..cec35781145fb3e59c01277bd941d604f28ecfef Binary files /dev/null and b/outputs/depthmap-17278951300060-concat_depth.png differ diff --git a/outputs/depthmap-17278951300061-left-right.png b/outputs/depthmap-17278951300061-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..013ffc5e6d7379a05369822511c17fef06e8b343 Binary files /dev/null and b/outputs/depthmap-17278951300061-left-right.png differ diff --git a/outputs/depthmap-17278951300061-simple.obj b/outputs/depthmap-17278951300061-simple.obj new file mode 100644 index 
0000000000000000000000000000000000000000..b1929094a2038716ae62d0c9534090d03c623a31 --- /dev/null +++ b/outputs/depthmap-17278951300061-simple.obj @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ee235a48e57dd4b74c4db4b2e6edca36f080569b6c921db894abe212d79eef +size 20637978 diff --git a/outputs/depthmap-17278951300063-concat_depth.png b/outputs/depthmap-17278951300063-concat_depth.png new file mode 100644 index 0000000000000000000000000000000000000000..28e0053235b53695368f6db7de191755fd0189c0 Binary files /dev/null and b/outputs/depthmap-17278951300063-concat_depth.png differ diff --git a/outputs/depthmap-17278951300064-left-right.png b/outputs/depthmap-17278951300064-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..3ddbcb50d3ec01dbd1d2e1abe0aca3cef5cbe4a6 Binary files /dev/null and b/outputs/depthmap-17278951300064-left-right.png differ diff --git a/outputs/depthmap-17278951300065-concat_depth.png b/outputs/depthmap-17278951300065-concat_depth.png new file mode 100644 index 0000000000000000000000000000000000000000..25a8453de2252239217eaf5abaaf105e48a42b2a Binary files /dev/null and b/outputs/depthmap-17278951300065-concat_depth.png differ diff --git a/outputs/depthmap-17278951300066-left-right.png b/outputs/depthmap-17278951300066-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..6a61f9b0505be53aee198484996fb73c45a77c4c Binary files /dev/null and b/outputs/depthmap-17278951300066-left-right.png differ diff --git a/outputs/depthmap-17278951300067-concat_depth.png b/outputs/depthmap-17278951300067-concat_depth.png new file mode 100644 index 0000000000000000000000000000000000000000..c25bfd36a8b3ead8bee84d78feef569680d2a24a Binary files /dev/null and b/outputs/depthmap-17278951300067-concat_depth.png differ diff --git a/outputs/depthmap-17278951300068-left-right.png b/outputs/depthmap-17278951300068-left-right.png new file mode 100644 index 
0000000000000000000000000000000000000000..f3350557c0819725b18d51ebfda587e9155ef55f Binary files /dev/null and b/outputs/depthmap-17278951300068-left-right.png differ diff --git a/outputs/depthmap-17278951300069-concat_depth.png b/outputs/depthmap-17278951300069-concat_depth.png new file mode 100644 index 0000000000000000000000000000000000000000..04bd4c0dc0bf55a0f545b0e6c3005cde34353fc5 Binary files /dev/null and b/outputs/depthmap-17278951300069-concat_depth.png differ diff --git a/outputs/depthmap-17278951300070-left-right.png b/outputs/depthmap-17278951300070-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..1e589c1add675f08e2a570f775d879b6d88545f5 Binary files /dev/null and b/outputs/depthmap-17278951300070-left-right.png differ diff --git a/outputs/depthmap-17278951300071-concat_depth.png b/outputs/depthmap-17278951300071-concat_depth.png new file mode 100644 index 0000000000000000000000000000000000000000..04bd4c0dc0bf55a0f545b0e6c3005cde34353fc5 Binary files /dev/null and b/outputs/depthmap-17278951300071-concat_depth.png differ diff --git a/outputs/depthmap-17278951300072-left-right.png b/outputs/depthmap-17278951300072-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..127ce0f78de2cb8b4bfc4dc052aad6813fe0178b Binary files /dev/null and b/outputs/depthmap-17278951300072-left-right.png differ diff --git a/outputs/depthmap-17278951300073-concat_depth.png b/outputs/depthmap-17278951300073-concat_depth.png new file mode 100644 index 0000000000000000000000000000000000000000..2089d1a2d0cce23ccaf297e1c256d956e26c3ee8 Binary files /dev/null and b/outputs/depthmap-17278951300073-concat_depth.png differ diff --git a/outputs/depthmap-17278951300074-left-right.png b/outputs/depthmap-17278951300074-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..38b190e4220578a36a23575ceec26d8347343f8c Binary files /dev/null and b/outputs/depthmap-17278951300074-left-right.png differ 
diff --git a/outputs/depthmap-17278951300075-concat_depth.png b/outputs/depthmap-17278951300075-concat_depth.png new file mode 100644 index 0000000000000000000000000000000000000000..c0d091e3006218cf19fce2f2acd3662a93f7c380 Binary files /dev/null and b/outputs/depthmap-17278951300075-concat_depth.png differ diff --git a/outputs/depthmap-17278951300076-left-right.png b/outputs/depthmap-17278951300076-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..1732924461b714ca64fd7169469271f1111db193 Binary files /dev/null and b/outputs/depthmap-17278951300076-left-right.png differ diff --git a/outputs/depthmap-17278951300077-concat_depth.png b/outputs/depthmap-17278951300077-concat_depth.png new file mode 100644 index 0000000000000000000000000000000000000000..9c21c9aab0f69496fb848b3224bf73f56fc75a75 Binary files /dev/null and b/outputs/depthmap-17278951300077-concat_depth.png differ diff --git a/outputs/depthmap-17278951300078-left-right.png b/outputs/depthmap-17278951300078-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..c6250bb253c312bfc3e7d9646e33ac37d672ee47 Binary files /dev/null and b/outputs/depthmap-17278951300078-left-right.png differ diff --git a/outputs/depthmap-17278951300079-concat_depth.png b/outputs/depthmap-17278951300079-concat_depth.png new file mode 100644 index 0000000000000000000000000000000000000000..ab5d965a757c1d61d1f97f7816bba39025d3902e Binary files /dev/null and b/outputs/depthmap-17278951300079-concat_depth.png differ diff --git a/outputs/depthmap-17278951300080-left-right.png b/outputs/depthmap-17278951300080-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..69a84d3aac69f1933507bf119fb6c9d02ab9d084 Binary files /dev/null and b/outputs/depthmap-17278951300080-left-right.png differ diff --git a/outputs/depthmap-17278951300081-concat_depth.png b/outputs/depthmap-17278951300081-concat_depth.png new file mode 100644 index 
0000000000000000000000000000000000000000..20e96ddc80eb542d133ec1262548c9a866438788 Binary files /dev/null and b/outputs/depthmap-17278951300081-concat_depth.png differ diff --git a/outputs/depthmap-17278951300082-left-right.png b/outputs/depthmap-17278951300082-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..8646e58a77fe9b52ef4b27e9e6b9f65b2b6bc868 Binary files /dev/null and b/outputs/depthmap-17278951300082-left-right.png differ diff --git a/outputs/depthmap-17278951300083-concat_depth.png b/outputs/depthmap-17278951300083-concat_depth.png new file mode 100644 index 0000000000000000000000000000000000000000..2152e4cbd1feb07046cb96d23e212c449ef52283 Binary files /dev/null and b/outputs/depthmap-17278951300083-concat_depth.png differ diff --git a/outputs/depthmap-17278951300084-left-right.png b/outputs/depthmap-17278951300084-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..ea6acef2f0aa7f4f93283a16a2654b60d7042d3b Binary files /dev/null and b/outputs/depthmap-17278951300084-left-right.png differ diff --git a/outputs/depthmap-17278951300085-concat_depth.png b/outputs/depthmap-17278951300085-concat_depth.png new file mode 100644 index 0000000000000000000000000000000000000000..905b388b00669fef6e1b09ffeb1469b806d6a07e Binary files /dev/null and b/outputs/depthmap-17278951300085-concat_depth.png differ diff --git a/outputs/depthmap-17278951300086-left-right.png b/outputs/depthmap-17278951300086-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..8c044c5558b7207a3d6e68c2be19a54a0901bcc3 Binary files /dev/null and b/outputs/depthmap-17278951300086-left-right.png differ diff --git a/outputs/depthmap-17278951300087-concat_depth.png b/outputs/depthmap-17278951300087-concat_depth.png new file mode 100644 index 0000000000000000000000000000000000000000..905b388b00669fef6e1b09ffeb1469b806d6a07e Binary files /dev/null and b/outputs/depthmap-17278951300087-concat_depth.png differ 
diff --git a/outputs/depthmap-17278951300088-left-right.png b/outputs/depthmap-17278951300088-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..93a46efb0e8c28bb51f6c7ddd5e985adba34cb51 Binary files /dev/null and b/outputs/depthmap-17278951300088-left-right.png differ diff --git a/outputs/depthmap-17280589390001.png b/outputs/depthmap-17280589390001.png new file mode 100644 index 0000000000000000000000000000000000000000..2a5801a091d185d05e414930b66e8683817751f9 Binary files /dev/null and b/outputs/depthmap-17280589390001.png differ diff --git a/outputs/depthmap-17280589390002.png b/outputs/depthmap-17280589390002.png new file mode 100644 index 0000000000000000000000000000000000000000..2a5801a091d185d05e414930b66e8683817751f9 Binary files /dev/null and b/outputs/depthmap-17280589390002.png differ diff --git a/outputs/depthmap-17280589390003-left-right.png b/outputs/depthmap-17280589390003-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..17e5eea4a06fe142021b7f96d389863425cb5dc7 --- /dev/null +++ b/outputs/depthmap-17280589390003-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:787136cae4bf49b7e8c1295118f288ead27c904e414c6a5ee25d65283eb133d1 +size 2844476 diff --git a/outputs/depthmap-17280589390004.png b/outputs/depthmap-17280589390004.png new file mode 100644 index 0000000000000000000000000000000000000000..53125d11dd37a45cabb06ecb47b0d3c2ad61b172 Binary files /dev/null and b/outputs/depthmap-17280589390004.png differ diff --git a/outputs/depthmap-17280589390005-left-right.png b/outputs/depthmap-17280589390005-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..736d5f8fa25b161ee8517a840f2a1137734c057a Binary files /dev/null and b/outputs/depthmap-17280589390005-left-right.png differ diff --git a/outputs/depthmap-17280589390006.png b/outputs/depthmap-17280589390006.png new file mode 100644 index 
0000000000000000000000000000000000000000..872309764a0fa34c9ce0b30f734bea35c09cd90a Binary files /dev/null and b/outputs/depthmap-17280589390006.png differ diff --git a/outputs/depthmap-17280589390007-left-right.png b/outputs/depthmap-17280589390007-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..9b22c0cd67d9c2eb8a10b00399a6930994ee7f6e Binary files /dev/null and b/outputs/depthmap-17280589390007-left-right.png differ diff --git a/outputs/depthmap-17280589390008.png b/outputs/depthmap-17280589390008.png new file mode 100644 index 0000000000000000000000000000000000000000..ce07b1d8ee390b1760ff165917c7fe7563008caa --- /dev/null +++ b/outputs/depthmap-17280589390008.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77cfb96fadcc39829900207b38edabe6281629e9a8060012021d8f236f21c183 +size 1817183 diff --git a/outputs/depthmap-17280589390009-left-right.png b/outputs/depthmap-17280589390009-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..3c9f9c1e9571aa97e99e2f3aa42905c4b621fe15 --- /dev/null +++ b/outputs/depthmap-17280589390009-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36a8c283e599649e1fc707811c3517148562eacbe00225b2ffd5924e2d88228a +size 3803378 diff --git a/outputs/depthmap-17280589390010.png b/outputs/depthmap-17280589390010.png new file mode 100644 index 0000000000000000000000000000000000000000..6b55d06a2891871d9298d6fe8f2228b8328cc13a --- /dev/null +++ b/outputs/depthmap-17280589390010.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03f816ebd1656d8955c78bdd9e45e02165f3a2395215b873423b684600df430e +size 1742659 diff --git a/outputs/depthmap-17280589390011-left-right.png b/outputs/depthmap-17280589390011-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..3b5ec8436c088d862629077d668c38555bad862d --- /dev/null +++ b/outputs/depthmap-17280589390011-left-right.png @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2833f7fe0dd278f8d467463ee75098bc115f0ef14b26d09451f638a8930be1d +size 4514955 diff --git a/outputs/depthmap-17280589390012.png b/outputs/depthmap-17280589390012.png new file mode 100644 index 0000000000000000000000000000000000000000..b402006b557639f32ab8bca6707da8670b1af0e0 --- /dev/null +++ b/outputs/depthmap-17280589390012.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ea6f7ad174cab4aa46a9a417531e99272e1c3d331fb268e3a019632c9b862f3 +size 1054094 diff --git a/outputs/depthmap-17280589390013-left-right.png b/outputs/depthmap-17280589390013-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..021eec15fcc7987ae228d7d68b024dd59cb3d179 --- /dev/null +++ b/outputs/depthmap-17280589390013-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c715b339d15a3e62f116f72ae27e457ae73c0812c0acdb5f7aa0a1c54dc3f42 +size 2424676 diff --git a/outputs/depthmap-17280589390014.png b/outputs/depthmap-17280589390014.png new file mode 100644 index 0000000000000000000000000000000000000000..da47be403781ab5d6c4824dfa6c9c50200852f5e Binary files /dev/null and b/outputs/depthmap-17280589390014.png differ diff --git a/outputs/depthmap-17280589390015-left-right.png b/outputs/depthmap-17280589390015-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..7f7c45c3e90465b59bb39bcfc075f42591d1db2b --- /dev/null +++ b/outputs/depthmap-17280589390015-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a99678539a0becb9d2ff05a24a9c28b2604f8795669d69fdd9a0ab50bff67e99 +size 1139715 diff --git a/outputs/depthmap-17280589390016.png b/outputs/depthmap-17280589390016.png new file mode 100644 index 0000000000000000000000000000000000000000..fce927f656ba8424b19ea8be61aeaa7c00c5c759 Binary files /dev/null and b/outputs/depthmap-17280589390016.png differ diff --git 
a/outputs/depthmap-17280589390017-left-right.png b/outputs/depthmap-17280589390017-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..6de3f55b79cc6e0dedadbc2035a8b363f6dcd3cb --- /dev/null +++ b/outputs/depthmap-17280589390017-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1de18622af95af82f5615b694438d5058a94ffe0cbe0023cc4617fc5aa83acb6 +size 1117312 diff --git a/outputs/depthmap-17280589390018.png b/outputs/depthmap-17280589390018.png new file mode 100644 index 0000000000000000000000000000000000000000..51447e47ec053c80181ce948ca878a4ca56a8d0e Binary files /dev/null and b/outputs/depthmap-17280589390018.png differ diff --git a/outputs/depthmap-17280589390019-left-right.png b/outputs/depthmap-17280589390019-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..3bd774788c2d424a6335993e4fb1058fe2b46db1 Binary files /dev/null and b/outputs/depthmap-17280589390019-left-right.png differ diff --git a/outputs/depthmap-17280589390020.png b/outputs/depthmap-17280589390020.png new file mode 100644 index 0000000000000000000000000000000000000000..a4f6364b38e39bdcfb6b462d8c2beb47c342ed0a Binary files /dev/null and b/outputs/depthmap-17280589390020.png differ diff --git a/outputs/depthmap-17280589390021-left-right.png b/outputs/depthmap-17280589390021-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f41287018ef6a84a53b0385cf70e983aada8d11e Binary files /dev/null and b/outputs/depthmap-17280589390021-left-right.png differ diff --git a/outputs/depthmap-17280589390022.png b/outputs/depthmap-17280589390022.png new file mode 100644 index 0000000000000000000000000000000000000000..bdec1903ab761880748567f189b067bc1e90b781 Binary files /dev/null and b/outputs/depthmap-17280589390022.png differ diff --git a/outputs/depthmap-17280589390023-left-right.png b/outputs/depthmap-17280589390023-left-right.png new file mode 100644 index 
0000000000000000000000000000000000000000..e5c1ef186716d6df91e83d29ceb1b0744f6fc04f --- /dev/null +++ b/outputs/depthmap-17280589390023-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d02f420056e5d307a887eb5f2d0ba92465a07c9ed96122f0f3d0026e749a2d3 +size 2043356 diff --git a/outputs/depthmap-17280589390024.png b/outputs/depthmap-17280589390024.png new file mode 100644 index 0000000000000000000000000000000000000000..0f1d8349d131267e1e79d4347517fa3239708daf Binary files /dev/null and b/outputs/depthmap-17280589390024.png differ diff --git a/outputs/depthmap-17280589390025-left-right.png b/outputs/depthmap-17280589390025-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..3a02913a7f1545a41ff1ee0cb2e1cb9745d301c4 --- /dev/null +++ b/outputs/depthmap-17280589390025-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79ef199f215841aaed4d1e22edd5cb0898168c5b64548851fb21bf06d30b0562 +size 1250368 diff --git a/outputs/depthmap-17280589390026.png b/outputs/depthmap-17280589390026.png new file mode 100644 index 0000000000000000000000000000000000000000..68a3f54664a6135ac4d0aca49f181c32faa02cdd Binary files /dev/null and b/outputs/depthmap-17280589390026.png differ diff --git a/outputs/depthmap-17280589390027-left-right.png b/outputs/depthmap-17280589390027-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..aa5d0264a51583e4424f5a9e59c283e64157c875 --- /dev/null +++ b/outputs/depthmap-17280589390027-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1798bf00f0b72f6c9b0fca07e501e7040d50010d802ccc483b665874dc9152cd +size 2452248 diff --git a/outputs/depthmap-17280589390028.png b/outputs/depthmap-17280589390028.png new file mode 100644 index 0000000000000000000000000000000000000000..e61a3f4e87c486638f0f516ee49ee7aa1677ee66 Binary files /dev/null and b/outputs/depthmap-17280589390028.png differ diff --git 
a/outputs/depthmap-17280589390029-left-right.png b/outputs/depthmap-17280589390029-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..6134cd55fd51261b525a09c88171244c85587434 --- /dev/null +++ b/outputs/depthmap-17280589390029-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c34f3aee9193c261dee593baea552fe9aacdc38b38ad6f0a4b93b249dae4dc60 +size 2830518 diff --git a/outputs/depthmap-17280589390030.png b/outputs/depthmap-17280589390030.png new file mode 100644 index 0000000000000000000000000000000000000000..194a272bc0587c27413472629c75e9554b925478 Binary files /dev/null and b/outputs/depthmap-17280589390030.png differ diff --git a/outputs/depthmap-17280589390031-left-right.png b/outputs/depthmap-17280589390031-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f50c1c3a236fe5e30e198986ac8e59f6979c5e61 --- /dev/null +++ b/outputs/depthmap-17280589390031-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9965271737e4e36c041df86bfbc67f15e5ee92a2055f2e78a9f3a92a94a0af0e +size 2725960 diff --git a/outputs/depthmap-17280589390032.png b/outputs/depthmap-17280589390032.png new file mode 100644 index 0000000000000000000000000000000000000000..194a272bc0587c27413472629c75e9554b925478 Binary files /dev/null and b/outputs/depthmap-17280589390032.png differ diff --git a/outputs/depthmap-17280589390033-left-right.png b/outputs/depthmap-17280589390033-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f50c1c3a236fe5e30e198986ac8e59f6979c5e61 --- /dev/null +++ b/outputs/depthmap-17280589390033-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9965271737e4e36c041df86bfbc67f15e5ee92a2055f2e78a9f3a92a94a0af0e +size 2725960 diff --git a/outputs/depthmap-17280589390034.png b/outputs/depthmap-17280589390034.png new file mode 100644 index 
0000000000000000000000000000000000000000..18b8fa7f25c16a755b32d1bf522b2a3ec4f8f1c4 Binary files /dev/null and b/outputs/depthmap-17280589390034.png differ diff --git a/outputs/depthmap-17280589390035-left-right.png b/outputs/depthmap-17280589390035-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..780045af3feb83cba36762b31d726dcbfef18a6d --- /dev/null +++ b/outputs/depthmap-17280589390035-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82093f45c02d893ab4187d377853d8916618e3aae7272dca6f0ea94a3b0b5ce8 +size 2757885 diff --git a/outputs/depthmap-17280589390036.png b/outputs/depthmap-17280589390036.png new file mode 100644 index 0000000000000000000000000000000000000000..18b8fa7f25c16a755b32d1bf522b2a3ec4f8f1c4 Binary files /dev/null and b/outputs/depthmap-17280589390036.png differ diff --git a/outputs/depthmap-17280589390037-left-right.png b/outputs/depthmap-17280589390037-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..780045af3feb83cba36762b31d726dcbfef18a6d --- /dev/null +++ b/outputs/depthmap-17280589390037-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82093f45c02d893ab4187d377853d8916618e3aae7272dca6f0ea94a3b0b5ce8 +size 2757885 diff --git a/outputs/depthmap-17280589390038.png b/outputs/depthmap-17280589390038.png new file mode 100644 index 0000000000000000000000000000000000000000..d50bb2e02fb5141e794a97fc1635ef8dc7113f85 Binary files /dev/null and b/outputs/depthmap-17280589390038.png differ diff --git a/outputs/depthmap-17280589390039-left-right.png b/outputs/depthmap-17280589390039-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..52cad1b1d66fef4a8b4cc8c67fb753accf5165c8 --- /dev/null +++ b/outputs/depthmap-17280589390039-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a587a56bf207385702df4cdaf07a4d3e4f60d42eb5bdd05f40abab5f08dd6a +size 
2166828 diff --git a/outputs/depthmap-17280589390040.png b/outputs/depthmap-17280589390040.png new file mode 100644 index 0000000000000000000000000000000000000000..e96ef2a8bdd28adf65d7d159624eaf64379808c6 Binary files /dev/null and b/outputs/depthmap-17280589390040.png differ diff --git a/outputs/depthmap-17280589390041-left-right.png b/outputs/depthmap-17280589390041-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..168873c042d406198d930e419b1f4f3579f73e6e --- /dev/null +++ b/outputs/depthmap-17280589390041-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:470732b76f683567fc6ec6eb75923c8a9ad59b2562a7c96d6fb9e062fccb4bb0 +size 1629201 diff --git a/outputs/depthmap-17280589390042.png b/outputs/depthmap-17280589390042.png new file mode 100644 index 0000000000000000000000000000000000000000..8a4937cae061b66af13472bacdd00b649db97162 Binary files /dev/null and b/outputs/depthmap-17280589390042.png differ diff --git a/outputs/depthmap-17280589390043-left-right.png b/outputs/depthmap-17280589390043-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..3fd13e6f32d2151e52fd1aef574282c27d6eb1be --- /dev/null +++ b/outputs/depthmap-17280589390043-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1b31a6aec41b6e4ac1dfded499612a6bb467b46f10102bb05564cb3046189a9 +size 2659911 diff --git a/outputs/depthmap-17280589390044.png b/outputs/depthmap-17280589390044.png new file mode 100644 index 0000000000000000000000000000000000000000..a8629cd535782f3d2f5965ac88f64805763baa66 Binary files /dev/null and b/outputs/depthmap-17280589390044.png differ diff --git a/outputs/depthmap-17280589390045-left-right.png b/outputs/depthmap-17280589390045-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..22c82b7951c16fceafca732f4940b7a1f5f0c21a --- /dev/null +++ b/outputs/depthmap-17280589390045-left-right.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:4a3df0d5b078deb1d7df23511eb43dac85ca67865ae92e0f85892dc268f231e7 +size 2043119 diff --git a/outputs/depthmap-17280589390046.png b/outputs/depthmap-17280589390046.png new file mode 100644 index 0000000000000000000000000000000000000000..97b5a0d0df565c642ae1c9f1b360dd3372766e5f Binary files /dev/null and b/outputs/depthmap-17280589390046.png differ diff --git a/outputs/depthmap-17280589390047-left-right.png b/outputs/depthmap-17280589390047-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..e1cfd1d8fa7d119b58320558faa1a05e84994ac2 --- /dev/null +++ b/outputs/depthmap-17280589390047-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a7ad6563ebf594cb852839c43f849bf98c75da4e5f515d522764d8a02a5a89 +size 1035386 diff --git a/outputs/depthmap-17280589390048.png b/outputs/depthmap-17280589390048.png new file mode 100644 index 0000000000000000000000000000000000000000..d32eae288d5a4bdb9bdbc1a8f56daadbc807cfb8 Binary files /dev/null and b/outputs/depthmap-17280589390048.png differ diff --git a/outputs/depthmap-17280589390049-left-right.png b/outputs/depthmap-17280589390049-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..04e69034ef8501ad66783236b8d9d691caeabb51 --- /dev/null +++ b/outputs/depthmap-17280589390049-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd00ab93f5aa4fbc5f95ae4cb1c23e70e5a4a04aeeaf1f9623780c3bdc94825d +size 1598352 diff --git a/outputs/depthmap-17280589390050.png b/outputs/depthmap-17280589390050.png new file mode 100644 index 0000000000000000000000000000000000000000..37d85f15ad7f8c916a7d2ab2d8fc6443f3d693c4 Binary files /dev/null and b/outputs/depthmap-17280589390050.png differ diff --git a/outputs/depthmap-17280589390051-left-right.png b/outputs/depthmap-17280589390051-left-right.png new file mode 100644 index 
0000000000000000000000000000000000000000..66aed1ef38d82e465f56c16205e5a3b672c2fa19 --- /dev/null +++ b/outputs/depthmap-17280589390051-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65cefe51863da358ccdca4f6dc9daf8c74554311a81026303f2c95d411448b99 +size 1747396 diff --git a/outputs/depthmap-17280589390052.png b/outputs/depthmap-17280589390052.png new file mode 100644 index 0000000000000000000000000000000000000000..724b27a2606443e05456ccc147144e70068ca34a --- /dev/null +++ b/outputs/depthmap-17280589390052.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16b226595c87b0eb2b49597e055f6cb672adc93bda2e10e768a28852368d016b +size 1227466 diff --git a/outputs/depthmap-17280589390053-left-right.png b/outputs/depthmap-17280589390053-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..9922008283a37f4d520e16729d2a0beb098176b3 --- /dev/null +++ b/outputs/depthmap-17280589390053-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04f6f5a27156497c091f255f4a10978fd74b68c08288feec105c51e4277b913e +size 2861213 diff --git a/outputs/depthmap-17280589390054.png b/outputs/depthmap-17280589390054.png new file mode 100644 index 0000000000000000000000000000000000000000..e2f39443c22f49de4641951f24bbcd35667d1dfd --- /dev/null +++ b/outputs/depthmap-17280589390054.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae3e406b8e51626ea429ee861a0816f36455c2da6430f124df2c1c862de615da +size 1164238 diff --git a/outputs/depthmap-17280589390055-left-right.png b/outputs/depthmap-17280589390055-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..7f7d2cc8c2860af5156460a3425be4ca45fbfb5d --- /dev/null +++ b/outputs/depthmap-17280589390055-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf7c2f8660a4701bbece074d869f4a6d0fa3b6ed3ee300a102de6894f5f70ef +size 2537417 diff --git 
a/outputs/depthmap-17280589390056.png b/outputs/depthmap-17280589390056.png new file mode 100644 index 0000000000000000000000000000000000000000..57115e650ad052e58e64eb5d0a51cc606a0d49a9 --- /dev/null +++ b/outputs/depthmap-17280589390056.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d570d8a6f74fdf3d6c61d51b01856939b95c71f166f57843f7cbcd68a7bff10 +size 1165440 diff --git a/outputs/depthmap-17280589390057-left-right.png b/outputs/depthmap-17280589390057-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..0d5b5cb2a56c64da7975615df95d817a8f1c943b --- /dev/null +++ b/outputs/depthmap-17280589390057-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc23edb616393eb487b8c39833cf1a00879998cd9eb7ed25fe27826085338861 +size 2635672 diff --git a/outputs/depthmap-17280589390058.png b/outputs/depthmap-17280589390058.png new file mode 100644 index 0000000000000000000000000000000000000000..7d1205cd5384afd91dd146eea71383b8dcc86b5d --- /dev/null +++ b/outputs/depthmap-17280589390058.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:959cc2b8e9d6d11716f455712194765ce1147bcf688ecf3dbc7cf7b8ffe46ae1 +size 1043697 diff --git a/outputs/depthmap-17280589390059-left-right.png b/outputs/depthmap-17280589390059-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..27dc3817ba6ae6ae71d71df0428b975b67258d48 --- /dev/null +++ b/outputs/depthmap-17280589390059-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:270476d53cc860ca4dcfe4df43316fd5802d53db1f3f5e97c2d944d6df57ca11 +size 2163407 diff --git a/outputs/depthmap-17280589390060.png b/outputs/depthmap-17280589390060.png new file mode 100644 index 0000000000000000000000000000000000000000..6aa2186413dd9be20ad413856cf0cddc51051b58 Binary files /dev/null and b/outputs/depthmap-17280589390060.png differ diff --git a/outputs/depthmap-17280589390061-left-right.png 
b/outputs/depthmap-17280589390061-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..0dd704c36a60473e2ac853812765134373ee16e0 --- /dev/null +++ b/outputs/depthmap-17280589390061-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba5ddaca08a23dc979d61ac6d64a9430eeac69406b948210e67e377ccf7c0007 +size 2443955 diff --git a/outputs/depthmap-17280589390062.png b/outputs/depthmap-17280589390062.png new file mode 100644 index 0000000000000000000000000000000000000000..dfabc0312742fdec8d56cb520f6a1757a3d6996d Binary files /dev/null and b/outputs/depthmap-17280589390062.png differ diff --git a/outputs/depthmap-17280589390063-left-right.png b/outputs/depthmap-17280589390063-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..029de69e340b50c87ead11365d4433cb58b6bbee --- /dev/null +++ b/outputs/depthmap-17280589390063-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7bcf08bb2d85bddf25de497efc97ad038848119cb154638d7f1bafb78006ca5 +size 2099887 diff --git a/outputs/depthmap-17280589390064.png b/outputs/depthmap-17280589390064.png new file mode 100644 index 0000000000000000000000000000000000000000..1f44df794837d9b19e0da47598786f69f66fdc71 Binary files /dev/null and b/outputs/depthmap-17280589390064.png differ diff --git a/outputs/depthmap-17280589390065-left-right.png b/outputs/depthmap-17280589390065-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..a904f3e1634d741fea69870cb793c2af952f7528 --- /dev/null +++ b/outputs/depthmap-17280589390065-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936607697dbac9929c62f95de19ac98f04cff46d54563e6a35b6cc01f3273106 +size 1187950 diff --git a/outputs/depthmap-17280589390066.png b/outputs/depthmap-17280589390066.png new file mode 100644 index 0000000000000000000000000000000000000000..df6aa0e91b163aca727e673ba0f0cc9274b0822b Binary files 
/dev/null and b/outputs/depthmap-17280589390066.png differ diff --git a/outputs/depthmap-17280589390067-left-right.png b/outputs/depthmap-17280589390067-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..6bfba224721e51f5ea722ca8c73c3a5654338730 --- /dev/null +++ b/outputs/depthmap-17280589390067-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdea04edf4ba41d9517e53f1216c420b6c85e1d2f76b9d7c818e29d803f2d599 +size 1652394 diff --git a/outputs/depthmap-17280589390068.png b/outputs/depthmap-17280589390068.png new file mode 100644 index 0000000000000000000000000000000000000000..e935272ca6cf84783708a582c27789e603014ac6 Binary files /dev/null and b/outputs/depthmap-17280589390068.png differ diff --git a/outputs/depthmap-17280589390069-left-right.png b/outputs/depthmap-17280589390069-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..3ac61b4d645b9415741edc333cc9f1b5adac9885 --- /dev/null +++ b/outputs/depthmap-17280589390069-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cbf72b67d5f390062fbdfb826b0e69cd102def5f1e4676b3e956682ae1fafde +size 2262269 diff --git a/outputs/depthmap-17280589390070.png b/outputs/depthmap-17280589390070.png new file mode 100644 index 0000000000000000000000000000000000000000..3d5e79fabd965ef2b290f2eff5b810df61fd4e13 Binary files /dev/null and b/outputs/depthmap-17280589390070.png differ diff --git a/outputs/depthmap-17280589390071-left-right.png b/outputs/depthmap-17280589390071-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..ff2cc34bc4ca8212bc86bff97acfe2a53cbdf4fe --- /dev/null +++ b/outputs/depthmap-17280589390071-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d95b26ba59d9751ac9d5a3e00f38302dfec606f3885b6bfc853ab6f69b55e8b8 +size 2303714 diff --git a/outputs/depthmap-17280589390072.png b/outputs/depthmap-17280589390072.png new 
file mode 100644 index 0000000000000000000000000000000000000000..22f396eced584639a187189792ca0e7db1143244 Binary files /dev/null and b/outputs/depthmap-17280589390072.png differ diff --git a/outputs/depthmap-17280589390073-left-right.png b/outputs/depthmap-17280589390073-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..79ffae62b8f6a7cb340296abf5bfcf543f87c767 --- /dev/null +++ b/outputs/depthmap-17280589390073-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b217bc2cafbd7e3f6d5163d16690e2ef798b3e3f0c22f9334a4e87f6079eee4 +size 1529290 diff --git a/outputs/depthmap-17280589390074.png b/outputs/depthmap-17280589390074.png new file mode 100644 index 0000000000000000000000000000000000000000..08af3622f065736172e4bc0acb960500607abd5d Binary files /dev/null and b/outputs/depthmap-17280589390074.png differ diff --git a/outputs/depthmap-17280589390075-left-right.png b/outputs/depthmap-17280589390075-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..17514e29d3b3c57ad41d350f719ee3df2c89b271 --- /dev/null +++ b/outputs/depthmap-17280589390075-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0edc5c9c65c7fe73c39f66673a24da81ae3946cdceaf71e0b82b7365b6c106c7 +size 2095985 diff --git a/outputs/depthmap-17280589390076.png b/outputs/depthmap-17280589390076.png new file mode 100644 index 0000000000000000000000000000000000000000..17764d94b6f820db339b51eb0e3e7a76efe4b6ab Binary files /dev/null and b/outputs/depthmap-17280589390076.png differ diff --git a/outputs/depthmap-17280589390077-left-right.png b/outputs/depthmap-17280589390077-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..212addf2c00f2e4fc16d2830224336177a68c5b0 --- /dev/null +++ b/outputs/depthmap-17280589390077-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:79d7ade1116fb88cabc1bcb537b0880ee4374e6187f872294b3bb480560c9fac +size 2387952 diff --git a/outputs/depthmap-17280589390078.png b/outputs/depthmap-17280589390078.png new file mode 100644 index 0000000000000000000000000000000000000000..20d2fffca41ce5e5be6adc3facbff5ee58b85e41 Binary files /dev/null and b/outputs/depthmap-17280589390078.png differ diff --git a/outputs/depthmap-17280589390079-left-right.png b/outputs/depthmap-17280589390079-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..1b18bcf737f3482a3912965b9cc447ff70c09ead --- /dev/null +++ b/outputs/depthmap-17280589390079-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59a158d590be63c9e2ef1b862876773dbacd0994e346b2d873efbcbf14ef5b48 +size 2415018 diff --git a/outputs/depthmap-17280589390080.png b/outputs/depthmap-17280589390080.png new file mode 100644 index 0000000000000000000000000000000000000000..7b058410f4ce38818de5d437ff1b45008db2aa8c Binary files /dev/null and b/outputs/depthmap-17280589390080.png differ diff --git a/outputs/depthmap-17280589390081-left-right.png b/outputs/depthmap-17280589390081-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..452805e19f645712a8d4fa389677df5b858ef8f5 --- /dev/null +++ b/outputs/depthmap-17280589390081-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf7f0594f0f73913798820d486ac23199a076de98229da40699452d86c3b290 +size 2488869 diff --git a/outputs/depthmap-17280589390082.png b/outputs/depthmap-17280589390082.png new file mode 100644 index 0000000000000000000000000000000000000000..fde5b63ae505889f04e126b21701656d038f29f5 Binary files /dev/null and b/outputs/depthmap-17280589390082.png differ diff --git a/outputs/depthmap-17280589390083-left-right.png b/outputs/depthmap-17280589390083-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..ca06a4cc42382336926dac516ad223cfe8444102 Binary files 
/dev/null and b/outputs/depthmap-17280589390083-left-right.png differ diff --git a/outputs/depthmap-17280589390084.png b/outputs/depthmap-17280589390084.png new file mode 100644 index 0000000000000000000000000000000000000000..4b9e1e68c275f31240136b4b55cd072fdd668889 Binary files /dev/null and b/outputs/depthmap-17280589390084.png differ diff --git a/outputs/depthmap-17280589390085-left-right.png b/outputs/depthmap-17280589390085-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f5de8f95beb22fd5c0adc07b9f7deceb3eb07a14 --- /dev/null +++ b/outputs/depthmap-17280589390085-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aac5a68ad2ae0a811f2fbaa235db302758ff3b6790f21c05416054868e7230b +size 1938141 diff --git a/outputs/depthmap-17280589390086.png b/outputs/depthmap-17280589390086.png new file mode 100644 index 0000000000000000000000000000000000000000..56e0db97eeace99edf9741329cce1231501ab79a Binary files /dev/null and b/outputs/depthmap-17280589390086.png differ diff --git a/outputs/depthmap-17280589390087-left-right.png b/outputs/depthmap-17280589390087-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..18dd0c6af0b5430aee8c7f26b085fed67a4c1808 --- /dev/null +++ b/outputs/depthmap-17280589390087-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ad598129994596b043f456766e0542894e9148ba5db60fdd01d796b324cab70 +size 1567431 diff --git a/outputs/depthmap-17280589390088.png b/outputs/depthmap-17280589390088.png new file mode 100644 index 0000000000000000000000000000000000000000..ab11acc925569ee07f6689d3042db22e42175986 Binary files /dev/null and b/outputs/depthmap-17280589390088.png differ diff --git a/outputs/depthmap-17280589390089-left-right.png b/outputs/depthmap-17280589390089-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..49153a81482d5d8fa31783ad4d314af0174c8cfc --- /dev/null +++ 
b/outputs/depthmap-17280589390089-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e75aa2be77a2a785d79c35ebb03d89a21cde9778a56e6c339d28ef1e0d57118 +size 1888887 diff --git a/outputs/depthmap-17281752370001.png b/outputs/depthmap-17281752370001.png new file mode 100644 index 0000000000000000000000000000000000000000..f26c52bdbd0822a47e63a26a20b61ebe3a887caa Binary files /dev/null and b/outputs/depthmap-17281752370001.png differ diff --git a/outputs/depthmap-17281752370002.png b/outputs/depthmap-17281752370002.png new file mode 100644 index 0000000000000000000000000000000000000000..d740b138ccc92d7072319c3fe19aa0e8829800a3 Binary files /dev/null and b/outputs/depthmap-17281752370002.png differ diff --git a/outputs/depthmap-17285060200001.png b/outputs/depthmap-17285060200001.png new file mode 100644 index 0000000000000000000000000000000000000000..477ae88495ab57ef8e4fc02e6118ada39dce3b11 --- /dev/null +++ b/outputs/depthmap-17285060200001.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c04dccdbb27ec59fb2c56e99e96730571536d3cb665f8b010cc1aedf06e2b40c +size 1019547 diff --git a/outputs/depthmap-17285060200002-left-right.png b/outputs/depthmap-17285060200002-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..316ae583b29eef115303fbf4ff13f87ab837e49b --- /dev/null +++ b/outputs/depthmap-17285060200002-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:640d87a7ffcbd3ad5f02994184dce6f656badf2a6796d43c6bef689bbc38936c +size 8085739 diff --git a/outputs/depthmap-17285060200003-top-bottom.png b/outputs/depthmap-17285060200003-top-bottom.png new file mode 100644 index 0000000000000000000000000000000000000000..c5a28cac1dc7b953e8781837af785112a6bda72e --- /dev/null +++ b/outputs/depthmap-17285060200003-top-bottom.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:272b19538db4439b8163821687d1bead22107411b82840eacb877372c6dce598 +size 8726473 diff --git a/outputs/depthmap-17285371260001.png b/outputs/depthmap-17285371260001.png new file mode 100644 index 0000000000000000000000000000000000000000..0c2fb8284869355a1325cbfa617ae5b9a36b7ba9 Binary files /dev/null and b/outputs/depthmap-17285371260001.png differ diff --git a/outputs/depthmap-17285371260002-left-right.png b/outputs/depthmap-17285371260002-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..94f1391099db9b044fb476c1501962518af88974 --- /dev/null +++ b/outputs/depthmap-17285371260002-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7250c0b702255fef42c369e60ea3458a3dcbee2da2e05c8abb60bc94c132eb9 +size 2155140 diff --git a/outputs/depthmap-17285859980001.png b/outputs/depthmap-17285859980001.png new file mode 100644 index 0000000000000000000000000000000000000000..2c6f42c2503e6304c2001b5d754ce683dbd7a4c6 --- /dev/null +++ b/outputs/depthmap-17285859980001.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c4fbd3b794e9c23b887c1f4043baeec503ea502eae2f39d22a7af0032d51d07 +size 1004185 diff --git a/outputs/depthmap-17285859980002.png b/outputs/depthmap-17285859980002.png new file mode 100644 index 0000000000000000000000000000000000000000..2c6f42c2503e6304c2001b5d754ce683dbd7a4c6 --- /dev/null +++ b/outputs/depthmap-17285859980002.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c4fbd3b794e9c23b887c1f4043baeec503ea502eae2f39d22a7af0032d51d07 +size 1004185 diff --git a/outputs/depthmap-17285859980003-left-right.png b/outputs/depthmap-17285859980003-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..672efbc3590c74993a36a148fe7f1399c316de13 --- /dev/null +++ b/outputs/depthmap-17285859980003-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3e78e5f8edaeb1bcc9fabbc5351a546a00ce152cef10f6fe74852250cac90079 +size 3994071 diff --git a/outputs/depthmap-17285861380001.png b/outputs/depthmap-17285861380001.png new file mode 100644 index 0000000000000000000000000000000000000000..447d83a8be2cc5032f514c6b4cb8d9ad0b47576b Binary files /dev/null and b/outputs/depthmap-17285861380001.png differ diff --git a/outputs/depthmap-17285861380002-left-right.png b/outputs/depthmap-17285861380002-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..356e4403f4c275ae925b2482994f187bbea756a9 --- /dev/null +++ b/outputs/depthmap-17285861380002-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e80b8bf2837d542709ac2cf748f4135b9bbf3e8eb9f62cc2504a1c74915e361 +size 2617376 diff --git a/outputs/depthmap-17285861380003-left-right_video.avi b/outputs/depthmap-17285861380003-left-right_video.avi new file mode 100644 index 0000000000000000000000000000000000000000..3d69278dab111dea1c30222a2754f169f41ee480 --- /dev/null +++ b/outputs/depthmap-17285861380003-left-right_video.avi @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abf110900160a1858f03deff3fb35bd85d63974ab628b1433b907e01ce7f7ce +size 3062507370 diff --git a/outputs/depthmap-17285861380004-depth_video.avi b/outputs/depthmap-17285861380004-depth_video.avi new file mode 100644 index 0000000000000000000000000000000000000000..9c7d67f9a223826b6e63fc7bc3cc1aea8e303269 --- /dev/null +++ b/outputs/depthmap-17285861380004-depth_video.avi @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aa485d9181f2e03d16537d436f9ae4ba5bd66f0e768762e4392ba87e8ecb7fc +size 675259854 diff --git a/outputs/depthmap-17286922440001.png b/outputs/depthmap-17286922440001.png new file mode 100644 index 0000000000000000000000000000000000000000..2042bdfa67db79bb5f676db00a98543312c4fbe7 Binary files /dev/null and b/outputs/depthmap-17286922440001.png differ diff --git 
a/outputs/depthmap-17286927930001.png b/outputs/depthmap-17286927930001.png new file mode 100644 index 0000000000000000000000000000000000000000..d6276f01ad5f06107deeba9e19ba53fe570fc066 Binary files /dev/null and b/outputs/depthmap-17286927930001.png differ diff --git a/outputs/depthmap-17286927930002-left-right.png b/outputs/depthmap-17286927930002-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..145c1790939a83d9afdf60910c7aafc5b08cc057 --- /dev/null +++ b/outputs/depthmap-17286927930002-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b74a96a16934edc0ceebb36de48d392ad0ea553a1227cc5bcde91742024cc58 +size 1038686 diff --git a/outputs/depthmap-17286927930003.png b/outputs/depthmap-17286927930003.png new file mode 100644 index 0000000000000000000000000000000000000000..8f36ed3d09f322c2c3c146c4158779630dec1ffb --- /dev/null +++ b/outputs/depthmap-17286927930003.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcbc59fa373603de3b965b3d13e22536dfbc8c748045f36c75f0cfaef1ac616e +size 2042344 diff --git a/outputs/depthmap-17286927930004-left-right.png b/outputs/depthmap-17286927930004-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f8e4ca9be2c63d7fe3cc3ee73a2b17ed6a673187 --- /dev/null +++ b/outputs/depthmap-17286927930004-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63226e685d661d3586e79d4084db841e5bd0107fb7416b6a6c4a2114842c680a +size 3216163 diff --git a/outputs/depthmap-17286927930005.png b/outputs/depthmap-17286927930005.png new file mode 100644 index 0000000000000000000000000000000000000000..1619dc40ea416b08b8739198cf6de4e359bc26d5 --- /dev/null +++ b/outputs/depthmap-17286927930005.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b7a68bf518f503cbe5f4ceae43a15e6c82f7548e51459d51900de008c7aa3dc +size 2083164 diff --git a/outputs/depthmap-17286927930006-left-right.png 
b/outputs/depthmap-17286927930006-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..21cc510a12103fcb6c3a4ba981f45e292cd5989b --- /dev/null +++ b/outputs/depthmap-17286927930006-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3d53a6f5e6594b29fa829646489cb7368fc4b76142a20955ebfd283af84f712 +size 2590775 diff --git a/outputs/depthmap-17286927930007.png b/outputs/depthmap-17286927930007.png new file mode 100644 index 0000000000000000000000000000000000000000..e18306c310beee78a5cedc5688759c728e3b81d6 Binary files /dev/null and b/outputs/depthmap-17286927930007.png differ diff --git a/outputs/depthmap-17286927930008-left-right.png b/outputs/depthmap-17286927930008-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..503ce084e6ceb1d211025dd1e0eb7c5f4f29a288 Binary files /dev/null and b/outputs/depthmap-17286927930008-left-right.png differ diff --git a/outputs/depthmap-17286927930009.png b/outputs/depthmap-17286927930009.png new file mode 100644 index 0000000000000000000000000000000000000000..038c4532f66a156d77d2a3879fb263e3bb93aef2 Binary files /dev/null and b/outputs/depthmap-17286927930009.png differ diff --git a/outputs/depthmap-17286927930010-left-right.png b/outputs/depthmap-17286927930010-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f4bcaf36d90787bf3a120a6ec2255bf44239c306 --- /dev/null +++ b/outputs/depthmap-17286927930010-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:960163f8fb2086e88ca718f6cafabaf55f21bdf9fdebec476b3051c19fc597e8 +size 1072535 diff --git a/outputs/depthmap-17286927930011.png b/outputs/depthmap-17286927930011.png new file mode 100644 index 0000000000000000000000000000000000000000..5c67799a999609ebf2e4a82bc397d1cb10a66020 Binary files /dev/null and b/outputs/depthmap-17286927930011.png differ diff --git a/outputs/depthmap-17286927930012-left-right.png 
b/outputs/depthmap-17286927930012-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..8d50242d80f4c273e5c5c2d975d2d3fdecf41522 --- /dev/null +++ b/outputs/depthmap-17286927930012-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28edd835928d45900e0f8c1310232ca33de8ded4f7d18838e20f3b833bea8ea7 +size 1113429 diff --git a/outputs/depthmap-17286927930013.png b/outputs/depthmap-17286927930013.png new file mode 100644 index 0000000000000000000000000000000000000000..5e1e8b8b72d5da7191195a31d9aa430119ff292d Binary files /dev/null and b/outputs/depthmap-17286927930013.png differ diff --git a/outputs/depthmap-17286927930014-left-right.png b/outputs/depthmap-17286927930014-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..d0d1efe9c495823e77558e755b0814fa68752af1 Binary files /dev/null and b/outputs/depthmap-17286927930014-left-right.png differ diff --git a/outputs/depthmap-17286927930015.png b/outputs/depthmap-17286927930015.png new file mode 100644 index 0000000000000000000000000000000000000000..a68b5f6a7e9fd682e50081ac4ee35b54f7672156 Binary files /dev/null and b/outputs/depthmap-17286927930015.png differ diff --git a/outputs/depthmap-17286927930016-left-right.png b/outputs/depthmap-17286927930016-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..ba62ec6ea6bfb7fd9a31d1fa9ec7279e35047f7a --- /dev/null +++ b/outputs/depthmap-17286927930016-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:787eff8093cf624d515507c25c6805e5a5f7403bd69571fb0ff3d812011a1f0b +size 1163869 diff --git a/outputs/depthmap-17286927930017.png b/outputs/depthmap-17286927930017.png new file mode 100644 index 0000000000000000000000000000000000000000..0990526e09ba5db96282dbd19f288cb36be92e61 Binary files /dev/null and b/outputs/depthmap-17286927930017.png differ diff --git a/outputs/depthmap-17286927930018-left-right.png 
b/outputs/depthmap-17286927930018-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f6432cd108315d1c35d8ccf3ba81c1974f1a191c --- /dev/null +++ b/outputs/depthmap-17286927930018-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:742ad31aa5c9bc0c503e9326cb1ac548d59934d949c29c1139deef6280c2741e +size 2726566 diff --git a/outputs/depthmap-17286927930019.png b/outputs/depthmap-17286927930019.png new file mode 100644 index 0000000000000000000000000000000000000000..d8d26bcb4b8fff07c7a514943c01769f88f993ac Binary files /dev/null and b/outputs/depthmap-17286927930019.png differ diff --git a/outputs/depthmap-17286927930020-left-right.png b/outputs/depthmap-17286927930020-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..1975f98d591a3c8010cd95105889c4bc515f9b11 --- /dev/null +++ b/outputs/depthmap-17286927930020-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bef119419c56a08d4049600cebeb51882242e15e985be59c719a3604387eb59c +size 2082431 diff --git a/outputs/depthmap-17286927930021.png b/outputs/depthmap-17286927930021.png new file mode 100644 index 0000000000000000000000000000000000000000..7262583f40f4324acd79f91aacfdb02f4f3cc05e Binary files /dev/null and b/outputs/depthmap-17286927930021.png differ diff --git a/outputs/depthmap-17286927930022-left-right.png b/outputs/depthmap-17286927930022-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..c76834caf86c2306c60f4927e53492146fa426a6 Binary files /dev/null and b/outputs/depthmap-17286927930022-left-right.png differ diff --git a/outputs/depthmap-17286927930023.png b/outputs/depthmap-17286927930023.png new file mode 100644 index 0000000000000000000000000000000000000000..6ca4b905dc9da4b64acb89e91f4ad45e77daa2f7 Binary files /dev/null and b/outputs/depthmap-17286927930023.png differ diff --git a/outputs/depthmap-17286927930024-left-right.png 
b/outputs/depthmap-17286927930024-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..d78e91962f1114f37d0790cd89ee1815c14c28f9 Binary files /dev/null and b/outputs/depthmap-17286927930024-left-right.png differ diff --git a/outputs/depthmap-17286927930025.png b/outputs/depthmap-17286927930025.png new file mode 100644 index 0000000000000000000000000000000000000000..180a33b7d3c34379ee9e23886cf4b217f8de1443 Binary files /dev/null and b/outputs/depthmap-17286927930025.png differ diff --git a/outputs/depthmap-17286927930026-left-right.png b/outputs/depthmap-17286927930026-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..6eadb776f61bfe7f1ddf1e5c1673d16a2b4b9698 --- /dev/null +++ b/outputs/depthmap-17286927930026-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5d35fabe92fde15f6f5c3a617fdbc3ef989db604ff326131748babc5dd32ec2 +size 1072412 diff --git a/outputs/depthmap-17286927930027.png b/outputs/depthmap-17286927930027.png new file mode 100644 index 0000000000000000000000000000000000000000..5ca6937f89cfb732ba1104e07bbde24324b8d411 Binary files /dev/null and b/outputs/depthmap-17286927930027.png differ diff --git a/outputs/depthmap-17286927930028-left-right.png b/outputs/depthmap-17286927930028-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..66aac14333d15a2942372794a805df1325260201 --- /dev/null +++ b/outputs/depthmap-17286927930028-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08cf4b56b6c7fa5263b4749863b9dfbdcd89dba4552591d268693797cbae7203 +size 1046366 diff --git a/outputs/depthmap-17286927930029.png b/outputs/depthmap-17286927930029.png new file mode 100644 index 0000000000000000000000000000000000000000..cbeb627d10816f40fe5ea76b04b5932a9df8bb0f Binary files /dev/null and b/outputs/depthmap-17286927930029.png differ diff --git a/outputs/depthmap-17286927930030-left-right.png 
b/outputs/depthmap-17286927930030-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..e39000ddcfe1e77e0967b8f55c2d9ff7b2f5c413 Binary files /dev/null and b/outputs/depthmap-17286927930030-left-right.png differ diff --git a/outputs/depthmap-17286927930031.png b/outputs/depthmap-17286927930031.png new file mode 100644 index 0000000000000000000000000000000000000000..1ba68a8ab710fb1f0b22d6104a7a5381252b96f8 Binary files /dev/null and b/outputs/depthmap-17286927930031.png differ diff --git a/outputs/depthmap-17286927930032-left-right.png b/outputs/depthmap-17286927930032-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..360810a7366cc57235deb2bf0411006f49df9616 Binary files /dev/null and b/outputs/depthmap-17286927930032-left-right.png differ diff --git a/outputs/depthmap-17286927930033.png b/outputs/depthmap-17286927930033.png new file mode 100644 index 0000000000000000000000000000000000000000..16ddb7beffff7a24bcfbbd8c7f4315528c0d4c8f Binary files /dev/null and b/outputs/depthmap-17286927930033.png differ diff --git a/outputs/depthmap-17286927930034-left-right.png b/outputs/depthmap-17286927930034-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f3b3b22711c4a1e6354d2530455319b2f5a3460e Binary files /dev/null and b/outputs/depthmap-17286927930034-left-right.png differ diff --git a/outputs/depthmap-17286927930035.png b/outputs/depthmap-17286927930035.png new file mode 100644 index 0000000000000000000000000000000000000000..72afe96cf6ebb9b84d74225ebceba77091f972ef Binary files /dev/null and b/outputs/depthmap-17286927930035.png differ diff --git a/outputs/depthmap-17286927930036-left-right.png b/outputs/depthmap-17286927930036-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..122fdc8215e96444ad18970143e72139e3a5faaa --- /dev/null +++ b/outputs/depthmap-17286927930036-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:b2b7b64a94602155624a71b869d9f21e64a8ff8f2876f72b3df0f8a50824e137 +size 1173623 diff --git a/outputs/depthmap-17286927930037.png b/outputs/depthmap-17286927930037.png new file mode 100644 index 0000000000000000000000000000000000000000..ceddb82cebe402cca1abac52eb1034745d52ed38 Binary files /dev/null and b/outputs/depthmap-17286927930037.png differ diff --git a/outputs/depthmap-17286927930038-left-right.png b/outputs/depthmap-17286927930038-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..956dc5943d578b9e1efd36d567a84d8c2c2446c0 Binary files /dev/null and b/outputs/depthmap-17286927930038-left-right.png differ diff --git a/outputs/depthmap-17286927930039.png b/outputs/depthmap-17286927930039.png new file mode 100644 index 0000000000000000000000000000000000000000..d624bdf109d865c36654d81ab71c9848369c77d0 Binary files /dev/null and b/outputs/depthmap-17286927930039.png differ diff --git a/outputs/depthmap-17286927930040-left-right.png b/outputs/depthmap-17286927930040-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..27c621c292f96fa8e18236371bfe3c3060c0eff4 Binary files /dev/null and b/outputs/depthmap-17286927930040-left-right.png differ diff --git a/outputs/depthmap-17286927930041.png b/outputs/depthmap-17286927930041.png new file mode 100644 index 0000000000000000000000000000000000000000..4e1afc15c19f4310755b44be0db57f144ce1d023 Binary files /dev/null and b/outputs/depthmap-17286927930041.png differ diff --git a/outputs/depthmap-17286927930042-left-right.png b/outputs/depthmap-17286927930042-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..215dd25f4c6d1afc5d8a25ac38bed3a01809a59c Binary files /dev/null and b/outputs/depthmap-17286927930042-left-right.png differ diff --git a/outputs/depthmap-17286927930043.png b/outputs/depthmap-17286927930043.png new file mode 100644 index 
0000000000000000000000000000000000000000..6bc32ad1dd3a804006504c19df7972b1179a9778 Binary files /dev/null and b/outputs/depthmap-17286927930043.png differ diff --git a/outputs/depthmap-17286927930044-left-right.png b/outputs/depthmap-17286927930044-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..942e14d787d1de198f98ecf7f8197e0c7db54637 Binary files /dev/null and b/outputs/depthmap-17286927930044-left-right.png differ diff --git a/outputs/depthmap-17286927930045.png b/outputs/depthmap-17286927930045.png new file mode 100644 index 0000000000000000000000000000000000000000..9b202be4ab7017d38bf445c1c12c789ef4e140a3 Binary files /dev/null and b/outputs/depthmap-17286927930045.png differ diff --git a/outputs/depthmap-17286927930046-left-right.png b/outputs/depthmap-17286927930046-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..0b0e701abec4847d920009db22018780cb4f87ba --- /dev/null +++ b/outputs/depthmap-17286927930046-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e3712b754eb4bd811161498183699a12c5d878ff1e861070efee444a8f5d195 +size 1057450 diff --git a/outputs/depthmap-17286927930047.png b/outputs/depthmap-17286927930047.png new file mode 100644 index 0000000000000000000000000000000000000000..59fd7ee931b84c9181973ed9e4f5157db50444ef Binary files /dev/null and b/outputs/depthmap-17286927930047.png differ diff --git a/outputs/depthmap-17286927930048-left-right.png b/outputs/depthmap-17286927930048-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..c64944beb78832526b7c36b64d04216a099afceb Binary files /dev/null and b/outputs/depthmap-17286927930048-left-right.png differ diff --git a/outputs/depthmap-17286927930049.png b/outputs/depthmap-17286927930049.png new file mode 100644 index 0000000000000000000000000000000000000000..6977db878d6e6651ec0bfbdad72bd768f16b0622 Binary files /dev/null and b/outputs/depthmap-17286927930049.png 
differ diff --git a/outputs/depthmap-17286927930050-left-right.png b/outputs/depthmap-17286927930050-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..7ad74d825416dae76c75606f43e0f87f5eadae37 --- /dev/null +++ b/outputs/depthmap-17286927930050-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9488b9c8078fcef33b590d7aa74bc716c4a1c8e3b912ea573994fde3926423ea +size 2139297 diff --git a/outputs/depthmap-17286927930051.png b/outputs/depthmap-17286927930051.png new file mode 100644 index 0000000000000000000000000000000000000000..aa44ab67f993fe0fda3b1315285ee8d921f7d088 Binary files /dev/null and b/outputs/depthmap-17286927930051.png differ diff --git a/outputs/depthmap-17286927930052-left-right.png b/outputs/depthmap-17286927930052-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..576a813fce18bde726d5ba3fced76c12370b77ab --- /dev/null +++ b/outputs/depthmap-17286927930052-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0dae8cc039aba350e04222eaa06e0f84af202b7c5b435a9962419d9d9471789 +size 1663533 diff --git a/outputs/depthmap-17286927930053.png b/outputs/depthmap-17286927930053.png new file mode 100644 index 0000000000000000000000000000000000000000..1f1db04a80065ef997a4e6ae267058eb07b6251e --- /dev/null +++ b/outputs/depthmap-17286927930053.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf1b54f44eb4cdce074e7dbbc07f8d8cb4355a4edb7ea5900592b84453d9e536 +size 2645411 diff --git a/outputs/depthmap-17286927930054-left-right.png b/outputs/depthmap-17286927930054-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..eab8d4852ab9862fe719dc15a8fa5018b5d84e4c --- /dev/null +++ b/outputs/depthmap-17286927930054-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d70fd8935dd9adbea87654f479f86435356b9cb3f3b67d41f7fe42d472588f0 +size 5923561 diff 
--git a/outputs/depthmap-17286927930055.png b/outputs/depthmap-17286927930055.png new file mode 100644 index 0000000000000000000000000000000000000000..3e04d20c0b25e88eb883e7a16e805ed401379c6b --- /dev/null +++ b/outputs/depthmap-17286927930055.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bb0b4894365fb2039b0840e1d697d846653c8bfdf2167586d2b7f0075a197cf +size 1929299 diff --git a/outputs/depthmap-17286927930056-left-right.png b/outputs/depthmap-17286927930056-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..73406d0c14aa8c23307e9504a20559a977fdb96e --- /dev/null +++ b/outputs/depthmap-17286927930056-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c62f1b7130a7419758fa868dc17d19abf9f590484f1f71fe66e99b8e17f05d13 +size 5970846 diff --git a/outputs/depthmap-17286927930057.png b/outputs/depthmap-17286927930057.png new file mode 100644 index 0000000000000000000000000000000000000000..c9068c0950d7dc2015830c0f0b06281e6b7b80a4 --- /dev/null +++ b/outputs/depthmap-17286927930057.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c05309c5cf94373249f0c5aa2536fa04305d3fd8fbc891f9ff36ef448126c9 +size 1553773 diff --git a/outputs/depthmap-17286927930058-left-right.png b/outputs/depthmap-17286927930058-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..c09f37a6e7bc3a43a2f3129d7585d70a1454a2bb --- /dev/null +++ b/outputs/depthmap-17286927930058-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71da4bd131756ba117092e5de4d9c5ab6373f8fd42b9b8942fa21d90982a4d38 +size 4201218 diff --git a/outputs/depthmap-17286927930059.png b/outputs/depthmap-17286927930059.png new file mode 100644 index 0000000000000000000000000000000000000000..5386508cfaaccebeea246c80381637163f5bddd3 --- /dev/null +++ b/outputs/depthmap-17286927930059.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:6aabf31a3c1506d79d2f2c308334f9227a61b8426290080ec29843dbb0b0ea10 +size 2372357 diff --git a/outputs/depthmap-17286927930060-left-right.png b/outputs/depthmap-17286927930060-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f5b107966fd92e561058c39395590f5c5b041a26 --- /dev/null +++ b/outputs/depthmap-17286927930060-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b2b9ec709429d0f1f7de0eef6075c57153594f7f7a99a6b5b725422e3657f95 +size 5756292 diff --git a/outputs/depthmap-17286927930061.png b/outputs/depthmap-17286927930061.png new file mode 100644 index 0000000000000000000000000000000000000000..9022b2c292235140b6ecafb3b48969ef6910362b --- /dev/null +++ b/outputs/depthmap-17286927930061.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76baf04d6aa556d9b2e3119e012395672cf95539d166f16f7929f071e9087d76 +size 2277460 diff --git a/outputs/depthmap-17286927930062-left-right.png b/outputs/depthmap-17286927930062-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..32b8032fad71009939bd301e13ab2254a6bea62b --- /dev/null +++ b/outputs/depthmap-17286927930062-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b53cb7325aa41512e4f8b28d2d83ddeda53704d3948837b540349c3e1e55217 +size 4353863 diff --git a/outputs/depthmap-17286927930063.png b/outputs/depthmap-17286927930063.png new file mode 100644 index 0000000000000000000000000000000000000000..205373eedd7b88bbbd6faee321ae8652780bb2f7 --- /dev/null +++ b/outputs/depthmap-17286927930063.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb56bfab9af2953991c95c4fe4b32288e94a5fabcd2762012babe3f695560e2b +size 2710565 diff --git a/outputs/depthmap-17286927930064-left-right.png b/outputs/depthmap-17286927930064-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..0d306075409d17e51e95653f1d6f1b9d415ef11d --- 
/dev/null +++ b/outputs/depthmap-17286927930064-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3564ddf9b694d26d404425d7bc1535654546195c020a3ae844d5dfa597ea4afc +size 9575455 diff --git a/outputs/depthmap-17286927930065.png b/outputs/depthmap-17286927930065.png new file mode 100644 index 0000000000000000000000000000000000000000..6977db878d6e6651ec0bfbdad72bd768f16b0622 Binary files /dev/null and b/outputs/depthmap-17286927930065.png differ diff --git a/outputs/depthmap-17286927930066-left-right.png b/outputs/depthmap-17286927930066-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..7ad74d825416dae76c75606f43e0f87f5eadae37 --- /dev/null +++ b/outputs/depthmap-17286927930066-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9488b9c8078fcef33b590d7aa74bc716c4a1c8e3b912ea573994fde3926423ea +size 2139297 diff --git a/outputs/depthmap-17286927930067.png b/outputs/depthmap-17286927930067.png new file mode 100644 index 0000000000000000000000000000000000000000..5da7c3d200acf5f55c290a9d95f85f48b498a6d3 Binary files /dev/null and b/outputs/depthmap-17286927930067.png differ diff --git a/outputs/depthmap-17286927930068-left-right.png b/outputs/depthmap-17286927930068-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f364d9a953bcc9fe4ba57c0cdb773966613ee056 Binary files /dev/null and b/outputs/depthmap-17286927930068-left-right.png differ diff --git a/outputs/depthmap-17286927930069.png b/outputs/depthmap-17286927930069.png new file mode 100644 index 0000000000000000000000000000000000000000..180a33b7d3c34379ee9e23886cf4b217f8de1443 Binary files /dev/null and b/outputs/depthmap-17286927930069.png differ diff --git a/outputs/depthmap-17286927930070-left-right.png b/outputs/depthmap-17286927930070-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..6eadb776f61bfe7f1ddf1e5c1673d16a2b4b9698 --- /dev/null +++ 
b/outputs/depthmap-17286927930070-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5d35fabe92fde15f6f5c3a617fdbc3ef989db604ff326131748babc5dd32ec2 +size 1072412 diff --git a/outputs/depthmap-17286927930071.png b/outputs/depthmap-17286927930071.png new file mode 100644 index 0000000000000000000000000000000000000000..5ca6937f89cfb732ba1104e07bbde24324b8d411 Binary files /dev/null and b/outputs/depthmap-17286927930071.png differ diff --git a/outputs/depthmap-17286927930072-left-right.png b/outputs/depthmap-17286927930072-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..66aac14333d15a2942372794a805df1325260201 --- /dev/null +++ b/outputs/depthmap-17286927930072-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08cf4b56b6c7fa5263b4749863b9dfbdcd89dba4552591d268693797cbae7203 +size 1046366 diff --git a/outputs/depthmap-17286927930073.png b/outputs/depthmap-17286927930073.png new file mode 100644 index 0000000000000000000000000000000000000000..cbeb627d10816f40fe5ea76b04b5932a9df8bb0f Binary files /dev/null and b/outputs/depthmap-17286927930073.png differ diff --git a/outputs/depthmap-17286927930074-left-right.png b/outputs/depthmap-17286927930074-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..e39000ddcfe1e77e0967b8f55c2d9ff7b2f5c413 Binary files /dev/null and b/outputs/depthmap-17286927930074-left-right.png differ diff --git a/outputs/depthmap-17286927930075.png b/outputs/depthmap-17286927930075.png new file mode 100644 index 0000000000000000000000000000000000000000..1758e7e26b3892156cac55e044cbfb361d21a815 Binary files /dev/null and b/outputs/depthmap-17286927930075.png differ diff --git a/outputs/depthmap-17286927930076-left-right.png b/outputs/depthmap-17286927930076-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..2194581538d47d5ac6b7c5609ba2b764091eaf2c Binary files /dev/null and 
b/outputs/depthmap-17286927930076-left-right.png differ diff --git a/outputs/depthmap-17286927930077.png b/outputs/depthmap-17286927930077.png new file mode 100644 index 0000000000000000000000000000000000000000..0d6798317af81a65f8dd38d7f2001c369be2bbd9 Binary files /dev/null and b/outputs/depthmap-17286927930077.png differ diff --git a/outputs/depthmap-17286927930078-left-right.png b/outputs/depthmap-17286927930078-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..c2ffa6a3ca41c9e93055bb67391253fcbda22bba Binary files /dev/null and b/outputs/depthmap-17286927930078-left-right.png differ diff --git a/outputs/depthmap-17286927930079.png b/outputs/depthmap-17286927930079.png new file mode 100644 index 0000000000000000000000000000000000000000..373c1ec1cafbc30ac0309af4d8b1a633c98e5c8f Binary files /dev/null and b/outputs/depthmap-17286927930079.png differ diff --git a/outputs/depthmap-17286927930080-left-right.png b/outputs/depthmap-17286927930080-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..11ad930135520cc5127684522cae2dcc404905df --- /dev/null +++ b/outputs/depthmap-17286927930080-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d39efad39f1e7816844e89a6bb8d73a6215f7a44662e298a54d0f9123696a104 +size 1149426 diff --git a/outputs/depthmap-17286927930081.png b/outputs/depthmap-17286927930081.png new file mode 100644 index 0000000000000000000000000000000000000000..4deeccd86b68778ebc6342abc40085bb68d5766d Binary files /dev/null and b/outputs/depthmap-17286927930081.png differ diff --git a/outputs/depthmap-17286927930082-left-right.png b/outputs/depthmap-17286927930082-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..76cc940328bf3aa5dd1c01902863b094143f5594 --- /dev/null +++ b/outputs/depthmap-17286927930082-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3c2a2935a43c82aec598d109aaa6413053ade29fb9916e29581f366878ecaf6e +size 1167253 diff --git a/outputs/depthmap-17286927930083.png b/outputs/depthmap-17286927930083.png new file mode 100644 index 0000000000000000000000000000000000000000..cd8aed054614c9879dda9ad73b0ff92e059d6244 Binary files /dev/null and b/outputs/depthmap-17286927930083.png differ diff --git a/outputs/depthmap-17286927930084-left-right.png b/outputs/depthmap-17286927930084-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..dfb93be0bcb779acebae3c12769906b031543dfe --- /dev/null +++ b/outputs/depthmap-17286927930084-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e29e8608eb342ac0b14bd57ca91b77b2b15ec182d91b4150d74350a0785d842 +size 1459388 diff --git a/outputs/depthmap-17286927930085.png b/outputs/depthmap-17286927930085.png new file mode 100644 index 0000000000000000000000000000000000000000..8a0a3aadf21f2125465022656aae29861f0be53b Binary files /dev/null and b/outputs/depthmap-17286927930085.png differ diff --git a/outputs/depthmap-17286927930086-left-right.png b/outputs/depthmap-17286927930086-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..e6df406339558c19cc2a216fd4432b88c618fb72 Binary files /dev/null and b/outputs/depthmap-17286927930086-left-right.png differ diff --git a/outputs/depthmap-17286927930087.png b/outputs/depthmap-17286927930087.png new file mode 100644 index 0000000000000000000000000000000000000000..557c173a73051c7e16cfac5204717a53400389c0 Binary files /dev/null and b/outputs/depthmap-17286927930087.png differ diff --git a/outputs/depthmap-17286927930088-left-right.png b/outputs/depthmap-17286927930088-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..00c76a151d5a8ed4cfefd44f5c0276c790cc6ac0 Binary files /dev/null and b/outputs/depthmap-17286927930088-left-right.png differ diff --git a/outputs/depthmap-17286927930089.png 
b/outputs/depthmap-17286927930089.png new file mode 100644 index 0000000000000000000000000000000000000000..e2670fd0e9c69a8fcd8831f7a4832fa3fdc67ac4 Binary files /dev/null and b/outputs/depthmap-17286927930089.png differ diff --git a/outputs/depthmap-17286927930090-left-right.png b/outputs/depthmap-17286927930090-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..6c1783412a0fc18a1aaf005a6ad084ab434e251e Binary files /dev/null and b/outputs/depthmap-17286927930090-left-right.png differ diff --git a/outputs/depthmap-17286927930091.png b/outputs/depthmap-17286927930091.png new file mode 100644 index 0000000000000000000000000000000000000000..4bb5ef974f7333d976b8865b25811e77aabf12e9 Binary files /dev/null and b/outputs/depthmap-17286927930091.png differ diff --git a/outputs/depthmap-17286927930092-left-right.png b/outputs/depthmap-17286927930092-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..139376653fe7a3d60331c5e75ea465d02f0cdd23 Binary files /dev/null and b/outputs/depthmap-17286927930092-left-right.png differ diff --git a/outputs/depthmap-17286927930093.png b/outputs/depthmap-17286927930093.png new file mode 100644 index 0000000000000000000000000000000000000000..e848f01bd267312d68e3f644de15698c5d75b111 Binary files /dev/null and b/outputs/depthmap-17286927930093.png differ diff --git a/outputs/depthmap-17286927930094-left-right.png b/outputs/depthmap-17286927930094-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..10ab8d3ab9f720ff2346dad1433e930e2f41d070 Binary files /dev/null and b/outputs/depthmap-17286927930094-left-right.png differ diff --git a/outputs/depthmap-17286927930095.png b/outputs/depthmap-17286927930095.png new file mode 100644 index 0000000000000000000000000000000000000000..06a2f5121826648999df04b611eef26644e3679b Binary files /dev/null and b/outputs/depthmap-17286927930095.png differ diff --git a/outputs/depthmap-17286927930096-left-right.png 
b/outputs/depthmap-17286927930096-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..3442a033d954753bdc5ab4caa27099ab0b8901d0 Binary files /dev/null and b/outputs/depthmap-17286927930096-left-right.png differ diff --git a/outputs/depthmap-17286927930097.png b/outputs/depthmap-17286927930097.png new file mode 100644 index 0000000000000000000000000000000000000000..983c9240aede891e3a5c44336de90ea411aea181 Binary files /dev/null and b/outputs/depthmap-17286927930097.png differ diff --git a/outputs/depthmap-17286927930098-left-right.png b/outputs/depthmap-17286927930098-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..38db9f00d776137546f2c7b0ca0f6f3e50262d46 Binary files /dev/null and b/outputs/depthmap-17286927930098-left-right.png differ diff --git a/outputs/depthmap-17286927930099.png b/outputs/depthmap-17286927930099.png new file mode 100644 index 0000000000000000000000000000000000000000..182ee3fd23592d32882b200d9d3f0ff13fc8619c Binary files /dev/null and b/outputs/depthmap-17286927930099.png differ diff --git a/outputs/depthmap-17286927930100-left-right.png b/outputs/depthmap-17286927930100-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..e38b41e2763bf7aaf47d81388ba766b00275de63 Binary files /dev/null and b/outputs/depthmap-17286927930100-left-right.png differ diff --git a/outputs/depthmap-17286927930101.png b/outputs/depthmap-17286927930101.png new file mode 100644 index 0000000000000000000000000000000000000000..32464a7a7169fd0019cdab72a1b66fd19842ebe4 Binary files /dev/null and b/outputs/depthmap-17286927930101.png differ diff --git a/outputs/depthmap-17286927930102-left-right.png b/outputs/depthmap-17286927930102-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..a76eebbd862abfa384e8984386010c932c583347 Binary files /dev/null and b/outputs/depthmap-17286927930102-left-right.png differ diff --git 
a/outputs/depthmap-17286927930103.png b/outputs/depthmap-17286927930103.png new file mode 100644 index 0000000000000000000000000000000000000000..a27e0cde0a1fec3d5ac13c96c8ee188aea5435ca Binary files /dev/null and b/outputs/depthmap-17286927930103.png differ diff --git a/outputs/depthmap-17286927930104-left-right.png b/outputs/depthmap-17286927930104-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..ea49e95b8cee55e033f46bcec38f83d6a95bfa7d Binary files /dev/null and b/outputs/depthmap-17286927930104-left-right.png differ diff --git a/outputs/depthmap-17286927930105.png b/outputs/depthmap-17286927930105.png new file mode 100644 index 0000000000000000000000000000000000000000..96d38bee9017f1def4b04cf3e256773f7779350e Binary files /dev/null and b/outputs/depthmap-17286927930105.png differ diff --git a/outputs/depthmap-17286927930106-left-right.png b/outputs/depthmap-17286927930106-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..94d58acf273eb0a3550bb2b1aadb4a29bfe8be79 Binary files /dev/null and b/outputs/depthmap-17286927930106-left-right.png differ diff --git a/outputs/depthmap-17286927930107.png b/outputs/depthmap-17286927930107.png new file mode 100644 index 0000000000000000000000000000000000000000..27cea4a0529ad6bfcc4681f049f9f7b7e33e16c0 Binary files /dev/null and b/outputs/depthmap-17286927930107.png differ diff --git a/outputs/depthmap-17286927930108-left-right.png b/outputs/depthmap-17286927930108-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..165f5bf567c9b21a67d32fdfba212390f83c36dd Binary files /dev/null and b/outputs/depthmap-17286927930108-left-right.png differ diff --git a/outputs/depthmap-17286927930109.png b/outputs/depthmap-17286927930109.png new file mode 100644 index 0000000000000000000000000000000000000000..538d1dd9aa571c25b4716ea49ba3f8e0938e3561 Binary files /dev/null and b/outputs/depthmap-17286927930109.png differ diff --git 
a/outputs/depthmap-17286927930110-left-right.png b/outputs/depthmap-17286927930110-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f8b2c1df6e7d17e76e5c616dd84db6953df031ca Binary files /dev/null and b/outputs/depthmap-17286927930110-left-right.png differ diff --git a/outputs/depthmap-17286927930111.png b/outputs/depthmap-17286927930111.png new file mode 100644 index 0000000000000000000000000000000000000000..91ad64ffe35b963b9b92f28479422c8231eb8d04 Binary files /dev/null and b/outputs/depthmap-17286927930111.png differ diff --git a/outputs/depthmap-17286927930112-left-right.png b/outputs/depthmap-17286927930112-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..dcf0bfc081192419fc6fe004efdc438a34d3dd45 Binary files /dev/null and b/outputs/depthmap-17286927930112-left-right.png differ diff --git a/outputs/depthmap-17286927930113.png b/outputs/depthmap-17286927930113.png new file mode 100644 index 0000000000000000000000000000000000000000..e78c79a3c96a00801b21299b6ac2ebc687dc83f3 Binary files /dev/null and b/outputs/depthmap-17286927930113.png differ diff --git a/outputs/depthmap-17286927930114-left-right.png b/outputs/depthmap-17286927930114-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..65f59252f93ea2248b85c9f4d44ee935e9d18e7d Binary files /dev/null and b/outputs/depthmap-17286927930114-left-right.png differ diff --git a/outputs/depthmap-17286927930115.png b/outputs/depthmap-17286927930115.png new file mode 100644 index 0000000000000000000000000000000000000000..92e4bf638ad4c986414fdfb9e57f26616ec9837a Binary files /dev/null and b/outputs/depthmap-17286927930115.png differ diff --git a/outputs/depthmap-17286927930116-left-right.png b/outputs/depthmap-17286927930116-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..659bce635a3c2cd061a2668780db27b05ed3936f Binary files /dev/null and b/outputs/depthmap-17286927930116-left-right.png 
differ diff --git a/outputs/depthmap-17286927930117.png b/outputs/depthmap-17286927930117.png new file mode 100644 index 0000000000000000000000000000000000000000..2ce0e1322863116a0063593a9025297882cbd4fd Binary files /dev/null and b/outputs/depthmap-17286927930117.png differ diff --git a/outputs/depthmap-17286927930118-left-right.png b/outputs/depthmap-17286927930118-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..95d4f45ee63d28ab91e8210199f00f406a1a5a7a Binary files /dev/null and b/outputs/depthmap-17286927930118-left-right.png differ diff --git a/outputs/depthmap-17286927930119.png b/outputs/depthmap-17286927930119.png new file mode 100644 index 0000000000000000000000000000000000000000..cf8faab30804a5408c7542aec519d111b59a1058 Binary files /dev/null and b/outputs/depthmap-17286927930119.png differ diff --git a/outputs/depthmap-17286927930120-left-right.png b/outputs/depthmap-17286927930120-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..805f0a3737f258c71e4ba1985a33fda4bff50e4e --- /dev/null +++ b/outputs/depthmap-17286927930120-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bb7be35072ddeba856e14be5769dad67b1a7f1858c760a0382b893649cc7a78 +size 2256905 diff --git a/outputs/depthmap-17286927930121.png b/outputs/depthmap-17286927930121.png new file mode 100644 index 0000000000000000000000000000000000000000..2c5bb6bae41b1fa7f7b831056e86c0728f729689 Binary files /dev/null and b/outputs/depthmap-17286927930121.png differ diff --git a/outputs/depthmap-17286927930122-left-right.png b/outputs/depthmap-17286927930122-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..32fbb3b8b0262efcc36209e547e20660841cbd22 Binary files /dev/null and b/outputs/depthmap-17286927930122-left-right.png differ diff --git a/outputs/depthmap-17286927930123.png b/outputs/depthmap-17286927930123.png new file mode 100644 index 
0000000000000000000000000000000000000000..fa3f4e54bb9d747e3dab21b062ea0bbc12381a76 Binary files /dev/null and b/outputs/depthmap-17286927930123.png differ diff --git a/outputs/depthmap-17286927930124-left-right.png b/outputs/depthmap-17286927930124-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..cb8c84de6051de7318192319a478b8a783e5632c Binary files /dev/null and b/outputs/depthmap-17286927930124-left-right.png differ diff --git a/outputs/depthmap-17286927930125.png b/outputs/depthmap-17286927930125.png new file mode 100644 index 0000000000000000000000000000000000000000..b26bdf0afb128eaa6bf873d603678f5c05ec9932 Binary files /dev/null and b/outputs/depthmap-17286927930125.png differ diff --git a/outputs/depthmap-17286927930126-left-right.png b/outputs/depthmap-17286927930126-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..e4ac3c317f727b38e14e515672ef9406bf2a0573 --- /dev/null +++ b/outputs/depthmap-17286927930126-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b9ffa0f87d7c759bbf7e3ebb448864dd95902de8b8b2fdab7d052b7c06a87a5 +size 1253644 diff --git a/outputs/depthmap-17286927930127.png b/outputs/depthmap-17286927930127.png new file mode 100644 index 0000000000000000000000000000000000000000..b101932319d7926a3defb243ea5214ab55cbf6b3 Binary files /dev/null and b/outputs/depthmap-17286927930127.png differ diff --git a/outputs/depthmap-17286927930128-left-right.png b/outputs/depthmap-17286927930128-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..19244af51878c642150f4855c67bbb8b51a06ad0 Binary files /dev/null and b/outputs/depthmap-17286927930128-left-right.png differ diff --git a/outputs/depthmap-17286927930129.png b/outputs/depthmap-17286927930129.png new file mode 100644 index 0000000000000000000000000000000000000000..d86470e9475b6a56c914579a7a7f7d9ac3d6792c Binary files /dev/null and b/outputs/depthmap-17286927930129.png 
differ diff --git a/outputs/depthmap-17286927930130-left-right.png b/outputs/depthmap-17286927930130-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..65ab69ba2d4494deba54c589cf9fe7933630358b Binary files /dev/null and b/outputs/depthmap-17286927930130-left-right.png differ diff --git a/outputs/depthmap-17286927930131.png b/outputs/depthmap-17286927930131.png new file mode 100644 index 0000000000000000000000000000000000000000..a158fb2bdce59c13c90ea61125dfa090301f6298 Binary files /dev/null and b/outputs/depthmap-17286927930131.png differ diff --git a/outputs/depthmap-17286927930132-left-right.png b/outputs/depthmap-17286927930132-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..1ff575941d5fe3b2b71d619b7d3307df45b5a6b1 --- /dev/null +++ b/outputs/depthmap-17286927930132-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58da925df4e0e6ee810180f82590c4aec9f36723465aa3bf6ef496f22468c19b +size 1200876 diff --git a/outputs/depthmap-17286927930133.png b/outputs/depthmap-17286927930133.png new file mode 100644 index 0000000000000000000000000000000000000000..8b8f18068e831c45044b121ea7d39bff41d3c18a Binary files /dev/null and b/outputs/depthmap-17286927930133.png differ diff --git a/outputs/depthmap-17286927930134-left-right.png b/outputs/depthmap-17286927930134-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..7bd2c5d8ff5433f2435f29a5e70d7c09e6232770 Binary files /dev/null and b/outputs/depthmap-17286927930134-left-right.png differ diff --git a/outputs/depthmap-17286927930135.png b/outputs/depthmap-17286927930135.png new file mode 100644 index 0000000000000000000000000000000000000000..ac45e3a204c6f71ab13ee6e955a57ab5fbba128d Binary files /dev/null and b/outputs/depthmap-17286927930135.png differ diff --git a/outputs/depthmap-17286927930136-left-right.png b/outputs/depthmap-17286927930136-left-right.png new file mode 100644 index 
0000000000000000000000000000000000000000..c5a677f625021dbf4d6c2664be17864baf4c9ed7 Binary files /dev/null and b/outputs/depthmap-17286927930136-left-right.png differ diff --git a/outputs/depthmap-17286927930137.png b/outputs/depthmap-17286927930137.png new file mode 100644 index 0000000000000000000000000000000000000000..4c115ff0d51ca550d6ed2a65e3772b267c1a2619 Binary files /dev/null and b/outputs/depthmap-17286927930137.png differ diff --git a/outputs/depthmap-17286927930138-left-right.png b/outputs/depthmap-17286927930138-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f7fe8600a6ad07bc773afae664fe283c78280987 Binary files /dev/null and b/outputs/depthmap-17286927930138-left-right.png differ diff --git a/outputs/depthmap-17286927930139.png b/outputs/depthmap-17286927930139.png new file mode 100644 index 0000000000000000000000000000000000000000..4a3b315e837397cf14cb141e4438446ef970421c Binary files /dev/null and b/outputs/depthmap-17286927930139.png differ diff --git a/outputs/depthmap-17286927930140-left-right.png b/outputs/depthmap-17286927930140-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..94bb6caf7f64de898b9cee5271b3d4f41cd4ca81 Binary files /dev/null and b/outputs/depthmap-17286927930140-left-right.png differ diff --git a/outputs/depthmap-17286927930141.png b/outputs/depthmap-17286927930141.png new file mode 100644 index 0000000000000000000000000000000000000000..bc4558ef4ae86625ef6897366f7f160200b1be4b Binary files /dev/null and b/outputs/depthmap-17286927930141.png differ diff --git a/outputs/depthmap-17286927930142-left-right.png b/outputs/depthmap-17286927930142-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..578da2cd7c88b562a31210efe0e90878be225274 --- /dev/null +++ b/outputs/depthmap-17286927930142-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfaf6e20844cd1ec90d961de8f8088630226c78231a6423c213b2e1b94b1c049 
+size 1097368 diff --git a/outputs/depthmap-17286927930143.png b/outputs/depthmap-17286927930143.png new file mode 100644 index 0000000000000000000000000000000000000000..cd49752909789adcb56bbf756582780d7b243d24 Binary files /dev/null and b/outputs/depthmap-17286927930143.png differ diff --git a/outputs/depthmap-17286927930144-left-right.png b/outputs/depthmap-17286927930144-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..a7e2ebd5c92f98cdbdfb6d2c2e2fb0946f385e1b Binary files /dev/null and b/outputs/depthmap-17286927930144-left-right.png differ diff --git a/outputs/depthmap-17286927930145.png b/outputs/depthmap-17286927930145.png new file mode 100644 index 0000000000000000000000000000000000000000..6191c098d3012f615436b44c1d450c50231ebb2a Binary files /dev/null and b/outputs/depthmap-17286927930145.png differ diff --git a/outputs/depthmap-17286927930146-left-right.png b/outputs/depthmap-17286927930146-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..e69dce3a4ee2d980e51ddb016758efbedff1b529 Binary files /dev/null and b/outputs/depthmap-17286927930146-left-right.png differ diff --git a/outputs/depthmap-17286927930147.png b/outputs/depthmap-17286927930147.png new file mode 100644 index 0000000000000000000000000000000000000000..3e023558e24af4a6306ef33d08255ea9fc403c24 --- /dev/null +++ b/outputs/depthmap-17286927930147.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83fa3b9794bb41706b1fe4dc3cb76f91fbe9d5dfd8c6712ad9bcc17424724c23 +size 1513566 diff --git a/outputs/depthmap-17286927930148-left-right.png b/outputs/depthmap-17286927930148-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..abc8d948deb3d48426f2424b470a41518d94637a Binary files /dev/null and b/outputs/depthmap-17286927930148-left-right.png differ diff --git a/outputs/depthmap-17286927930149.png b/outputs/depthmap-17286927930149.png new file mode 100644 index 
0000000000000000000000000000000000000000..b452d56039dd4e0f7a35b692faea2a6a422e8005 Binary files /dev/null and b/outputs/depthmap-17286927930149.png differ diff --git a/outputs/depthmap-17286927930150-left-right.png b/outputs/depthmap-17286927930150-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..649c2d7b2f41af00890bc072a1f08b6a5199d6a9 Binary files /dev/null and b/outputs/depthmap-17286927930150-left-right.png differ diff --git a/outputs/depthmap-17286927930151.png b/outputs/depthmap-17286927930151.png new file mode 100644 index 0000000000000000000000000000000000000000..563f108378740bf841162be4bcde3a3231b82a52 Binary files /dev/null and b/outputs/depthmap-17286927930151.png differ diff --git a/outputs/depthmap-17286927930152-left-right.png b/outputs/depthmap-17286927930152-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..4a11c697d0c7d14eb2a7c0c2f74eefd1b1cc20d4 --- /dev/null +++ b/outputs/depthmap-17286927930152-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d059a3360a7b28c1b6a56aed848a3cbd6077f3a0a84bded4ce5972b1b548526 +size 2063495 diff --git a/outputs/depthmap-17286927930153.png b/outputs/depthmap-17286927930153.png new file mode 100644 index 0000000000000000000000000000000000000000..65959e5f2daba9744fecf8fa1661538ce5b777c6 Binary files /dev/null and b/outputs/depthmap-17286927930153.png differ diff --git a/outputs/depthmap-17286927930154-left-right.png b/outputs/depthmap-17286927930154-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..82da4c6cec77984ce45872c1f29d20d3424088fc --- /dev/null +++ b/outputs/depthmap-17286927930154-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92b610e878b0b1a27acf18807be38c7691cf9565ed03d84ac1901c89a2e203e1 +size 2347566 diff --git a/outputs/depthmap-17286927930155.png b/outputs/depthmap-17286927930155.png new file mode 100644 index 
0000000000000000000000000000000000000000..5278a3225ce3d85b783d08f82f23151072def245 Binary files /dev/null and b/outputs/depthmap-17286927930155.png differ diff --git a/outputs/depthmap-17286927930156-left-right.png b/outputs/depthmap-17286927930156-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..b127df6b133543f4cc1941db0851fe47e00455cf --- /dev/null +++ b/outputs/depthmap-17286927930156-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2a3cb643b7252730375a7b978de1185a8e98697adecd6ea7f38aa70121fb091 +size 1699294 diff --git a/outputs/depthmap-17286927930157.png b/outputs/depthmap-17286927930157.png new file mode 100644 index 0000000000000000000000000000000000000000..d9a7fe7be39db35bff7c57a1becff2263e923b0c Binary files /dev/null and b/outputs/depthmap-17286927930157.png differ diff --git a/outputs/depthmap-17286927930158-left-right.png b/outputs/depthmap-17286927930158-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..7f0c0a9eb4564669933f8754898c00b289c06fad --- /dev/null +++ b/outputs/depthmap-17286927930158-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e2542274629d7aae203ab4ee60f0e7b6d36912a21d77e7fbc41f5820aba4725 +size 1652615 diff --git a/outputs/depthmap-17286927930159.png b/outputs/depthmap-17286927930159.png new file mode 100644 index 0000000000000000000000000000000000000000..35434aeafe462cba05badf049437e9f774d8977d Binary files /dev/null and b/outputs/depthmap-17286927930159.png differ diff --git a/outputs/depthmap-17286927930160-left-right.png b/outputs/depthmap-17286927930160-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..ac1154763e184b5d653405a89e8167f2c435ad9b --- /dev/null +++ b/outputs/depthmap-17286927930160-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:167c207320572d78469b584262ad3458754baa1898e4acf67c8a1e9e561161c5 +size 
2269388 diff --git a/outputs/depthmap-17286927930161.png b/outputs/depthmap-17286927930161.png new file mode 100644 index 0000000000000000000000000000000000000000..7ae8a7e5409dbfb9b7bcaea1b82dd4a2835ecac8 Binary files /dev/null and b/outputs/depthmap-17286927930161.png differ diff --git a/outputs/depthmap-17286927930162-left-right.png b/outputs/depthmap-17286927930162-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..b14f11186436611294c04ac8b67a6c20954ea127 --- /dev/null +++ b/outputs/depthmap-17286927930162-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41c02deadf32471950bc7598a20d15364706cb1d567697613f2820eadc0164c8 +size 2089433 diff --git a/outputs/depthmap-17286927930163.png b/outputs/depthmap-17286927930163.png new file mode 100644 index 0000000000000000000000000000000000000000..0efd0c41817f3705b71d5fb622e74120a755c865 Binary files /dev/null and b/outputs/depthmap-17286927930163.png differ diff --git a/outputs/depthmap-17286927930164-left-right.png b/outputs/depthmap-17286927930164-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..06223e59db4543a3ac9bed51caef9cc271c0f7bb --- /dev/null +++ b/outputs/depthmap-17286927930164-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e306fe59221adfb7953aef30d0ce6ac6610bf068c11c2f46deec80deccd22c2e +size 1591250 diff --git a/outputs/depthmap-17286927930165.png b/outputs/depthmap-17286927930165.png new file mode 100644 index 0000000000000000000000000000000000000000..3ed3cfb70f61b7a21db165056a2786b0e3af3d8c Binary files /dev/null and b/outputs/depthmap-17286927930165.png differ diff --git a/outputs/depthmap-17286927930166-left-right.png b/outputs/depthmap-17286927930166-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..6ec0445bb6f6264845fc0d156ee9163dbef50326 --- /dev/null +++ b/outputs/depthmap-17286927930166-left-right.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:0e206735bbd6d22edc089c9ba3df92625e6c98e39a5df111a9a865661c29d753 +size 2289468 diff --git a/outputs/depthmap-17286927930167.png b/outputs/depthmap-17286927930167.png new file mode 100644 index 0000000000000000000000000000000000000000..81cdb7e37018aab31ce30e95e8adecf0456cb76d Binary files /dev/null and b/outputs/depthmap-17286927930167.png differ diff --git a/outputs/depthmap-17286927930168-left-right.png b/outputs/depthmap-17286927930168-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..15ba8d28216ddb037e1184e869b5363da6fac4e7 --- /dev/null +++ b/outputs/depthmap-17286927930168-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:392614970a70bde5741b180af36ffb65f929408ca9eb31db95abc6a6461e4f5b +size 1699577 diff --git a/outputs/depthmap-17286927930169.png b/outputs/depthmap-17286927930169.png new file mode 100644 index 0000000000000000000000000000000000000000..2dfdcb8156fd752067884788895cf2cdec6cb226 Binary files /dev/null and b/outputs/depthmap-17286927930169.png differ diff --git a/outputs/depthmap-17286927930170-left-right.png b/outputs/depthmap-17286927930170-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..12b84cd92a813385fa177425d604e889295626f7 --- /dev/null +++ b/outputs/depthmap-17286927930170-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e6010511bb4797298688af747921cf941a3f8abb1492562ab8090d3e6369bd +size 1631783 diff --git a/outputs/depthmap-17286927930171.png b/outputs/depthmap-17286927930171.png new file mode 100644 index 0000000000000000000000000000000000000000..b1b48e30564d5ededfcb34e0d932c4568c0f8204 Binary files /dev/null and b/outputs/depthmap-17286927930171.png differ diff --git a/outputs/depthmap-17286927930172-left-right.png b/outputs/depthmap-17286927930172-left-right.png new file mode 100644 index 
0000000000000000000000000000000000000000..df8ec95f6a61a29efee91a195ae833ae791213d8 --- /dev/null +++ b/outputs/depthmap-17286927930172-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99e99eb2336646d69383833381c0cb997cf5cecf6d24ba75d04a34c2e2c8d64c +size 1936291 diff --git a/outputs/depthmap-17286927930173.png b/outputs/depthmap-17286927930173.png new file mode 100644 index 0000000000000000000000000000000000000000..db4312859b75c89e2a8a139fe0bd2f05d50b6792 Binary files /dev/null and b/outputs/depthmap-17286927930173.png differ diff --git a/outputs/depthmap-17286927930174-left-right.png b/outputs/depthmap-17286927930174-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..4094e73781035e89dd17bf18a2ca76941ae2edd7 --- /dev/null +++ b/outputs/depthmap-17286927930174-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd8487c10054bf0a3e3203dba02f484185ebdc611836d947ea5e9a23ded18c06 +size 1365733 diff --git a/outputs/depthmap-17286927930175.png b/outputs/depthmap-17286927930175.png new file mode 100644 index 0000000000000000000000000000000000000000..a39dc62e7410f6cb2cf7f4d834e7351b9fa049ad Binary files /dev/null and b/outputs/depthmap-17286927930175.png differ diff --git a/outputs/depthmap-17286927930176-left-right.png b/outputs/depthmap-17286927930176-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..3125a769c9fe62c62fb114ab3522a04f556e24a2 --- /dev/null +++ b/outputs/depthmap-17286927930176-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d79849aac5e5584c8dc1c7794e3b91988a02412d3eb0eaa9dc0feda0b2c6b74a +size 2252320 diff --git a/outputs/depthmap-17286927930177.png b/outputs/depthmap-17286927930177.png new file mode 100644 index 0000000000000000000000000000000000000000..e3e2375a49ca54f36ce3d138e29f8db0fb8c984a Binary files /dev/null and b/outputs/depthmap-17286927930177.png differ diff --git 
a/outputs/depthmap-17286927930178-left-right.png b/outputs/depthmap-17286927930178-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..10834365206064e70a09ec236b84f2626bccfd32 --- /dev/null +++ b/outputs/depthmap-17286927930178-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8247a61531918a964bafefa93159d76e7646bf590cfacb9c949be1fde22bd8fb +size 2366451 diff --git a/outputs/depthmap-17286927930179.png b/outputs/depthmap-17286927930179.png new file mode 100644 index 0000000000000000000000000000000000000000..859d5a214696462b09e94ae807555b6276aa750d Binary files /dev/null and b/outputs/depthmap-17286927930179.png differ diff --git a/outputs/depthmap-17286927930180-left-right.png b/outputs/depthmap-17286927930180-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..15f7a4ca8c55d43256a985cee299aa0cba6791e8 --- /dev/null +++ b/outputs/depthmap-17286927930180-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4755f647eee020bdfce51c3054b79c5d763a37b0d6343285ad4314c24f729f9 +size 2058796 diff --git a/outputs/depthmap-17286927930181.png b/outputs/depthmap-17286927930181.png new file mode 100644 index 0000000000000000000000000000000000000000..320aadc711836c7791b9bb298fa5cc58327a8769 Binary files /dev/null and b/outputs/depthmap-17286927930181.png differ diff --git a/outputs/depthmap-17286927930182-left-right.png b/outputs/depthmap-17286927930182-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..6a0045528722a672d4e2e3e691bbd73c4d484a63 --- /dev/null +++ b/outputs/depthmap-17286927930182-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13a82e248712e377cb2800f790868d925287a512ad43acca8ed718c284e947bd +size 2338556 diff --git a/outputs/depthmap-17286927930183.png b/outputs/depthmap-17286927930183.png new file mode 100644 index 
0000000000000000000000000000000000000000..929f50c74b59d4f0d4991f475ca806628269a8f4 Binary files /dev/null and b/outputs/depthmap-17286927930183.png differ diff --git a/outputs/depthmap-17286927930184-left-right.png b/outputs/depthmap-17286927930184-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..fe549e7eabd82f87ea6e091d583d8107640d4689 --- /dev/null +++ b/outputs/depthmap-17286927930184-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed9e0eaf9e090c27eea35d6ca542a8d2fe7f59568389ab8daec6c3e7dc93c918 +size 2225402 diff --git a/outputs/depthmap-17286927930185.png b/outputs/depthmap-17286927930185.png new file mode 100644 index 0000000000000000000000000000000000000000..df2bb965a93dc67f82dc421814ed80eb85110c4b Binary files /dev/null and b/outputs/depthmap-17286927930185.png differ diff --git a/outputs/depthmap-17286927930186-left-right.png b/outputs/depthmap-17286927930186-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..7fde5f2b802b8d9e081c1bcd5d8f69e02fbdd98a --- /dev/null +++ b/outputs/depthmap-17286927930186-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f10eec4afad7ba0d65e6f2f7c786456b843b953902c340acd65f79107199a6f +size 1927123 diff --git a/outputs/depthmap-17286927930187.png b/outputs/depthmap-17286927930187.png new file mode 100644 index 0000000000000000000000000000000000000000..f307f7c9e3c544ea059e14bacbdb1cdac80afafa Binary files /dev/null and b/outputs/depthmap-17286927930187.png differ diff --git a/outputs/depthmap-17286927930188-left-right.png b/outputs/depthmap-17286927930188-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..df166382aa410aca1b78732addb13e4b48bb4d19 --- /dev/null +++ b/outputs/depthmap-17286927930188-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb8816f84c200d7ff7f4ca72eb78c0a041ff28124c932be5f90aa7fa8e75576c +size 
3950067 diff --git a/outputs/depthmap-17286927930189.png b/outputs/depthmap-17286927930189.png new file mode 100644 index 0000000000000000000000000000000000000000..2adf32740afeca509e9dffb539eed9bccd1429ad Binary files /dev/null and b/outputs/depthmap-17286927930189.png differ diff --git a/outputs/depthmap-17286927930190-left-right.png b/outputs/depthmap-17286927930190-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..d1a777b62c25883924e63d15b9d62082b2da24c4 --- /dev/null +++ b/outputs/depthmap-17286927930190-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a533273b0121264652889d0bced483dae8197eb0bf180a4e9917c0177973c17d +size 3994142 diff --git a/outputs/depthmap-17286927930191.png b/outputs/depthmap-17286927930191.png new file mode 100644 index 0000000000000000000000000000000000000000..59bd475b53f1e3cfb30b40ab8cc149271c004eda Binary files /dev/null and b/outputs/depthmap-17286927930191.png differ diff --git a/outputs/depthmap-17286927930192-left-right.png b/outputs/depthmap-17286927930192-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..cd00adbe2341ee09f8d9ca3579518f9dd13b1609 Binary files /dev/null and b/outputs/depthmap-17286927930192-left-right.png differ diff --git a/outputs/depthmap-17286927930193.png b/outputs/depthmap-17286927930193.png new file mode 100644 index 0000000000000000000000000000000000000000..ad658859c9099750dfebb172d9aba4487dfd9e47 Binary files /dev/null and b/outputs/depthmap-17286927930193.png differ diff --git a/outputs/depthmap-17286927930194-left-right.png b/outputs/depthmap-17286927930194-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..818a24100f4d495b19f3b805bd531432e67dbf8e --- /dev/null +++ b/outputs/depthmap-17286927930194-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9d775ce2237b6b415e024209e39b3d08b158312c963707896597128b62f33c8 +size 1096512 diff 
--git a/outputs/depthmap-17286927930195.png b/outputs/depthmap-17286927930195.png new file mode 100644 index 0000000000000000000000000000000000000000..a588e9bcd9007eb6b0916e4c1dcb85bceda18110 Binary files /dev/null and b/outputs/depthmap-17286927930195.png differ diff --git a/outputs/depthmap-17286927930196-left-right.png b/outputs/depthmap-17286927930196-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..18601cb8373c9881c19c51dabe9e64e9f4d3e6f7 --- /dev/null +++ b/outputs/depthmap-17286927930196-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1aaff0c7e4708d8e039bfbeb7c6efb63b82fa404d96eba1ce29ec4a156608d1 +size 1139624 diff --git a/outputs/depthmap-17286927930197.png b/outputs/depthmap-17286927930197.png new file mode 100644 index 0000000000000000000000000000000000000000..3ce543bee489bdd2b995cf2fe11bffad0cc678ad Binary files /dev/null and b/outputs/depthmap-17286927930197.png differ diff --git a/outputs/depthmap-17286927930198-left-right.png b/outputs/depthmap-17286927930198-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..68c47167d1cd0720ad497cf7f1ff5ca84aec8be0 --- /dev/null +++ b/outputs/depthmap-17286927930198-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff62f2cb2b821ad247f016ff87906bbac9217d1006ab53e912decaaac6f9a019 +size 1799230 diff --git a/outputs/depthmap-17286927930199.png b/outputs/depthmap-17286927930199.png new file mode 100644 index 0000000000000000000000000000000000000000..f28bff446362d2d012c17f17e0f1a3e3f16e45e1 --- /dev/null +++ b/outputs/depthmap-17286927930199.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6e5a60836c769d7296faff26a858a920abe3ced2bd67a4659dee2d2e5e04c4e +size 1028063 diff --git a/outputs/depthmap-17286927930200-left-right.png b/outputs/depthmap-17286927930200-left-right.png new file mode 100644 index 
0000000000000000000000000000000000000000..f8062fb852b8cf709e9ff026fede85c8c2b5bb33 --- /dev/null +++ b/outputs/depthmap-17286927930200-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f7ccd6c619186c11d0243ecd496d9fc2f828425c735248a88147c2524d5a26 +size 3727561 diff --git a/outputs/depthmap-17286927930201.png b/outputs/depthmap-17286927930201.png new file mode 100644 index 0000000000000000000000000000000000000000..87e080a69ee3386335ca4579e41b975aa0ab545f Binary files /dev/null and b/outputs/depthmap-17286927930201.png differ diff --git a/outputs/depthmap-17286927930202-left-right.png b/outputs/depthmap-17286927930202-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..dc16566cca867ae4adc8539e60f3117fa221841b --- /dev/null +++ b/outputs/depthmap-17286927930202-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfeac1b23366fd70575d2c57d03854ceb11ca99ca72e3c0205e21423bc804f3b +size 2318150 diff --git a/outputs/depthmap-17286927930203.png b/outputs/depthmap-17286927930203.png new file mode 100644 index 0000000000000000000000000000000000000000..6d7b48df80514a64a17a542ab3c935c4d1aa75d6 Binary files /dev/null and b/outputs/depthmap-17286927930203.png differ diff --git a/outputs/depthmap-17286927930204-left-right.png b/outputs/depthmap-17286927930204-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..da5e49eb1b1ada8834e6b2b7dacb2833a7af2373 --- /dev/null +++ b/outputs/depthmap-17286927930204-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c399c0ffa27c760ac88834a48e864dd9da569960eea08a353bfafd307b2f6ea +size 3230394 diff --git a/outputs/depthmap-17286927930205.png b/outputs/depthmap-17286927930205.png new file mode 100644 index 0000000000000000000000000000000000000000..447d83a8be2cc5032f514c6b4cb8d9ad0b47576b Binary files /dev/null and b/outputs/depthmap-17286927930205.png differ diff --git 
a/outputs/depthmap-17286927930206-left-right.png b/outputs/depthmap-17286927930206-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..356e4403f4c275ae925b2482994f187bbea756a9 --- /dev/null +++ b/outputs/depthmap-17286927930206-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e80b8bf2837d542709ac2cf748f4135b9bbf3e8eb9f62cc2504a1c74915e361 +size 2617376 diff --git a/outputs/depthmap-17286927930207.png b/outputs/depthmap-17286927930207.png new file mode 100644 index 0000000000000000000000000000000000000000..a08503345097ff3ae672328b5877baaf06560761 Binary files /dev/null and b/outputs/depthmap-17286927930207.png differ diff --git a/outputs/depthmap-17286927930208-left-right.png b/outputs/depthmap-17286927930208-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..6673c7b4e86aeceec409ad306af6b4e2c16cfafd --- /dev/null +++ b/outputs/depthmap-17286927930208-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d456ffe3bfc82941f1aa2f8aa639c8747c59d65794e34ea7553ffc051dfcb3 +size 2713046 diff --git a/outputs/depthmap-17286927930209.png b/outputs/depthmap-17286927930209.png new file mode 100644 index 0000000000000000000000000000000000000000..a6ac59c0b171c32f7aeaae774f1c79711a0f0ed2 Binary files /dev/null and b/outputs/depthmap-17286927930209.png differ diff --git a/outputs/depthmap-17286927930210-left-right.png b/outputs/depthmap-17286927930210-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..58a1b67a12c10235da392981c7c3765b07bac188 --- /dev/null +++ b/outputs/depthmap-17286927930210-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e19aec978918d347201365a9aa869fb4702d34697beeb8fea260d62e995ecc4 +size 2833281 diff --git a/outputs/depthmap-17286927930211.png b/outputs/depthmap-17286927930211.png new file mode 100644 index 
0000000000000000000000000000000000000000..f60a162e8367c67b21e7d5b9e1fec3e0cbadb948 Binary files /dev/null and b/outputs/depthmap-17286927930211.png differ diff --git a/outputs/depthmap-17286927930212-left-right.png b/outputs/depthmap-17286927930212-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..0db537a612a01cd6688822cc417c4e37d2deface --- /dev/null +++ b/outputs/depthmap-17286927930212-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f58cad3a147f3996137497310cc06b15a407aca25c5e1760b21ca9577ccf763f +size 2760040 diff --git a/outputs/depthmap-17286927930213.png b/outputs/depthmap-17286927930213.png new file mode 100644 index 0000000000000000000000000000000000000000..02944981c2e322b2bf8ea5644e0f7cf668842df8 --- /dev/null +++ b/outputs/depthmap-17286927930213.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:752a6c7df40366666e686467b625609047c2d914b24ee018acc2308669e462f3 +size 1073715 diff --git a/outputs/depthmap-17286927930214-left-right.png b/outputs/depthmap-17286927930214-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..6ee2de8ad65e041777bd7e056d529a7b262785fb --- /dev/null +++ b/outputs/depthmap-17286927930214-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41e79c4556466ad20b3682e1f7d01b4576f3a665c8cf3ddd26f0fb8441925077 +size 3718188 diff --git a/outputs/depthmap-17286927930215.png b/outputs/depthmap-17286927930215.png new file mode 100644 index 0000000000000000000000000000000000000000..fe99e239e1c0ea1d802323f01f1a478d6a3bd692 Binary files /dev/null and b/outputs/depthmap-17286927930215.png differ diff --git a/outputs/depthmap-17286927930216-left-right.png b/outputs/depthmap-17286927930216-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..e2c67bbfc9b0ba6a9e25373fbb5cc2ba8aeb413d --- /dev/null +++ b/outputs/depthmap-17286927930216-left-right.png @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9554807438687f6ab9b68bb99456d2d859c663de6a04f0af477f9c6e7f54fff7 +size 2214628 diff --git a/outputs/depthmap-17286927930217.png b/outputs/depthmap-17286927930217.png new file mode 100644 index 0000000000000000000000000000000000000000..adfd7f16b397638262ba49a153c3d6ab0f5939d7 Binary files /dev/null and b/outputs/depthmap-17286927930217.png differ diff --git a/outputs/depthmap-17286927930218-left-right.png b/outputs/depthmap-17286927930218-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..426d62c3a9a05d0504205e3b9a6184054e5e9cea --- /dev/null +++ b/outputs/depthmap-17286927930218-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c9fe03ac90b1427fc664f8b8a1caf8af65b3d1a673e9e39c47c027d1fc47536 +size 2228869 diff --git a/outputs/depthmap-17286927930219.png b/outputs/depthmap-17286927930219.png new file mode 100644 index 0000000000000000000000000000000000000000..90ed6de49a84ee202fadd6da3aca2ba36510f980 Binary files /dev/null and b/outputs/depthmap-17286927930219.png differ diff --git a/outputs/depthmap-17286927930220-left-right.png b/outputs/depthmap-17286927930220-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..26c96be75479fd6facea38198cc757107b1304c8 --- /dev/null +++ b/outputs/depthmap-17286927930220-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75fbf1ea6b71aa355e8985ab272a070360997b833f92b295e889e674395e686b +size 2960288 diff --git a/outputs/depthmap-17286927930221.png b/outputs/depthmap-17286927930221.png new file mode 100644 index 0000000000000000000000000000000000000000..0129f29926df7645732a55f82a942974b453199d Binary files /dev/null and b/outputs/depthmap-17286927930221.png differ diff --git a/outputs/depthmap-17286927930222-left-right.png b/outputs/depthmap-17286927930222-left-right.png new file mode 100644 index 
0000000000000000000000000000000000000000..b6bba91513ab9821cd7d2f379e5c136bbdfd03c9 --- /dev/null +++ b/outputs/depthmap-17286927930222-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abd66c5bf8033c28ad480fb5d7095d9c008f93c0c4ab0239f04d006d01cd0db +size 2499222 diff --git a/outputs/depthmap-17286927930223.png b/outputs/depthmap-17286927930223.png new file mode 100644 index 0000000000000000000000000000000000000000..fc0add9bb6eacae6c0decf591163d4f26c4d8128 Binary files /dev/null and b/outputs/depthmap-17286927930223.png differ diff --git a/outputs/depthmap-17286927930224-left-right.png b/outputs/depthmap-17286927930224-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..32fa3db19b2a23bc9fe94b8f1539da0a9c177646 --- /dev/null +++ b/outputs/depthmap-17286927930224-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2bdac1f8d30b2ba5c1a06ebe7c82e0db2f4b088bf682f6b3bb1c8de4b96223a +size 2967175 diff --git a/outputs/depthmap-17286927930225.png b/outputs/depthmap-17286927930225.png new file mode 100644 index 0000000000000000000000000000000000000000..edb5424d4babfe0c600bf6c4026fc993f4f09a91 Binary files /dev/null and b/outputs/depthmap-17286927930225.png differ diff --git a/outputs/depthmap-17286927930226-left-right.png b/outputs/depthmap-17286927930226-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..32f2922b7376145a99100db82e5d51c07491b0d4 --- /dev/null +++ b/outputs/depthmap-17286927930226-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5679e9c5688634bdd719dbf18b1e5e06fbcf7b0a9dda235a9c8ecfd226fa8cb +size 2837477 diff --git a/outputs/depthmap-17286927930227.png b/outputs/depthmap-17286927930227.png new file mode 100644 index 0000000000000000000000000000000000000000..51b6a6b2365f6bf88786b582aa5e9a1d97cb9145 Binary files /dev/null and b/outputs/depthmap-17286927930227.png differ diff --git 
a/outputs/depthmap-17286927930228-left-right.png b/outputs/depthmap-17286927930228-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..2682160472195ce771451e7a15618430e303e7d8 --- /dev/null +++ b/outputs/depthmap-17286927930228-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f73e477f7104b3e7f007861090839dbac3e59d029c4aad5cdf02c9a572ac3b6 +size 1901785 diff --git a/outputs/depthmap-17286927930229.png b/outputs/depthmap-17286927930229.png new file mode 100644 index 0000000000000000000000000000000000000000..6ca5980229c1c1903cdddf4370bbcd5ee4618b2e Binary files /dev/null and b/outputs/depthmap-17286927930229.png differ diff --git a/outputs/depthmap-17286927930230-left-right.png b/outputs/depthmap-17286927930230-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..e52bd8f8db8e537d1253449a777c9cdb1de7b8bf --- /dev/null +++ b/outputs/depthmap-17286927930230-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:976618e909dd3946b97f7f2aecd82ccfd8c9bac709fcf718a964b6119b427163 +size 2345911 diff --git a/outputs/depthmap-17286927930231.png b/outputs/depthmap-17286927930231.png new file mode 100644 index 0000000000000000000000000000000000000000..d1ca9f06dff77846c9c19b36a96cf339474766f2 Binary files /dev/null and b/outputs/depthmap-17286927930231.png differ diff --git a/outputs/depthmap-17286927930232-left-right.png b/outputs/depthmap-17286927930232-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..3682ac84ea6585b60fd8c796a24c43bf4180018e --- /dev/null +++ b/outputs/depthmap-17286927930232-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63598c825a23328194977f313c7d8d3ba95165acc158eb5554d2c51af8bc667d +size 1134261 diff --git a/outputs/depthmap-17286927930233.png b/outputs/depthmap-17286927930233.png new file mode 100644 index 
0000000000000000000000000000000000000000..6adbf1ebae4802304d19215df9b717ccf1b5e4cb Binary files /dev/null and b/outputs/depthmap-17286927930233.png differ diff --git a/outputs/depthmap-17286927930234-left-right.png b/outputs/depthmap-17286927930234-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..fc9d14ecf0fce94fe1494b19a0da9f61495f87a9 --- /dev/null +++ b/outputs/depthmap-17286927930234-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a18c064b4916055a12f2c162edd0bb0e4c3c27f36a7ac7f4d204b62b52c52d47 +size 2394055 diff --git a/outputs/depthmap-17286927930235.png b/outputs/depthmap-17286927930235.png new file mode 100644 index 0000000000000000000000000000000000000000..bdbca3422810325bfbbd2fa94b4c887602f362df Binary files /dev/null and b/outputs/depthmap-17286927930235.png differ diff --git a/outputs/depthmap-17286927930236-left-right.png b/outputs/depthmap-17286927930236-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..ede1914381ea42a3834d9a23521a585038d471ff --- /dev/null +++ b/outputs/depthmap-17286927930236-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929724fabfb2782fe628fa4312b940029778ef83f40ff040406892eece76e789 +size 1401216 diff --git a/outputs/depthmap-17286927930237.png b/outputs/depthmap-17286927930237.png new file mode 100644 index 0000000000000000000000000000000000000000..86153bd87cea43c2c261bc3c61b65643eaf7a664 Binary files /dev/null and b/outputs/depthmap-17286927930237.png differ diff --git a/outputs/depthmap-17286927930238-left-right.png b/outputs/depthmap-17286927930238-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..a6dbf41ceaba228e47f159f14f4591db23e7a514 --- /dev/null +++ b/outputs/depthmap-17286927930238-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bad4d571435f19494c2e13810e81a40d2690d1f66120eaba0c58e23f91d4209e +size 
2684826 diff --git a/outputs/depthmap-17286927930239.png b/outputs/depthmap-17286927930239.png new file mode 100644 index 0000000000000000000000000000000000000000..717048c46d346b87ab246fe2fc85e02246675bdd Binary files /dev/null and b/outputs/depthmap-17286927930239.png differ diff --git a/outputs/depthmap-17286927930240-left-right.png b/outputs/depthmap-17286927930240-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..a254389197b1719ba5ec318c86c833a5b98f5483 --- /dev/null +++ b/outputs/depthmap-17286927930240-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc73c0ddc11226fa5fad4f695a5c638a983c0acd7bbac0a70bc6c1067709d481 +size 1023780 diff --git a/outputs/depthmap-17286927930241.png b/outputs/depthmap-17286927930241.png new file mode 100644 index 0000000000000000000000000000000000000000..7642aad2c27445ecd2b897ec8a72f4793e17fc58 Binary files /dev/null and b/outputs/depthmap-17286927930241.png differ diff --git a/outputs/depthmap-17286927930242-left-right.png b/outputs/depthmap-17286927930242-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..9b3ba163fe4dac684f1d1f112137128b317bdb23 --- /dev/null +++ b/outputs/depthmap-17286927930242-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23b519a6915bf43cbeb6eb98cb09606aa4ce359793ce57124f496fb21da567c9 +size 2609435 diff --git a/outputs/depthmap-17286927930243.png b/outputs/depthmap-17286927930243.png new file mode 100644 index 0000000000000000000000000000000000000000..bb425cfa3313f8f21df56e5cb06404c10a783352 Binary files /dev/null and b/outputs/depthmap-17286927930243.png differ diff --git a/outputs/depthmap-17286927930244-left-right.png b/outputs/depthmap-17286927930244-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f63c49c4e037e8a7af55239c90deea042d772dab --- /dev/null +++ b/outputs/depthmap-17286927930244-left-right.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:8a893e10cfe63add3af83d3587f094b6008ec1dd9cea48716f9eab710de0e207 +size 1283104 diff --git a/outputs/depthmap-17286927930245.png b/outputs/depthmap-17286927930245.png new file mode 100644 index 0000000000000000000000000000000000000000..d0f68bd6b138a5e1447cafa8da27eae957db2a80 Binary files /dev/null and b/outputs/depthmap-17286927930245.png differ diff --git a/outputs/depthmap-17286927930246-left-right.png b/outputs/depthmap-17286927930246-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..98ef5d67f48c65286b957dfe0cadd90d1eba2110 --- /dev/null +++ b/outputs/depthmap-17286927930246-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f246db4ebea3f7654b1ead21d91a59aba75de828cd27e10d3eaab02a71dbcfe +size 1031013 diff --git a/outputs/depthmap-17286927930247.png b/outputs/depthmap-17286927930247.png new file mode 100644 index 0000000000000000000000000000000000000000..73b04686e7a813c9624437ba3310956f0e987c29 Binary files /dev/null and b/outputs/depthmap-17286927930247.png differ diff --git a/outputs/depthmap-17286927930248-left-right.png b/outputs/depthmap-17286927930248-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..b7908e65b264498c93345fabfccf6455d55689f7 --- /dev/null +++ b/outputs/depthmap-17286927930248-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc218bafcc429dd0bc62f7bbd787ff0c80a7710c5a88e89580dd6b28c1e474c1 +size 1667323 diff --git a/outputs/depthmap-17286927930249.png b/outputs/depthmap-17286927930249.png new file mode 100644 index 0000000000000000000000000000000000000000..a6b2344a8018b493e4c85421f1a44b50ceba5542 Binary files /dev/null and b/outputs/depthmap-17286927930249.png differ diff --git a/outputs/depthmap-17286927930250-left-right.png b/outputs/depthmap-17286927930250-left-right.png new file mode 100644 index 
0000000000000000000000000000000000000000..069539cd869f0670ba581bfaff7ab43e0db78a3b --- /dev/null +++ b/outputs/depthmap-17286927930250-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ce78c8ee424fc756092bc4dbcfc89e32b48d21581db02827137ad4effd9e1ac +size 1510585 diff --git a/outputs/depthmap-17286927930251.png b/outputs/depthmap-17286927930251.png new file mode 100644 index 0000000000000000000000000000000000000000..222047b2f665f20028e2675479cfa4cf89f9f287 Binary files /dev/null and b/outputs/depthmap-17286927930251.png differ diff --git a/outputs/depthmap-17286927930252-left-right.png b/outputs/depthmap-17286927930252-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..361d4606f59bf6587f55ffe9351d063ed960bd8c --- /dev/null +++ b/outputs/depthmap-17286927930252-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e996f45d9d8767c86b8e1dbcba5e56123d625947f51b3fb151ad089c3137d75 +size 1078434 diff --git a/outputs/depthmap-17286927930253.png b/outputs/depthmap-17286927930253.png new file mode 100644 index 0000000000000000000000000000000000000000..2c6f42c2503e6304c2001b5d754ce683dbd7a4c6 --- /dev/null +++ b/outputs/depthmap-17286927930253.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c4fbd3b794e9c23b887c1f4043baeec503ea502eae2f39d22a7af0032d51d07 +size 1004185 diff --git a/outputs/depthmap-17286927930254-left-right.png b/outputs/depthmap-17286927930254-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..672efbc3590c74993a36a148fe7f1399c316de13 --- /dev/null +++ b/outputs/depthmap-17286927930254-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e78e5f8edaeb1bcc9fabbc5351a546a00ce152cef10f6fe74852250cac90079 +size 3994071 diff --git a/outputs/depthmap-17286927930255.png b/outputs/depthmap-17286927930255.png new file mode 100644 index 
0000000000000000000000000000000000000000..dbc0df289c53bd698ea9a2c42d4aa4c59d1ba6c6 Binary files /dev/null and b/outputs/depthmap-17286927930255.png differ diff --git a/outputs/depthmap-17286927930256-left-right.png b/outputs/depthmap-17286927930256-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..00212b8b797b3cb3da395d4c0ff80a89877113aa --- /dev/null +++ b/outputs/depthmap-17286927930256-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62353ec21e7423468d5fa0fadeb37711201c51ed2be020c55a0f8aa7e6f96b33 +size 2345097 diff --git a/outputs/depthmap-17286927930257.png b/outputs/depthmap-17286927930257.png new file mode 100644 index 0000000000000000000000000000000000000000..7288c0400b5343db9d9640d051ea8de6110788d4 Binary files /dev/null and b/outputs/depthmap-17286927930257.png differ diff --git a/outputs/depthmap-17286927930258-left-right.png b/outputs/depthmap-17286927930258-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..f6be16ab2e647e11665c4c7b5d5cdaf6a8f7a44c --- /dev/null +++ b/outputs/depthmap-17286927930258-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6706c0e8a8485ae199be4487fdef6535d7337190c75f4555626bb26a22258e26 +size 1498325 diff --git a/outputs/depthmap-17286927930259.png b/outputs/depthmap-17286927930259.png new file mode 100644 index 0000000000000000000000000000000000000000..defaa9c0d27c568f60db1fd735cee80fc75ea0db Binary files /dev/null and b/outputs/depthmap-17286927930259.png differ diff --git a/outputs/depthmap-17286927930260-left-right.png b/outputs/depthmap-17286927930260-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..082c8e9050533f231b245485e8bab6516b3f4cc7 --- /dev/null +++ b/outputs/depthmap-17286927930260-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d13e3b35072a1908422bacd6e5fb079c78a8bcd55931c655e1f20f5f530a854 +size 
2713631 diff --git a/outputs/depthmap-17286927930261.png b/outputs/depthmap-17286927930261.png new file mode 100644 index 0000000000000000000000000000000000000000..ed82068b790e9c7189c542f0170004b203be6c05 --- /dev/null +++ b/outputs/depthmap-17286927930261.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21bfc41602b1bd3cc5ab11d36c4dcf094da643d300a62b3705b824ce9b6d0f04 +size 1400288 diff --git a/outputs/depthmap-17286927930262-left-right.png b/outputs/depthmap-17286927930262-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..fc7e8ccb99507e4a2cd6d12a147fb31e15020e8d --- /dev/null +++ b/outputs/depthmap-17286927930262-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16633bb65832c20277a1c4d881037e85403281714d06e3d4037bf3831fe307b6 +size 3533974 diff --git a/outputs/depthmap-17286927930263.png b/outputs/depthmap-17286927930263.png new file mode 100644 index 0000000000000000000000000000000000000000..9cf32eeff6f44d79bdccdc12f8a2539c3272753e --- /dev/null +++ b/outputs/depthmap-17286927930263.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b488cd1b704419c7cc58e5e903b1abc6c35e6a8a7ed5231c961bba37d6e9524 +size 1495347 diff --git a/outputs/depthmap-17286927930264-left-right.png b/outputs/depthmap-17286927930264-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..c9bb5e4bf3fcfafa0a465050f749347b9216f741 --- /dev/null +++ b/outputs/depthmap-17286927930264-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f779894ecad0659d554cb248d4b7d1764699b419e3bdd95ce3e257dac124aa60 +size 3796379 diff --git a/outputs/depthmap-17286927930265.png b/outputs/depthmap-17286927930265.png new file mode 100644 index 0000000000000000000000000000000000000000..35fd26187ac68a3188aa2c631031493161a165a8 --- /dev/null +++ b/outputs/depthmap-17286927930265.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:b867164f6877212922e502053b491dafba979c5a9f88e834f8d005304575c0ba +size 1041754 diff --git a/outputs/depthmap-17286927930266-left-right.png b/outputs/depthmap-17286927930266-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..ddc244c3e30c8b1ba91e0128195b187a3e9d5402 --- /dev/null +++ b/outputs/depthmap-17286927930266-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ada6644cebe32f36c65dbaf54403a2f0a8850704bba4e8d501f250fc9bc9c263 +size 3932151 diff --git a/outputs/depthmap-17286927930267.png b/outputs/depthmap-17286927930267.png new file mode 100644 index 0000000000000000000000000000000000000000..9e9ac753b537f190152feb22f8ddd613bbdc0719 Binary files /dev/null and b/outputs/depthmap-17286927930267.png differ diff --git a/outputs/depthmap-17286927930268-left-right.png b/outputs/depthmap-17286927930268-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..fb65c28b424d95a75a836ac38425edb087a4b61b --- /dev/null +++ b/outputs/depthmap-17286927930268-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ae8b1c71fabb003dd1bf1ff7ee3c106c8f480851815c59e67ef4599f13a0858 +size 1221129 diff --git a/outputs/depthmap-17286927930269.png b/outputs/depthmap-17286927930269.png new file mode 100644 index 0000000000000000000000000000000000000000..1b7f588aced1c244d2f224d1763ee9cd8d2dd5fb Binary files /dev/null and b/outputs/depthmap-17286927930269.png differ diff --git a/outputs/depthmap-17286927930270-left-right.png b/outputs/depthmap-17286927930270-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..71b134b8cbfc601f2bc04cdde14b81f987d40d58 --- /dev/null +++ b/outputs/depthmap-17286927930270-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68c833990dbccb792e53e3e90d9d65206be815a038d4f36d7abdee395d8b33a8 +size 1094879 diff --git 
a/outputs/depthmap-17286927930271.png b/outputs/depthmap-17286927930271.png new file mode 100644 index 0000000000000000000000000000000000000000..ef6623e0ec4c931e997a55defc1783d77f1c3717 Binary files /dev/null and b/outputs/depthmap-17286927930271.png differ diff --git a/outputs/depthmap-17286927930272-left-right.png b/outputs/depthmap-17286927930272-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..8707ab2ba64b5d0e161d131f40a74e24a713cead --- /dev/null +++ b/outputs/depthmap-17286927930272-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b312881c1b792eeaf439eeef032902d62c8c7368e7c9a99bab1327596d994021 +size 1253227 diff --git a/outputs/depthmap-17286927930273.png b/outputs/depthmap-17286927930273.png new file mode 100644 index 0000000000000000000000000000000000000000..d97b3270c1d96ceef75a81ba8426036c71e36d81 Binary files /dev/null and b/outputs/depthmap-17286927930273.png differ diff --git a/outputs/depthmap-17286927930274-left-right.png b/outputs/depthmap-17286927930274-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..66b2c1e3f0bcb7f0cb21cfbf9d6e26f3044001ac --- /dev/null +++ b/outputs/depthmap-17286927930274-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f83a69022987e5f51eb09205a13558c8745255eb5fbee766a20b496dbad8f5eb +size 2188062 diff --git a/outputs/depthmap-17286927930275.png b/outputs/depthmap-17286927930275.png new file mode 100644 index 0000000000000000000000000000000000000000..c049de4cad142e4106f517959af77f0ce5ca87c6 Binary files /dev/null and b/outputs/depthmap-17286927930275.png differ diff --git a/outputs/depthmap-17286927930276-left-right.png b/outputs/depthmap-17286927930276-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..16926365c5b3af38a9011ed8ac878995a04c12ff --- /dev/null +++ b/outputs/depthmap-17286927930276-left-right.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:4991990e10640a86fd8a5c0797c44e72fe7170dee870cd607e8d6f7f246c50a5 +size 2357578 diff --git a/outputs/depthmap-17286927930277.png b/outputs/depthmap-17286927930277.png new file mode 100644 index 0000000000000000000000000000000000000000..b2df892528360602a25acd12c01c5adbee54fad5 Binary files /dev/null and b/outputs/depthmap-17286927930277.png differ diff --git a/outputs/depthmap-17286927930278-left-right.png b/outputs/depthmap-17286927930278-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..661fc786b7871676eae5a55591f213b83c8f1568 --- /dev/null +++ b/outputs/depthmap-17286927930278-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8a4a21fc4bd2a0f3038d9ff1907cc96fc551816d13e793f4365324f5fd9256 +size 2093834 diff --git a/outputs/depthmap-17286927930279.png b/outputs/depthmap-17286927930279.png new file mode 100644 index 0000000000000000000000000000000000000000..d95ebf9e23e58a7cd07faedd023ed6c28b3870e0 Binary files /dev/null and b/outputs/depthmap-17286927930279.png differ diff --git a/outputs/depthmap-17286927930280-left-right.png b/outputs/depthmap-17286927930280-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..1d55992d4ea0e0cbf4654361eccab4dc23c31f28 --- /dev/null +++ b/outputs/depthmap-17286927930280-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51d66000ae777b1b7f098cce258b380003decd5420b133655da6da939887cb23 +size 2626482 diff --git a/outputs/depthmap-17286927930281.png b/outputs/depthmap-17286927930281.png new file mode 100644 index 0000000000000000000000000000000000000000..dfe5b1775afe287cca416dafbec47012cb8798e6 Binary files /dev/null and b/outputs/depthmap-17286927930281.png differ diff --git a/outputs/depthmap-17286927930282-left-right.png b/outputs/depthmap-17286927930282-left-right.png new file mode 100644 index 
0000000000000000000000000000000000000000..706907f38cf575f1b1e7e31ca6e0e82067b666e8 --- /dev/null +++ b/outputs/depthmap-17286927930282-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc4a6df57f28187ba316de7c3b81a081b51752b45913eaa16870e535086adb8 +size 1636024 diff --git a/outputs/depthmap-17286927930283.png b/outputs/depthmap-17286927930283.png new file mode 100644 index 0000000000000000000000000000000000000000..d148f7194d961e0470df90f33ff4a9b77113b597 Binary files /dev/null and b/outputs/depthmap-17286927930283.png differ diff --git a/outputs/depthmap-17286927930284-left-right.png b/outputs/depthmap-17286927930284-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..709dccd4703dc59ea1a035b168de5b4b73b3656d --- /dev/null +++ b/outputs/depthmap-17286927930284-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:471c481a61e41c79fa0cb9f224eecaaf1412b2e3b117f415017ddafc3e7cf5a8 +size 1820469 diff --git a/outputs/depthmap-17286927930285.png b/outputs/depthmap-17286927930285.png new file mode 100644 index 0000000000000000000000000000000000000000..77624a5562a21a9f148aea1f0fceccb163652e86 Binary files /dev/null and b/outputs/depthmap-17286927930285.png differ diff --git a/outputs/depthmap-17286927930286-left-right.png b/outputs/depthmap-17286927930286-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..277d5a527e16943f60236b5ed8ab9cd2f62f9af3 --- /dev/null +++ b/outputs/depthmap-17286927930286-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d72953b78670b04b6f9b834ef8d8523a01c2ac06a813bed7377d1d43ab0e827 +size 2318888 diff --git a/outputs/depthmap-17286927930287.png b/outputs/depthmap-17286927930287.png new file mode 100644 index 0000000000000000000000000000000000000000..c6c1ea6d44278dda7bd7292f3a1b2253878cac0b Binary files /dev/null and b/outputs/depthmap-17286927930287.png differ diff --git 
a/outputs/depthmap-17286927930288-left-right.png b/outputs/depthmap-17286927930288-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..8b1d90bf6bdcfa4ac3178527d108fa4a1844040c --- /dev/null +++ b/outputs/depthmap-17286927930288-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6ab37561eba5c4be922143fc47f42235563fb11229ed467c9cbe292145b0047 +size 2460157 diff --git a/outputs/depthmap-17286927930289.png b/outputs/depthmap-17286927930289.png new file mode 100644 index 0000000000000000000000000000000000000000..0564e387e53db7bc2a4c87ef37db5486ea84f8db Binary files /dev/null and b/outputs/depthmap-17286927930289.png differ diff --git a/outputs/depthmap-17286927930290-left-right.png b/outputs/depthmap-17286927930290-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..77da21e22681c7a2929c931c1a8d024d8475469a --- /dev/null +++ b/outputs/depthmap-17286927930290-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad86d8876f71e799278b75305d8be3f76777422d074f2a1e641ba4222d8f396c +size 2168137 diff --git a/outputs/depthmap-17286927930291.png b/outputs/depthmap-17286927930291.png new file mode 100644 index 0000000000000000000000000000000000000000..eb4c6ddffd43533594f667b99ef0ebf9c16ec85f Binary files /dev/null and b/outputs/depthmap-17286927930291.png differ diff --git a/outputs/depthmap-17286927930292-left-right.png b/outputs/depthmap-17286927930292-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..ce757e7401690083222fdfc5324733837477fc01 --- /dev/null +++ b/outputs/depthmap-17286927930292-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a914633d1cedb88bb8160cedf8388dd44920df304af3e42a5be64729df15491e +size 2202501 diff --git a/outputs/depthmap-17286927930293.png b/outputs/depthmap-17286927930293.png new file mode 100644 index 
0000000000000000000000000000000000000000..a602c7ba3b6ce3612aa41d34d79a6e90ab3de112 Binary files /dev/null and b/outputs/depthmap-17286927930293.png differ diff --git a/outputs/depthmap-17286927930294-left-right.png b/outputs/depthmap-17286927930294-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..c4365e629d3b36162eb249e3e48c29557bcacd5f --- /dev/null +++ b/outputs/depthmap-17286927930294-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb0e0a6a04a35360591eecc8c38e9b5077bc21823b546c957e8a7a718c9149e7 +size 2387481 diff --git a/outputs/depthmap-17286927930295.png b/outputs/depthmap-17286927930295.png new file mode 100644 index 0000000000000000000000000000000000000000..6566cd7b6b9ecb94eea9a11000600ef08d5be87f Binary files /dev/null and b/outputs/depthmap-17286927930295.png differ diff --git a/outputs/depthmap-17286927930296-left-right.png b/outputs/depthmap-17286927930296-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..a8ee25d0b656dd284975c2c72ecda06cee677dc4 --- /dev/null +++ b/outputs/depthmap-17286927930296-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39a82628e1489c2ec17197f242eea8baf79f84338fb9823237ae413dcc3ae823 +size 2205335 diff --git a/outputs/depthmap-17286927930297.png b/outputs/depthmap-17286927930297.png new file mode 100644 index 0000000000000000000000000000000000000000..fa8fc4ceda9af9e344c104fdaa60da9190ca0777 Binary files /dev/null and b/outputs/depthmap-17286927930297.png differ diff --git a/outputs/depthmap-17286927930298-left-right.png b/outputs/depthmap-17286927930298-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..7cc4d5ac9219e3173757d6cfacdc388c40b35810 --- /dev/null +++ b/outputs/depthmap-17286927930298-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc8468f2d5412e10ce0c0861c05b590bf9ffd4eeb51b64a34c2382f698c70b91 +size 
2380658 diff --git a/outputs/depthmap-17286927930299.png b/outputs/depthmap-17286927930299.png new file mode 100644 index 0000000000000000000000000000000000000000..ab11acc925569ee07f6689d3042db22e42175986 Binary files /dev/null and b/outputs/depthmap-17286927930299.png differ diff --git a/outputs/depthmap-17286927930300-left-right.png b/outputs/depthmap-17286927930300-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..49153a81482d5d8fa31783ad4d314af0174c8cfc --- /dev/null +++ b/outputs/depthmap-17286927930300-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e75aa2be77a2a785d79c35ebb03d89a21cde9778a56e6c339d28ef1e0d57118 +size 1888887 diff --git a/outputs/depthmap-17286927930301.png b/outputs/depthmap-17286927930301.png new file mode 100644 index 0000000000000000000000000000000000000000..20af9c45064eaffa5cf937325277c01495ae7525 Binary files /dev/null and b/outputs/depthmap-17286927930301.png differ diff --git a/outputs/depthmap-17286927930302-left-right.png b/outputs/depthmap-17286927930302-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..b5f8885c154d1b310610d41462812424573d71d8 --- /dev/null +++ b/outputs/depthmap-17286927930302-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95dd2ecf6f924eee4a801e17a9c2f3ce98a7ff3a561f1fe326d61bd62f5d6f0f +size 2963105 diff --git a/outputs/depthmap-17286927930303.png b/outputs/depthmap-17286927930303.png new file mode 100644 index 0000000000000000000000000000000000000000..c0e8dbaf2c94d3babf62dc3abb3f775874c69a75 Binary files /dev/null and b/outputs/depthmap-17286927930303.png differ diff --git a/outputs/depthmap-17286927930304-left-right.png b/outputs/depthmap-17286927930304-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..327de07a7359199326013daaf55b06b3e1be2878 --- /dev/null +++ b/outputs/depthmap-17286927930304-left-right.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:c7af2eada4d4d07d2358dac97b2250020538ef9658041d857970df3a2f4896c4 +size 2299234 diff --git a/outputs/depthmap-17286927930305.png b/outputs/depthmap-17286927930305.png new file mode 100644 index 0000000000000000000000000000000000000000..16a9a122e7cbd84db20864f48d94339b642dfb26 Binary files /dev/null and b/outputs/depthmap-17286927930305.png differ diff --git a/outputs/depthmap-17286927930306-left-right.png b/outputs/depthmap-17286927930306-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..d477e9b9ddb94b0dadf030a0f975b54ac356de61 --- /dev/null +++ b/outputs/depthmap-17286927930306-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88538677ea3b3120edd46421f2e80df6a18abb32c9829495890401f369a07f17 +size 2065889 diff --git a/outputs/depthmap-17286927930307.png b/outputs/depthmap-17286927930307.png new file mode 100644 index 0000000000000000000000000000000000000000..d13df7309a47d6122807881fa1d1af514248f3cf Binary files /dev/null and b/outputs/depthmap-17286927930307.png differ diff --git a/outputs/depthmap-17286927930308-left-right.png b/outputs/depthmap-17286927930308-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..959b7edb35fba6574a4f52190e08d80a44d4d176 --- /dev/null +++ b/outputs/depthmap-17286927930308-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fda2e252e38b7079f511fd441b65e1c4a4aa4750723bae83899d7cea29f70f7 +size 2054705 diff --git a/outputs/depthmap-17286927930309.png b/outputs/depthmap-17286927930309.png new file mode 100644 index 0000000000000000000000000000000000000000..ab6492d89ba0e8c1e7442fc34365419148460ba0 Binary files /dev/null and b/outputs/depthmap-17286927930309.png differ diff --git a/outputs/depthmap-17286927930310-left-right.png b/outputs/depthmap-17286927930310-left-right.png new file mode 100644 index 
0000000000000000000000000000000000000000..f4cff16eeb702a9cb7c11b9b04ae81ee8ab38745 --- /dev/null +++ b/outputs/depthmap-17286927930310-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c11dc3d76ff431a097b4748e53678e722d7995537c464a7d6e2ba28117aff624 +size 1529955 diff --git a/outputs/depthmap-17286927930311.png b/outputs/depthmap-17286927930311.png new file mode 100644 index 0000000000000000000000000000000000000000..749ba0c09a4b3eac0dda7d4e72db200002a9ba19 Binary files /dev/null and b/outputs/depthmap-17286927930311.png differ diff --git a/outputs/depthmap-17286927930312-left-right.png b/outputs/depthmap-17286927930312-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..ae1f2924324a165d1df96864d06e9455c79e31ba --- /dev/null +++ b/outputs/depthmap-17286927930312-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5214c3cdbf89ddc4d54e4acf4095fb4b6bfc33f354a5c33132d29f24bceb8569 +size 2106996 diff --git a/outputs/depthmap-17286927930313.png b/outputs/depthmap-17286927930313.png new file mode 100644 index 0000000000000000000000000000000000000000..aea9d2ab050e1779be6ddbb8393ea9af66117fe6 Binary files /dev/null and b/outputs/depthmap-17286927930313.png differ diff --git a/outputs/depthmap-17286927930314-left-right.png b/outputs/depthmap-17286927930314-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..539f5873d3487a0fab99d84572b1569bdca2d13a Binary files /dev/null and b/outputs/depthmap-17286927930314-left-right.png differ diff --git a/outputs/depthmap-17286927930315.png b/outputs/depthmap-17286927930315.png new file mode 100644 index 0000000000000000000000000000000000000000..a469ca6bca0e1c72630f9dfd502c76a2c3672c82 Binary files /dev/null and b/outputs/depthmap-17286927930315.png differ diff --git a/outputs/depthmap-17286927930316-left-right.png b/outputs/depthmap-17286927930316-left-right.png new file mode 100644 index 
0000000000000000000000000000000000000000..546b98b50c545fb9f910016046caf398f07c2e3d --- /dev/null +++ b/outputs/depthmap-17286927930316-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04b78d2b610b280b1e1ae8492cd33e83aa579d4c0e596319e2d2c0fca2866fdc +size 1406583 diff --git a/outputs/depthmap-17286927930317.png b/outputs/depthmap-17286927930317.png new file mode 100644 index 0000000000000000000000000000000000000000..c09ecc8174dc4b9ccec3c96c9147161a54c5c071 Binary files /dev/null and b/outputs/depthmap-17286927930317.png differ diff --git a/outputs/depthmap-17286927930318-left-right.png b/outputs/depthmap-17286927930318-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..553c8eacce379a99ff93c4790be5cc4a475feab7 --- /dev/null +++ b/outputs/depthmap-17286927930318-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2ddecf911d39f141061049bf4fa4779189fde607d0fe27d8539a1406e2002a +size 2266219 diff --git a/outputs/depthmap-17286927930319.png b/outputs/depthmap-17286927930319.png new file mode 100644 index 0000000000000000000000000000000000000000..f0cb4598ca947e61fa82e70989fc9a97a8110c5c Binary files /dev/null and b/outputs/depthmap-17286927930319.png differ diff --git a/outputs/depthmap-17286927930320-left-right.png b/outputs/depthmap-17286927930320-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..cd00f4ef0d383a9f7ab4b81351246f4ed061cf63 Binary files /dev/null and b/outputs/depthmap-17286927930320-left-right.png differ diff --git a/outputs/depthmap-17286927930321.png b/outputs/depthmap-17286927930321.png new file mode 100644 index 0000000000000000000000000000000000000000..902601628951c6b6ca7390fe4b41f4e8163cf8c7 Binary files /dev/null and b/outputs/depthmap-17286927930321.png differ diff --git a/outputs/depthmap-17286927930322-left-right.png b/outputs/depthmap-17286927930322-left-right.png new file mode 100644 index 
0000000000000000000000000000000000000000..d13b37f8b61305ed87baa403ddedb1634cb8b7e3 --- /dev/null +++ b/outputs/depthmap-17286927930322-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7af2ec6e398de76cc7ab2c2bcbe35b59b02ab8a147598347337b395f29a824d7 +size 3843190 diff --git a/outputs/depthmap-17286927930323.png b/outputs/depthmap-17286927930323.png new file mode 100644 index 0000000000000000000000000000000000000000..fea4305416a55f55658e2b210e513e99963d016a Binary files /dev/null and b/outputs/depthmap-17286927930323.png differ diff --git a/outputs/depthmap-17286927930324-left-right.png b/outputs/depthmap-17286927930324-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..4ce91d3c285417f72352b3438f83784e93233f23 --- /dev/null +++ b/outputs/depthmap-17286927930324-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc7dedbca6b9100919871286f8d9035e4eaf7cf99ed0c6358751a0c0df312e7c +size 3647304 diff --git a/outputs/depthmap-17286927930325.png b/outputs/depthmap-17286927930325.png new file mode 100644 index 0000000000000000000000000000000000000000..6eb37dd9875f8dfbc6ba4f0897031c90489155d1 Binary files /dev/null and b/outputs/depthmap-17286927930325.png differ diff --git a/outputs/depthmap-17286927930326-left-right.png b/outputs/depthmap-17286927930326-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..7b65f881f3b9f7d8cce3fc5afbd40f327a627e3a --- /dev/null +++ b/outputs/depthmap-17286927930326-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93ba581ebb6322aadc702a39479f99aee8e130152336d348afd4fc9b5e6fac6c +size 1389928 diff --git a/outputs/depthmap-17286927930327.png b/outputs/depthmap-17286927930327.png new file mode 100644 index 0000000000000000000000000000000000000000..7ddb8974034b38777aa6033ecefcd33bc06c37bd Binary files /dev/null and b/outputs/depthmap-17286927930327.png differ diff --git 
a/outputs/depthmap-17286927930328-left-right.png b/outputs/depthmap-17286927930328-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..dcb4d60929426cf957896edebeb43a2f5ed5cc10 --- /dev/null +++ b/outputs/depthmap-17286927930328-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8307990b1288621a4679fbb2d3bc4993ae05bfeff848aa4ffe10b88cdba37582 +size 2957243 diff --git a/outputs/depthmap-17286927930329.png b/outputs/depthmap-17286927930329.png new file mode 100644 index 0000000000000000000000000000000000000000..346a1dd2490dd9e0b6f500a287e4860e025912d0 Binary files /dev/null and b/outputs/depthmap-17286927930329.png differ diff --git a/outputs/depthmap-17286927930330-left-right.png b/outputs/depthmap-17286927930330-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..3fcfd5bb863ce645becb283070701a2e7d77aa91 --- /dev/null +++ b/outputs/depthmap-17286927930330-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d8024b26da54c022deba7f346210d13c19afd0d377c7b3602df7660814f866 +size 3694549 diff --git a/outputs/depthmap-17286927930331.png b/outputs/depthmap-17286927930331.png new file mode 100644 index 0000000000000000000000000000000000000000..94e26d8b4c3cf169faf96b30031bb479fbf257a4 Binary files /dev/null and b/outputs/depthmap-17286927930331.png differ diff --git a/outputs/depthmap-17286927930332-left-right.png b/outputs/depthmap-17286927930332-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..8c5f6536138fe44ba8479bdafcde08cddf64c482 --- /dev/null +++ b/outputs/depthmap-17286927930332-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ab750bdce8beddf6b22a9a2db7ebfd1c78431d7235d93e6bfac92129cf7d3ef +size 3451767 diff --git a/outputs/depthmap-17286927930333.png b/outputs/depthmap-17286927930333.png new file mode 100644 index 
0000000000000000000000000000000000000000..388a04615ad637045bcbd60f72a594874aab43b9 Binary files /dev/null and b/outputs/depthmap-17286927930333.png differ diff --git a/outputs/depthmap-17286927930334-left-right.png b/outputs/depthmap-17286927930334-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..190147eb59074ab6a4a8ad4f3f28c49e9b32945f --- /dev/null +++ b/outputs/depthmap-17286927930334-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f529210402a702f879578d0e6e7efa86d94e6e74f47d4cfe977aff081c61cd7 +size 2996895 diff --git a/outputs/depthmap-17286927930335.png b/outputs/depthmap-17286927930335.png new file mode 100644 index 0000000000000000000000000000000000000000..2ca4d9a472a3df480b2e8818f202c1e253b7b5b0 Binary files /dev/null and b/outputs/depthmap-17286927930335.png differ diff --git a/outputs/depthmap-17286927930336-left-right.png b/outputs/depthmap-17286927930336-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..db337d74868889f7eed9ed99c9427e6671009171 --- /dev/null +++ b/outputs/depthmap-17286927930336-left-right.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2361380ffa5ddafced79142f474bf1891ce6dfa41a0199d6e7469e099b304b8a +size 2319375 diff --git a/outputs/depthmap-17286927930337.png b/outputs/depthmap-17286927930337.png new file mode 100644 index 0000000000000000000000000000000000000000..af68c7e56868c774cd26a1581214d8032f5598ed Binary files /dev/null and b/outputs/depthmap-17286927930337.png differ diff --git a/outputs/depthmap-17286927930338-left-right.png b/outputs/depthmap-17286927930338-left-right.png new file mode 100644 index 0000000000000000000000000000000000000000..86782998a7e3fe87ac85c32e0125242ed263ee84 Binary files /dev/null and b/outputs/depthmap-17286927930338-left-right.png differ diff --git a/pix2pix/LICENSE b/pix2pix/LICENSE new file mode 100644 index 
0000000000000000000000000000000000000000..bf522cf08ad9126b0aff43cc86f88434f9c9f767 --- /dev/null +++ b/pix2pix/LICENSE @@ -0,0 +1,58 @@ +Copyright (c) 2017, Jun-Yan Zhu and Taesung Park +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +--------------------------- LICENSE FOR pix2pix -------------------------------- +BSD License + +For pix2pix software +Copyright (c) 2016, Phillip Isola and Jun-Yan Zhu +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +----------------------------- LICENSE FOR DCGAN -------------------------------- +BSD License + +For dcgan.torch software + +Copyright (c) 2015, Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +Neither the name Facebook nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/pix2pix/__init__.py b/pix2pix/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pix2pix/__pycache__/__init__.cpython-310.pyc b/pix2pix/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6f16f04cb93584c9058e7c74457ce363d0242098 Binary files /dev/null and b/pix2pix/__pycache__/__init__.cpython-310.pyc differ diff --git a/pix2pix/__pycache__/__init__.cpython-311.pyc b/pix2pix/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..990a5b1e35e42781628d440b0d5c79a33f6f8957 Binary files /dev/null and b/pix2pix/__pycache__/__init__.cpython-311.pyc differ diff --git a/pix2pix/__pycache__/__init__.cpython-312.pyc b/pix2pix/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5095529c89a9000b10ac841a53d3e55d5a4b60e1 Binary files /dev/null and b/pix2pix/__pycache__/__init__.cpython-312.pyc differ diff --git a/pix2pix/data/__init__.py b/pix2pix/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b117d47efb4757a3286cb4dae37fac8e559c88a8 --- /dev/null +++ b/pix2pix/data/__init__.py @@ -0,0 +1,93 @@ +"""This package includes all the modules related to data loading and preprocessing + + To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset. + You need to implement four functions: + -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt). + -- <__len__>: return the size of dataset. + -- <__getitem__>: get a data point from data loader. + -- : (optionally) add dataset-specific options and set default options. + +Now you can use the dataset class by specifying flag '--dataset_mode dummy'. +See our template dataset class 'template_dataset.py' for more details. 
+""" +import importlib +import torch.utils.data +from pix2pix.data.base_dataset import BaseDataset + + +def find_dataset_using_name(dataset_name): + """Import the module "data/[dataset_name]_dataset.py". + + In the file, the class called DatasetNameDataset() will + be instantiated. It has to be a subclass of BaseDataset, + and it is case-insensitive. + """ + dataset_filename = "pix2pix.data." + dataset_name + "_dataset" + datasetlib = importlib.import_module(dataset_filename) + + dataset = None + target_dataset_name = dataset_name.replace('_', '') + 'dataset' + for name, cls in datasetlib.__dict__.items(): + if name.lower() == target_dataset_name.lower() \ + and issubclass(cls, BaseDataset): + dataset = cls + + if dataset is None: + raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name)) + + return dataset + + +def get_option_setter(dataset_name): + """Return the static method of the dataset class.""" + dataset_class = find_dataset_using_name(dataset_name) + return dataset_class.modify_commandline_options + + +def create_dataset(opt): + """Create a dataset given the option. + + This function wraps the class CustomDatasetDataLoader. + This is the main interface between this package and 'train.py'/'test.py' + + Example: + >>> from data import create_dataset + >>> dataset = create_dataset(opt) + """ + data_loader = CustomDatasetDataLoader(opt) + dataset = data_loader.load_data() + return dataset + + +class CustomDatasetDataLoader(): + """Wrapper class of Dataset class that performs multi-threaded data loading""" + + def __init__(self, opt): + """Initialize this class + + Step 1: create a dataset instance given the name [dataset_mode] + Step 2: create a multi-threaded data loader. 
+ """ + self.opt = opt + dataset_class = find_dataset_using_name(opt.dataset_mode) + self.dataset = dataset_class(opt) + print("dataset [%s] was created" % type(self.dataset).__name__) + self.dataloader = torch.utils.data.DataLoader( + self.dataset, + batch_size=opt.batch_size, + shuffle=not opt.serial_batches, + num_workers=int(opt.num_threads)) + + def load_data(self): + return self + + def __len__(self): + """Return the number of data in the dataset""" + return min(len(self.dataset), self.opt.max_dataset_size) + + def __iter__(self): + """Return a batch of data""" + for i, data in enumerate(self.dataloader): + if i * self.opt.batch_size >= self.opt.max_dataset_size: + break + yield data diff --git a/pix2pix/data/__pycache__/__init__.cpython-310.pyc b/pix2pix/data/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..970b851a41d593416b64913b6cfb5279138087c1 Binary files /dev/null and b/pix2pix/data/__pycache__/__init__.cpython-310.pyc differ diff --git a/pix2pix/data/__pycache__/__init__.cpython-311.pyc b/pix2pix/data/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8a30d381b4096e55a0a8d4042869907b34cbfc5 Binary files /dev/null and b/pix2pix/data/__pycache__/__init__.cpython-311.pyc differ diff --git a/pix2pix/data/__pycache__/__init__.cpython-312.pyc b/pix2pix/data/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4539eb745b1b8b0b6f35ac7a3d667f2511c7099d Binary files /dev/null and b/pix2pix/data/__pycache__/__init__.cpython-312.pyc differ diff --git a/pix2pix/data/__pycache__/base_dataset.cpython-310.pyc b/pix2pix/data/__pycache__/base_dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bc0cf41ef9b6321813c3af05c4554f8ffe3149df Binary files /dev/null and b/pix2pix/data/__pycache__/base_dataset.cpython-310.pyc differ diff --git 
a/pix2pix/data/__pycache__/base_dataset.cpython-311.pyc b/pix2pix/data/__pycache__/base_dataset.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bfae16051c11144d82586849af7dc8f600e7500f Binary files /dev/null and b/pix2pix/data/__pycache__/base_dataset.cpython-311.pyc differ diff --git a/pix2pix/data/__pycache__/base_dataset.cpython-312.pyc b/pix2pix/data/__pycache__/base_dataset.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cdcd239f7cd899d56b1dc21ebd28cfe881c269da Binary files /dev/null and b/pix2pix/data/__pycache__/base_dataset.cpython-312.pyc differ diff --git a/pix2pix/data/__pycache__/depthmerge_dataset.cpython-310.pyc b/pix2pix/data/__pycache__/depthmerge_dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..65a3709180d33a0e29999edc0d0842c96da3fe23 Binary files /dev/null and b/pix2pix/data/__pycache__/depthmerge_dataset.cpython-310.pyc differ diff --git a/pix2pix/data/__pycache__/depthmerge_dataset.cpython-312.pyc b/pix2pix/data/__pycache__/depthmerge_dataset.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7386f0837af82cc0f1299870a14869c7a43a7863 Binary files /dev/null and b/pix2pix/data/__pycache__/depthmerge_dataset.cpython-312.pyc differ diff --git a/pix2pix/data/__pycache__/image_folder.cpython-310.pyc b/pix2pix/data/__pycache__/image_folder.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..329236de9501a0de82415eebf5220bb38e2b4ed3 Binary files /dev/null and b/pix2pix/data/__pycache__/image_folder.cpython-310.pyc differ diff --git a/pix2pix/data/__pycache__/image_folder.cpython-312.pyc b/pix2pix/data/__pycache__/image_folder.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b614c48828b1ab29aeadab2e7e16c6d12cd611df Binary files /dev/null and b/pix2pix/data/__pycache__/image_folder.cpython-312.pyc differ diff --git 
a/pix2pix/data/base_dataset.py b/pix2pix/data/base_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..7389490c54f3d1431cbceeb8436b8667e19abd51 --- /dev/null +++ b/pix2pix/data/base_dataset.py @@ -0,0 +1,157 @@ +"""This module implements an abstract base class (ABC) 'BaseDataset' for datasets. + +It also includes common transformation functions (e.g., get_transform, __scale_width), which can be later used in subclasses. +""" +import random +import numpy as np +import torch.utils.data as data +from PIL import Image +import torchvision.transforms as transforms +from abc import ABC, abstractmethod + + +class BaseDataset(data.Dataset, ABC): + """This class is an abstract base class (ABC) for datasets. + + To create a subclass, you need to implement the following four functions: + -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt). + -- <__len__>: return the size of dataset. + -- <__getitem__>: get a data point. + -- : (optionally) add dataset-specific options and set default options. + """ + + def __init__(self, opt): + """Initialize the class; save the options in the class + + Parameters: + opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions + """ + self.opt = opt + self.root = opt.dataroot + + @staticmethod + def modify_commandline_options(parser, is_train): + """Add new dataset-specific options, and rewrite default values for existing options. + + Parameters: + parser -- original option parser + is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. + + Returns: + the modified parser. + """ + return parser + + @abstractmethod + def __len__(self): + """Return the total number of images in the dataset.""" + return 0 + + @abstractmethod + def __getitem__(self, index): + """Return a data point and its metadata information. 
+ + Parameters: + index - - a random integer for data indexing + + Returns: + a dictionary of data with their names. It ususally contains the data itself and its metadata information. + """ + pass + + +def get_params(opt, size): + w, h = size + new_h = h + new_w = w + if opt.preprocess == 'resize_and_crop': + new_h = new_w = opt.load_size + elif opt.preprocess == 'scale_width_and_crop': + new_w = opt.load_size + new_h = opt.load_size * h // w + + x = random.randint(0, np.maximum(0, new_w - opt.crop_size)) + y = random.randint(0, np.maximum(0, new_h - opt.crop_size)) + + flip = random.random() > 0.5 + + return {'crop_pos': (x, y), 'flip': flip} + + +def get_transform(opt, params=None, grayscale=False, method=Image.BICUBIC, convert=True): + transform_list = [] + if grayscale: + transform_list.append(transforms.Grayscale(1)) + if 'resize' in opt.preprocess: + osize = [opt.load_size, opt.load_size] + transform_list.append(transforms.Resize(osize, method)) + elif 'scale_width' in opt.preprocess: + transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.load_size, opt.crop_size, method))) + + if 'crop' in opt.preprocess: + if params is None: + transform_list.append(transforms.RandomCrop(opt.crop_size)) + else: + transform_list.append(transforms.Lambda(lambda img: __crop(img, params['crop_pos'], opt.crop_size))) + + if opt.preprocess == 'none': + transform_list.append(transforms.Lambda(lambda img: __make_power_2(img, base=4, method=method))) + + if not opt.no_flip: + if params is None: + transform_list.append(transforms.RandomHorizontalFlip()) + elif params['flip']: + transform_list.append(transforms.Lambda(lambda img: __flip(img, params['flip']))) + + if convert: + transform_list += [transforms.ToTensor()] + if grayscale: + transform_list += [transforms.Normalize((0.5,), (0.5,))] + else: + transform_list += [transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] + return transforms.Compose(transform_list) + + +def __make_power_2(img, base, 
method=Image.BICUBIC): + ow, oh = img.size + h = int(round(oh / base) * base) + w = int(round(ow / base) * base) + if h == oh and w == ow: + return img + + __print_size_warning(ow, oh, w, h) + return img.resize((w, h), method) + + +def __scale_width(img, target_size, crop_size, method=Image.BICUBIC): + ow, oh = img.size + if ow == target_size and oh >= crop_size: + return img + w = target_size + h = int(max(target_size * oh / ow, crop_size)) + return img.resize((w, h), method) + + +def __crop(img, pos, size): + ow, oh = img.size + x1, y1 = pos + tw = th = size + if (ow > tw or oh > th): + return img.crop((x1, y1, x1 + tw, y1 + th)) + return img + + +def __flip(img, flip): + if flip: + return img.transpose(Image.FLIP_LEFT_RIGHT) + return img + + +def __print_size_warning(ow, oh, w, h): + """Print warning information about image size(only print once)""" + if not hasattr(__print_size_warning, 'has_printed'): + print("The image size needs to be a multiple of 4. " + "The loaded image size was (%d, %d), so it was adjusted to " + "(%d, %d). 
This adjustment will be done to all images " + "whose sizes are not multiples of 4" % (ow, oh, w, h)) + __print_size_warning.has_printed = True diff --git a/pix2pix/data/depthmerge_dataset.py b/pix2pix/data/depthmerge_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..0551e4fcddce868f5335042c7f072d96b003fa72 --- /dev/null +++ b/pix2pix/data/depthmerge_dataset.py @@ -0,0 +1,83 @@ +from pix2pix.data.base_dataset import BaseDataset +from pix2pix.data.image_folder import make_dataset +from pix2pix.util.guidedfilter import GuidedFilter + +import numpy as np +import os +import torch +from PIL import Image + + +def normalize(img): + img = img * 2 + img = img - 1 + return img + + +def normalize01(img): + return (img - torch.min(img)) / (torch.max(img)-torch.min(img)) + + +class DepthMergeDataset(BaseDataset): + def __init__(self, opt): + BaseDataset.__init__(self, opt) + self.dir_outer = os.path.join(opt.dataroot, opt.phase, 'outer') + self.dir_inner = os.path.join(opt.dataroot, opt.phase, 'inner') + self.dir_gtfake = os.path.join(opt.dataroot, opt.phase, 'gtfake') + + self.outer_paths = sorted(make_dataset(self.dir_outer, opt.max_dataset_size)) + self.inner_paths = sorted(make_dataset(self.dir_inner, opt.max_dataset_size)) + self.gtfake_paths = sorted(make_dataset(self.dir_gtfake, opt.max_dataset_size)) + + self.dataset_size = len(self.outer_paths) + + if opt.phase == 'train': + self.isTrain = True + else: + self.isTrain = False + + def __getitem__(self, index): + normalize_coef = np.float32(2 ** 16) + + data_outer = Image.open(self.outer_paths[index % self.dataset_size]) # needs to be a tensor + data_outer = np.array(data_outer, dtype=np.float32) + data_outer = data_outer / normalize_coef + + data_inner = Image.open(self.inner_paths[index % self.dataset_size]) # needs to be a tensor + data_inner = np.array(data_inner, dtype=np.float32) + data_inner = data_inner / normalize_coef + + if self.isTrain: + data_gtfake = 
Image.open(self.gtfake_paths[index % self.dataset_size]) # needs to be a tensor + data_gtfake = np.array(data_gtfake, dtype=np.float32) + data_gtfake = data_gtfake / normalize_coef + + data_inner = GuidedFilter(data_gtfake, data_inner, 64, 0.00000001).smooth.astype('float32') + data_outer = GuidedFilter(data_outer, data_gtfake, 64, 0.00000001).smooth.astype('float32') + + data_outer = torch.from_numpy(data_outer) + data_outer = torch.unsqueeze(data_outer, 0) + data_outer = normalize01(data_outer) + data_outer = normalize(data_outer) + + data_inner = torch.from_numpy(data_inner) + data_inner = torch.unsqueeze(data_inner, 0) + data_inner = normalize01(data_inner) + data_inner = normalize(data_inner) + + if self.isTrain: + data_gtfake = torch.from_numpy(data_gtfake) + data_gtfake = torch.unsqueeze(data_gtfake, 0) + data_gtfake = normalize01(data_gtfake) + data_gtfake = normalize(data_gtfake) + + image_path = self.outer_paths[index % self.dataset_size] + if self.isTrain: + return {'data_inner': data_inner, 'data_outer': data_outer, + 'data_gtfake': data_gtfake, 'image_path': image_path} + else: + return {'data_inner': data_inner, 'data_outer': data_outer, 'image_path': image_path} + + def __len__(self): + """Return the total number of images.""" + return self.dataset_size diff --git a/pix2pix/data/image_folder.py b/pix2pix/data/image_folder.py new file mode 100644 index 0000000000000000000000000000000000000000..005a5209c9422f6e3a55d143481820bd92a87aa6 --- /dev/null +++ b/pix2pix/data/image_folder.py @@ -0,0 +1,65 @@ +"""A modified image folder class + +We modify the official PyTorch image folder (https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py) +so that this class can load images from both current directory and its subdirectories. 
+""" + +import torch.utils.data as data + +from PIL import Image +import os + +IMG_EXTENSIONS = [ + '.jpg', '.JPG', '.jpeg', '.JPEG', + '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', + '.tif', '.TIF', '.tiff', '.TIFF', +] + + +def is_image_file(filename): + return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) + + +def make_dataset(dir, max_dataset_size=float("inf")): + images = [] + assert os.path.isdir(dir), '%s is not a valid directory' % dir + + for root, _, fnames in sorted(os.walk(dir)): + for fname in fnames: + if is_image_file(fname): + path = os.path.join(root, fname) + images.append(path) + return images[:min(max_dataset_size, len(images))] + + +def default_loader(path): + return Image.open(path).convert('RGB') + + +class ImageFolder(data.Dataset): + + def __init__(self, root, transform=None, return_paths=False, + loader=default_loader): + imgs = make_dataset(root) + if len(imgs) == 0: + raise(RuntimeError("Found 0 images in: " + root + "\n" + "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))) + + self.root = root + self.imgs = imgs + self.transform = transform + self.return_paths = return_paths + self.loader = loader + + def __getitem__(self, index): + path = self.imgs[index] + img = self.loader(path) + if self.transform is not None: + img = self.transform(img) + if self.return_paths: + return img, path + else: + return img + + def __len__(self): + return len(self.imgs) diff --git a/pix2pix/models/__init__.py b/pix2pix/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..54bb84b6b8823b7968745347183d50cb29c66dc8 --- /dev/null +++ b/pix2pix/models/__init__.py @@ -0,0 +1,67 @@ +"""This package contains modules related to objective functions, optimizations, and network architectures. + +To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel. 
+You need to implement the following five functions: + -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt). + -- : unpack data from dataset and apply preprocessing. + -- : produce intermediate results. + -- : calculate loss, gradients, and update network weights. + -- : (optionally) add model-specific options and set default options. + +In the function <__init__>, you need to define four lists: + -- self.loss_names (str list): specify the training losses that you want to plot and save. + -- self.model_names (str list): define networks used in our training. + -- self.visual_names (str list): specify the images that you want to display and save. + -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an usage. + +Now you can use the model class by specifying flag '--model dummy'. +See our template model class 'template_model.py' for more details. +""" + +import importlib +from pix2pix.models.base_model import BaseModel + + +def find_model_using_name(model_name): + """Import the module "models/[model_name]_model.py". + + In the file, the class called DatasetNameModel() will + be instantiated. It has to be a subclass of BaseModel, + and it is case-insensitive. + """ + model_filename = "pix2pix.models." + model_name + "_model" + modellib = importlib.import_module(model_filename) + model = None + target_model_name = model_name.replace('_', '') + 'model' + for name, cls in modellib.__dict__.items(): + if name.lower() == target_model_name.lower() \ + and issubclass(cls, BaseModel): + model = cls + + if model is None: + print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." 
% (model_filename, target_model_name)) + exit(0) + + return model + + +def get_option_setter(model_name): + """Return the static method of the model class.""" + model_class = find_model_using_name(model_name) + return model_class.modify_commandline_options + + +def create_model(opt): + """Create a model given the option. + + This function warps the class CustomDatasetDataLoader. + This is the main interface between this package and 'train.py'/'test.py' + + Example: + >>> from models import create_model + >>> model = create_model(opt) + """ + model = find_model_using_name(opt.model) + instance = model(opt) + print("model [%s] was created" % type(instance).__name__) + return instance diff --git a/pix2pix/models/__pycache__/__init__.cpython-310.pyc b/pix2pix/models/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4071246a74d6efc65581a667cbc5eedebb92c3e7 Binary files /dev/null and b/pix2pix/models/__pycache__/__init__.cpython-310.pyc differ diff --git a/pix2pix/models/__pycache__/__init__.cpython-311.pyc b/pix2pix/models/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17601ca19f9a59efae016d2c1885d1e772a95962 Binary files /dev/null and b/pix2pix/models/__pycache__/__init__.cpython-311.pyc differ diff --git a/pix2pix/models/__pycache__/__init__.cpython-312.pyc b/pix2pix/models/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d0e10280576e94d78402fd236a5afaf13508555 Binary files /dev/null and b/pix2pix/models/__pycache__/__init__.cpython-312.pyc differ diff --git a/pix2pix/models/__pycache__/base_model.cpython-310.pyc b/pix2pix/models/__pycache__/base_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cadb9456a49f92ab9794b7a08bf23f6351909563 Binary files /dev/null and b/pix2pix/models/__pycache__/base_model.cpython-310.pyc differ diff --git 
a/pix2pix/models/__pycache__/base_model.cpython-311.pyc b/pix2pix/models/__pycache__/base_model.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..950a23e6d8da36d1f623aa839b708c60f2a9a0cc Binary files /dev/null and b/pix2pix/models/__pycache__/base_model.cpython-311.pyc differ diff --git a/pix2pix/models/__pycache__/base_model.cpython-312.pyc b/pix2pix/models/__pycache__/base_model.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..85bc5f7311dd8eaff384133bfb2cf48dfb243947 Binary files /dev/null and b/pix2pix/models/__pycache__/base_model.cpython-312.pyc differ diff --git a/pix2pix/models/__pycache__/networks.cpython-310.pyc b/pix2pix/models/__pycache__/networks.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4f3f5a14739f705d1f85c8d56f5c2ef673586aad Binary files /dev/null and b/pix2pix/models/__pycache__/networks.cpython-310.pyc differ diff --git a/pix2pix/models/__pycache__/networks.cpython-311.pyc b/pix2pix/models/__pycache__/networks.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52110d5e2278d24fa588e3c0e01eef97d00f67aa Binary files /dev/null and b/pix2pix/models/__pycache__/networks.cpython-311.pyc differ diff --git a/pix2pix/models/__pycache__/networks.cpython-312.pyc b/pix2pix/models/__pycache__/networks.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4619dcdd25024455ae33f2b5bef7a46c0aae85e3 Binary files /dev/null and b/pix2pix/models/__pycache__/networks.cpython-312.pyc differ diff --git a/pix2pix/models/__pycache__/pix2pix4depth_model.cpython-310.pyc b/pix2pix/models/__pycache__/pix2pix4depth_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a9509523d3167cbb863633581495da280638b46 Binary files /dev/null and b/pix2pix/models/__pycache__/pix2pix4depth_model.cpython-310.pyc differ diff --git 
a/pix2pix/models/__pycache__/pix2pix4depth_model.cpython-311.pyc b/pix2pix/models/__pycache__/pix2pix4depth_model.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..848a7c33958e6e57679205bc5766a6fc5fe8deff Binary files /dev/null and b/pix2pix/models/__pycache__/pix2pix4depth_model.cpython-311.pyc differ diff --git a/pix2pix/models/__pycache__/pix2pix4depth_model.cpython-312.pyc b/pix2pix/models/__pycache__/pix2pix4depth_model.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3255125297daa8cf4a2dc3c0c71f1cbdfce30737 Binary files /dev/null and b/pix2pix/models/__pycache__/pix2pix4depth_model.cpython-312.pyc differ diff --git a/pix2pix/models/base_model.py b/pix2pix/models/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..62f7076501c563286f354c1713484c7d90d3c340 --- /dev/null +++ b/pix2pix/models/base_model.py @@ -0,0 +1,230 @@ +import os +import torch +from collections import OrderedDict +from abc import ABC, abstractmethod +from . import networks + + +class BaseModel(ABC): + """This class is an abstract base class (ABC) for models. + To create a subclass, you need to implement the following five functions: + -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt). + -- : unpack data from dataset and apply preprocessing. + -- : produce intermediate results. + -- : calculate losses, gradients, and update network weights. + -- : (optionally) add model-specific options and set default options. + """ + + def __init__(self, opt): + """Initialize the BaseModel class. + + Parameters: + opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions + + When creating your custom class, you need to implement your own initialization. + In this function, you should first call + Then, you need to define four lists: + -- self.loss_names (str list): specify the training losses that you want to plot and save. 
+ -- self.model_names (str list): define networks used in our training. + -- self.visual_names (str list): specify the images that you want to display and save. + -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example. + """ + self.opt = opt + self.gpu_ids = opt.gpu_ids + self.isTrain = opt.isTrain + self.device = torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu') # get device name: CPU or GPU + self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) # save all the checkpoints to save_dir + if opt.preprocess != 'scale_width': # with [scale_width], input images might have different sizes, which hurts the performance of cudnn.benchmark. + torch.backends.cudnn.benchmark = True + self.loss_names = [] + self.model_names = [] + self.visual_names = [] + self.optimizers = [] + self.image_paths = [] + self.metric = 0 # used for learning rate policy 'plateau' + + @staticmethod + def modify_commandline_options(parser, is_train): + """Add new model-specific options, and rewrite default values for existing options. + + Parameters: + parser -- original option parser + is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. + + Returns: + the modified parser. + """ + return parser + + @abstractmethod + def set_input(self, input): + """Unpack input data from the dataloader and perform necessary pre-processing steps. + + Parameters: + input (dict): includes the data itself and its metadata information. 
+ """ + pass + + @abstractmethod + def forward(self): + """Run forward pass; called by both functions and .""" + pass + + @abstractmethod + def optimize_parameters(self): + """Calculate losses, gradients, and update network weights; called in every training iteration""" + pass + + def setup(self, opt): + """Load and print networks; create schedulers + + Parameters: + opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions + """ + if self.isTrain: + self.schedulers = [networks.get_scheduler(optimizer, opt) for optimizer in self.optimizers] + if not self.isTrain or opt.continue_train: + load_suffix = 'iter_%d' % opt.load_iter if opt.load_iter > 0 else opt.epoch + self.load_networks(load_suffix) + self.print_networks(opt.verbose) + + def eval(self): + """Make models eval mode during test time""" + for name in self.model_names: + if isinstance(name, str): + net = getattr(self, 'net' + name) + net.eval() + + def test(self): + """Forward function used in test time. + + This function wraps function in no_grad() so we don't save intermediate steps for backprop + It also calls to produce additional visualization results + """ + with torch.no_grad(): + self.forward() + self.compute_visuals() + + def compute_visuals(self): + """Calculate additional output images for visdom and HTML visualization""" + pass + + def get_image_paths(self): + """ Return image paths that are used to load current data""" + return self.image_paths + + def update_learning_rate(self): + """Update learning rates for all the networks; called at the end of every epoch""" + old_lr = self.optimizers[0].param_groups[0]['lr'] + for scheduler in self.schedulers: + if self.opt.lr_policy == 'plateau': + scheduler.step(self.metric) + else: + scheduler.step() + + lr = self.optimizers[0].param_groups[0]['lr'] + print('learning rate %.7f -> %.7f' % (old_lr, lr)) + + def get_current_visuals(self): + """Return visualization images. 
train.py will display these images with visdom, and save the images to a HTML""" + visual_ret = OrderedDict() + for name in self.visual_names: + if isinstance(name, str): + visual_ret[name] = getattr(self, name) + return visual_ret + + def get_current_losses(self): + """Return traning losses / errors. train.py will print out these errors on console, and save them to a file""" + errors_ret = OrderedDict() + for name in self.loss_names: + if isinstance(name, str): + errors_ret[name] = float(getattr(self, 'loss_' + name)) # float(...) works for both scalar tensor and float number + return errors_ret + + def save_networks(self, epoch): + """Save all the networks to the disk. + + Parameters: + epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name) + """ + for name in self.model_names: + if isinstance(name, str): + save_filename = '%s_net_%s.pth' % (epoch, name) + save_path = os.path.join(self.save_dir, save_filename) + net = getattr(self, 'net' + name) + + if len(self.gpu_ids) > 0 and torch.cuda.is_available(): + torch.save(net.module.cpu().state_dict(), save_path) + net.cuda(self.gpu_ids[0]) + else: + torch.save(net.cpu().state_dict(), save_path) + + def __patch_instance_norm_state_dict(self, state_dict, module, keys, i=0): + """Fix InstanceNorm checkpoints incompatibility (prior to 0.4)""" + key = keys[i] + if i + 1 == len(keys): # at the end, pointing to a parameter/buffer + if module.__class__.__name__.startswith('InstanceNorm') and \ + (key == 'running_mean' or key == 'running_var'): + if getattr(module, key) is None: + state_dict.pop('.'.join(keys)) + if module.__class__.__name__.startswith('InstanceNorm') and \ + (key == 'num_batches_tracked'): + state_dict.pop('.'.join(keys)) + else: + self.__patch_instance_norm_state_dict(state_dict, getattr(module, key), keys, i + 1) + + def load_networks(self, epoch): + """Load all the networks from the disk. 
+ + Parameters: + epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name) + """ + for name in self.model_names: + if isinstance(name, str): + load_filename = '%s_net_%s.pth' % (epoch, name) + load_path = os.path.join(self.save_dir, load_filename) + net = getattr(self, 'net' + name) + if isinstance(net, torch.nn.DataParallel): + net = net.module + print('loading the model from %s' % load_path) + # if you are using PyTorch newer than 0.4 (e.g., built from + # GitHub source), you can remove str() on self.device + state_dict = torch.load(load_path, map_location=str(self.device)) + if hasattr(state_dict, '_metadata'): + del state_dict._metadata + + # patch InstanceNorm checkpoints prior to 0.4 + for key in list(state_dict.keys()): # need to copy keys here because we mutate in loop + self.__patch_instance_norm_state_dict(state_dict, net, key.split('.')) + net.load_state_dict(state_dict) + + def print_networks(self, verbose): + """Print the total number of parameters in the network and (if verbose) network architecture + + Parameters: + verbose (bool) -- if verbose: print the network architecture + """ + print('---------- Networks initialized -------------') + for name in self.model_names: + if isinstance(name, str): + net = getattr(self, 'net' + name) + num_params = 0 + for param in net.parameters(): + num_params += param.numel() + if verbose: + print(net) + print('[Network %s] Total number of parameters : %.3f M' % (name, num_params / 1e6)) + print('-----------------------------------------------') + + def set_requires_grad(self, nets, requires_grad=False): + """Set requies_grad=Fasle for all the networks to avoid unnecessary computations + Parameters: + nets (network list) -- a list of networks + requires_grad (bool) -- whether the networks require gradients or not + """ + if not isinstance(nets, list): + nets = [nets] + for net in nets: + if net is not None: + for param in net.parameters(): + param.requires_grad = requires_grad diff --git 
a/pix2pix/models/base_model_hg.py b/pix2pix/models/base_model_hg.py new file mode 100644 index 0000000000000000000000000000000000000000..e7fb3d313978dec164eff1452ed2986e5655d6b6 --- /dev/null +++ b/pix2pix/models/base_model_hg.py @@ -0,0 +1,58 @@ +import os +import torch + +class BaseModelHG(): + def name(self): + return 'BaseModel' + + def initialize(self, opt): + self.opt = opt + self.gpu_ids = opt.gpu_ids + self.isTrain = opt.isTrain + self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor + self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) + + def set_input(self, input): + self.input = input + + def forward(self): + pass + + # used in test time, no backprop + def test(self): + pass + + def get_image_paths(self): + pass + + def optimize_parameters(self): + pass + + def get_current_visuals(self): + return self.input + + def get_current_errors(self): + return {} + + def save(self, label): + pass + + # helper saving function that can be used by subclasses + def save_network(self, network, network_label, epoch_label, gpu_ids): + save_filename = '_%s_net_%s.pth' % (epoch_label, network_label) + save_path = os.path.join(self.save_dir, save_filename) + torch.save(network.cpu().state_dict(), save_path) + if len(gpu_ids) and torch.cuda.is_available(): + network.cuda(device_id=gpu_ids[0]) + + # helper loading function that can be used by subclasses + def load_network(self, network, network_label, epoch_label): + save_filename = '%s_net_%s.pth' % (epoch_label, network_label) + save_path = os.path.join(self.save_dir, save_filename) + print(save_path) + model = torch.load(save_path) + return model + # network.load_state_dict(torch.load(save_path)) + + def update_learning_rate(): + pass diff --git a/pix2pix/models/networks.py b/pix2pix/models/networks.py new file mode 100644 index 0000000000000000000000000000000000000000..ef8cb1ab67f765a48db9f1328d0ba2e5ebfbc3c5 --- /dev/null +++ b/pix2pix/models/networks.py @@ -0,0 +1,623 @@ +import torch +import 
torch.nn as nn +from torch.nn import init +import functools +from torch.optim import lr_scheduler + + +############################################################################### +# Helper Functions +############################################################################### + + +class Identity(nn.Module): + def forward(self, x): + return x + + +def get_norm_layer(norm_type='instance'): + """Return a normalization layer + + Parameters: + norm_type (str) -- the name of the normalization layer: batch | instance | none + + For BatchNorm, we use learnable affine parameters and track running statistics (mean/stddev). + For InstanceNorm, we do not use learnable affine parameters. We do not track running statistics. + """ + if norm_type == 'batch': + norm_layer = functools.partial(nn.BatchNorm2d, affine=True, track_running_stats=True) + elif norm_type == 'instance': + norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False) + elif norm_type == 'none': + def norm_layer(x): return Identity() + else: + raise NotImplementedError('normalization layer [%s] is not found' % norm_type) + return norm_layer + + +def get_scheduler(optimizer, opt): + """Return a learning rate scheduler + + Parameters: + optimizer -- the optimizer of the network + opt (option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions.  + opt.lr_policy is the name of learning rate policy: linear | step | plateau | cosine + + For 'linear', we keep the same learning rate for the first epochs + and linearly decay the rate to zero over the next epochs. + For other schedulers (step, plateau, and cosine), we use the default PyTorch schedulers. + See https://pytorch.org/docs/stable/optim.html for more details. 
+ """ + if opt.lr_policy == 'linear': + def lambda_rule(epoch): + lr_l = 1.0 - max(0, epoch + opt.epoch_count - opt.n_epochs) / float(opt.n_epochs_decay + 1) + return lr_l + scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule) + elif opt.lr_policy == 'step': + scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.1) + elif opt.lr_policy == 'plateau': + scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5) + elif opt.lr_policy == 'cosine': + scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=opt.n_epochs, eta_min=0) + else: + return NotImplementedError('learning rate policy [%s] is not implemented', opt.lr_policy) + return scheduler + + +def init_weights(net, init_type='normal', init_gain=0.02): + """Initialize network weights. + + Parameters: + net (network) -- network to be initialized + init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal + init_gain (float) -- scaling factor for normal, xavier and orthogonal. + + We use 'normal' in the original pix2pix and CycleGAN paper. But xavier and kaiming might + work better for some applications. Feel free to try yourself. 
+ """ + def init_func(m): # define the initialization function + classname = m.__class__.__name__ + if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1): + if init_type == 'normal': + init.normal_(m.weight.data, 0.0, init_gain) + elif init_type == 'xavier': + init.xavier_normal_(m.weight.data, gain=init_gain) + elif init_type == 'kaiming': + init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + elif init_type == 'orthogonal': + init.orthogonal_(m.weight.data, gain=init_gain) + else: + raise NotImplementedError('initialization method [%s] is not implemented' % init_type) + if hasattr(m, 'bias') and m.bias is not None: + init.constant_(m.bias.data, 0.0) + elif classname.find('BatchNorm2d') != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies. + init.normal_(m.weight.data, 1.0, init_gain) + init.constant_(m.bias.data, 0.0) + + print('initialize network with %s' % init_type) + net.apply(init_func) # apply the initialization function + + +def init_net(net, init_type='normal', init_gain=0.02, gpu_ids=[]): + """Initialize a network: 1. register CPU/GPU device (with multi-GPU support); 2. initialize the network weights + Parameters: + net (network) -- the network to be initialized + init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal + gain (float) -- scaling factor for normal, xavier and orthogonal. + gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 + + Return an initialized network. 
+ """ + if len(gpu_ids) > 0: + assert(torch.cuda.is_available()) + net.to(gpu_ids[0]) + net = torch.nn.DataParallel(net, gpu_ids) # multi-GPUs + init_weights(net, init_type, init_gain=init_gain) + return net + + +def define_G(input_nc, output_nc, ngf, netG, norm='batch', use_dropout=False, init_type='normal', init_gain=0.02, gpu_ids=[]): + """Create a generator + + Parameters: + input_nc (int) -- the number of channels in input images + output_nc (int) -- the number of channels in output images + ngf (int) -- the number of filters in the last conv layer + netG (str) -- the architecture's name: resnet_9blocks | resnet_6blocks | unet_256 | unet_128 + norm (str) -- the name of normalization layers used in the network: batch | instance | none + use_dropout (bool) -- if use dropout layers. + init_type (str) -- the name of our initialization method. + init_gain (float) -- scaling factor for normal, xavier and orthogonal. + gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 + + Returns a generator + + Our current implementation provides two types of generators: + U-Net: [unet_128] (for 128x128 input images) and [unet_256] (for 256x256 input images) + The original U-Net paper: https://arxiv.org/abs/1505.04597 + + Resnet-based generator: [resnet_6blocks] (with 6 Resnet blocks) and [resnet_9blocks] (with 9 Resnet blocks) + Resnet-based generator consists of several Resnet blocks between a few downsampling/upsampling operations. + We adapt Torch code from Justin Johnson's neural style transfer project (https://github.com/jcjohnson/fast-neural-style). + + + The generator has been initialized by . It uses RELU for non-linearity. 
+ """ + net = None + norm_layer = get_norm_layer(norm_type=norm) + + if netG == 'resnet_9blocks': + net = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=9) + elif netG == 'resnet_6blocks': + net = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=6) + elif netG == 'resnet_12blocks': + net = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=12) + elif netG == 'unet_128': + net = UnetGenerator(input_nc, output_nc, 7, ngf, norm_layer=norm_layer, use_dropout=use_dropout) + elif netG == 'unet_256': + net = UnetGenerator(input_nc, output_nc, 8, ngf, norm_layer=norm_layer, use_dropout=use_dropout) + elif netG == 'unet_672': + net = UnetGenerator(input_nc, output_nc, 5, ngf, norm_layer=norm_layer, use_dropout=use_dropout) + elif netG == 'unet_960': + net = UnetGenerator(input_nc, output_nc, 6, ngf, norm_layer=norm_layer, use_dropout=use_dropout) + elif netG == 'unet_1024': + net = UnetGenerator(input_nc, output_nc, 10, ngf, norm_layer=norm_layer, use_dropout=use_dropout) + else: + raise NotImplementedError('Generator model name [%s] is not recognized' % netG) + return init_net(net, init_type, init_gain, gpu_ids) + + +def define_D(input_nc, ndf, netD, n_layers_D=3, norm='batch', init_type='normal', init_gain=0.02, gpu_ids=[]): + """Create a discriminator + + Parameters: + input_nc (int) -- the number of channels in input images + ndf (int) -- the number of filters in the first conv layer + netD (str) -- the architecture's name: basic | n_layers | pixel + n_layers_D (int) -- the number of conv layers in the discriminator; effective when netD=='n_layers' + norm (str) -- the type of normalization layers used in the network. + init_type (str) -- the name of the initialization method. + init_gain (float) -- scaling factor for normal, xavier and orthogonal. 
+ gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2 + + Returns a discriminator + + Our current implementation provides three types of discriminators: + [basic]: 'PatchGAN' classifier described in the original pix2pix paper. + It can classify whether 70×70 overlapping patches are real or fake. + Such a patch-level discriminator architecture has fewer parameters + than a full-image discriminator and can work on arbitrarily-sized images + in a fully convolutional fashion. + + [n_layers]: With this mode, you can specify the number of conv layers in the discriminator + with the parameter (default=3 as used in [basic] (PatchGAN).) + + [pixel]: 1x1 PixelGAN discriminator can classify whether a pixel is real or not. + It encourages greater color diversity but has no effect on spatial statistics. + + The discriminator has been initialized by . It uses Leakly RELU for non-linearity. + """ + net = None + norm_layer = get_norm_layer(norm_type=norm) + + if netD == 'basic': # default PatchGAN classifier + net = NLayerDiscriminator(input_nc, ndf, n_layers=3, norm_layer=norm_layer) + elif netD == 'n_layers': # more options + net = NLayerDiscriminator(input_nc, ndf, n_layers_D, norm_layer=norm_layer) + elif netD == 'pixel': # classify if each pixel is real or fake + net = PixelDiscriminator(input_nc, ndf, norm_layer=norm_layer) + else: + raise NotImplementedError('Discriminator model name [%s] is not recognized' % netD) + return init_net(net, init_type, init_gain, gpu_ids) + + +############################################################################## +# Classes +############################################################################## +class GANLoss(nn.Module): + """Define different GAN objectives. + + The GANLoss class abstracts away the need to create the target label tensor + that has the same size as the input. + """ + + def __init__(self, gan_mode, target_real_label=1.0, target_fake_label=0.0): + """ Initialize the GANLoss class. 
+ + Parameters: + gan_mode (str) - - the type of GAN objective. It currently supports vanilla, lsgan, and wgangp. + target_real_label (bool) - - label for a real image + target_fake_label (bool) - - label of a fake image + + Note: Do not use sigmoid as the last layer of Discriminator. + LSGAN needs no sigmoid. vanilla GANs will handle it with BCEWithLogitsLoss. + """ + super(GANLoss, self).__init__() + self.register_buffer('real_label', torch.tensor(target_real_label)) + self.register_buffer('fake_label', torch.tensor(target_fake_label)) + self.gan_mode = gan_mode + if gan_mode == 'lsgan': + self.loss = nn.MSELoss() + elif gan_mode == 'vanilla': + self.loss = nn.BCEWithLogitsLoss() + elif gan_mode in ['wgangp']: + self.loss = None + else: + raise NotImplementedError('gan mode %s not implemented' % gan_mode) + + def get_target_tensor(self, prediction, target_is_real): + """Create label tensors with the same size as the input. + + Parameters: + prediction (tensor) - - tpyically the prediction from a discriminator + target_is_real (bool) - - if the ground truth label is for real images or fake images + + Returns: + A label tensor filled with ground truth label, and with the size of the input + """ + + if target_is_real: + target_tensor = self.real_label + else: + target_tensor = self.fake_label + return target_tensor.expand_as(prediction) + + def __call__(self, prediction, target_is_real): + """Calculate loss given Discriminator's output and grount truth labels. + + Parameters: + prediction (tensor) - - tpyically the prediction output from a discriminator + target_is_real (bool) - - if the ground truth label is for real images or fake images + + Returns: + the calculated loss. 
+ """ + if self.gan_mode in ['lsgan', 'vanilla']: + target_tensor = self.get_target_tensor(prediction, target_is_real) + loss = self.loss(prediction, target_tensor) + elif self.gan_mode == 'wgangp': + if target_is_real: + loss = -prediction.mean() + else: + loss = prediction.mean() + return loss + + +def cal_gradient_penalty(netD, real_data, fake_data, device, type='mixed', constant=1.0, lambda_gp=10.0): + """Calculate the gradient penalty loss, used in WGAN-GP paper https://arxiv.org/abs/1704.00028 + + Arguments: + netD (network) -- discriminator network + real_data (tensor array) -- real images + fake_data (tensor array) -- generated images from the generator + device (str) -- GPU / CPU: from torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu') + type (str) -- if we mix real and fake data or not [real | fake | mixed]. + constant (float) -- the constant used in formula ( ||gradient||_2 - constant)^2 + lambda_gp (float) -- weight for this loss + + Returns the gradient penalty loss + """ + if lambda_gp > 0.0: + if type == 'real': # either use real images, fake images, or a linear interpolation of two. 
+ interpolatesv = real_data + elif type == 'fake': + interpolatesv = fake_data + elif type == 'mixed': + alpha = torch.rand(real_data.shape[0], 1, device=device) + alpha = alpha.expand(real_data.shape[0], real_data.nelement() // real_data.shape[0]).contiguous().view(*real_data.shape) + interpolatesv = alpha * real_data + ((1 - alpha) * fake_data) + else: + raise NotImplementedError('{} not implemented'.format(type)) + interpolatesv.requires_grad_(True) + disc_interpolates = netD(interpolatesv) + gradients = torch.autograd.grad(outputs=disc_interpolates, inputs=interpolatesv, + grad_outputs=torch.ones(disc_interpolates.size()).to(device), + create_graph=True, retain_graph=True, only_inputs=True) + gradients = gradients[0].view(real_data.size(0), -1) # flat the data + gradient_penalty = (((gradients + 1e-16).norm(2, dim=1) - constant) ** 2).mean() * lambda_gp # added eps + return gradient_penalty, gradients + else: + return 0.0, None + + +class ResnetGenerator(nn.Module): + """Resnet-based generator that consists of Resnet blocks between a few downsampling/upsampling operations. 
+ + We adapt Torch code and idea from Justin Johnson's neural style transfer project(https://github.com/jcjohnson/fast-neural-style) + """ + + def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, padding_type='reflect'): + """Construct a Resnet-based generator + + Parameters: + input_nc (int) -- the number of channels in input images + output_nc (int) -- the number of channels in output images + ngf (int) -- the number of filters in the last conv layer + norm_layer -- normalization layer + use_dropout (bool) -- if use dropout layers + n_blocks (int) -- the number of ResNet blocks + padding_type (str) -- the name of padding layer in conv layers: reflect | replicate | zero + """ + assert(n_blocks >= 0) + super(ResnetGenerator, self).__init__() + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + + model = [nn.ReflectionPad2d(3), + nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias), + norm_layer(ngf), + nn.ReLU(True)] + + n_downsampling = 2 + for i in range(n_downsampling): # add downsampling layers + mult = 2 ** i + model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1, bias=use_bias), + norm_layer(ngf * mult * 2), + nn.ReLU(True)] + + mult = 2 ** n_downsampling + for i in range(n_blocks): # add ResNet blocks + + model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)] + + for i in range(n_downsampling): # add upsampling layers + mult = 2 ** (n_downsampling - i) + model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), + kernel_size=3, stride=2, + padding=1, output_padding=1, + bias=use_bias), + norm_layer(int(ngf * mult / 2)), + nn.ReLU(True)] + model += [nn.ReflectionPad2d(3)] + model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)] + model += [nn.Tanh()] + + self.model = 
nn.Sequential(*model) + + def forward(self, input): + """Standard forward""" + return self.model(input) + + +class ResnetBlock(nn.Module): + """Define a Resnet block""" + + def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias): + """Initialize the Resnet block + + A resnet block is a conv block with skip connections + We construct a conv block with build_conv_block function, + and implement skip connections in function. + Original Resnet paper: https://arxiv.org/pdf/1512.03385.pdf + """ + super(ResnetBlock, self).__init__() + self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, use_dropout, use_bias) + + def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, use_bias): + """Construct a convolutional block. + + Parameters: + dim (int) -- the number of channels in the conv layer. + padding_type (str) -- the name of padding layer: reflect | replicate | zero + norm_layer -- normalization layer + use_dropout (bool) -- if use dropout layers. 
+ use_bias (bool) -- if the conv layer uses bias or not + + Returns a conv block (with a conv layer, a normalization layer, and a non-linearity layer (ReLU)) + """ + conv_block = [] + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(1)] + elif padding_type == 'replicate': + conv_block += [nn.ReplicationPad2d(1)] + elif padding_type == 'zero': + p = 1 + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + + conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), norm_layer(dim), nn.ReLU(True)] + if use_dropout: + conv_block += [nn.Dropout(0.5)] + + p = 0 + if padding_type == 'reflect': + conv_block += [nn.ReflectionPad2d(1)] + elif padding_type == 'replicate': + conv_block += [nn.ReplicationPad2d(1)] + elif padding_type == 'zero': + p = 1 + else: + raise NotImplementedError('padding [%s] is not implemented' % padding_type) + conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), norm_layer(dim)] + + return nn.Sequential(*conv_block) + + def forward(self, x): + """Forward function (with skip connections)""" + out = x + self.conv_block(x) # add skip connections + return out + + +class UnetGenerator(nn.Module): + """Create a Unet-based generator""" + + def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False): + """Construct a Unet generator + Parameters: + input_nc (int) -- the number of channels in input images + output_nc (int) -- the number of channels in output images + num_downs (int) -- the number of downsamplings in UNet. For example, # if |num_downs| == 7, + image of size 128x128 will become of size 1x1 # at the bottleneck + ngf (int) -- the number of filters in the last conv layer + norm_layer -- normalization layer + + We construct the U-Net from the innermost layer to the outermost layer. + It is a recursive process. 
+ """ + super(UnetGenerator, self).__init__() + # construct unet structure + unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True) # add the innermost layer + for i in range(num_downs - 5): # add intermediate layers with ngf * 8 filters + unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout) + # gradually reduce the number of filters from ngf * 8 to ngf + unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer) + self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer) # add the outermost layer + + def forward(self, input): + """Standard forward""" + return self.model(input) + + +class UnetSkipConnectionBlock(nn.Module): + """Defines the Unet submodule with skip connection. + X -------------------identity---------------------- + |-- downsampling -- |submodule| -- upsampling --| + """ + + def __init__(self, outer_nc, inner_nc, input_nc=None, + submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False): + """Construct a Unet submodule with skip connections. 
+ + Parameters: + outer_nc (int) -- the number of filters in the outer conv layer + inner_nc (int) -- the number of filters in the inner conv layer + input_nc (int) -- the number of channels in input images/features + submodule (UnetSkipConnectionBlock) -- previously defined submodules + outermost (bool) -- if this module is the outermost module + innermost (bool) -- if this module is the innermost module + norm_layer -- normalization layer + use_dropout (bool) -- if use dropout layers. + """ + super(UnetSkipConnectionBlock, self).__init__() + self.outermost = outermost + if type(norm_layer) == functools.partial: + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + if input_nc is None: + input_nc = outer_nc + downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4, + stride=2, padding=1, bias=use_bias) + downrelu = nn.LeakyReLU(0.2, True) + downnorm = norm_layer(inner_nc) + uprelu = nn.ReLU(True) + upnorm = norm_layer(outer_nc) + + if outermost: + upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, + kernel_size=4, stride=2, + padding=1) + down = [downconv] + up = [uprelu, upconv, nn.Tanh()] + model = down + [submodule] + up + elif innermost: + upconv = nn.ConvTranspose2d(inner_nc, outer_nc, + kernel_size=4, stride=2, + padding=1, bias=use_bias) + down = [downrelu, downconv] + up = [uprelu, upconv, upnorm] + model = down + up + else: + upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, + kernel_size=4, stride=2, + padding=1, bias=use_bias) + down = [downrelu, downconv, downnorm] + up = [uprelu, upconv, upnorm] + + if use_dropout: + model = down + [submodule] + up + [nn.Dropout(0.5)] + else: + model = down + [submodule] + up + + self.model = nn.Sequential(*model) + + def forward(self, x): + if self.outermost: + return self.model(x) + else: # add skip connections + return torch.cat([x, self.model(x)], 1) + + +class NLayerDiscriminator(nn.Module): + """Defines a PatchGAN discriminator""" + + def __init__(self, 
input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d): + """Construct a PatchGAN discriminator + + Parameters: + input_nc (int) -- the number of channels in input images + ndf (int) -- the number of filters in the last conv layer + n_layers (int) -- the number of conv layers in the discriminator + norm_layer -- normalization layer + """ + super(NLayerDiscriminator, self).__init__() + if type(norm_layer) == functools.partial: # no need to use bias as BatchNorm2d has affine parameters + use_bias = norm_layer.func == nn.InstanceNorm2d + else: + use_bias = norm_layer == nn.InstanceNorm2d + + kw = 4 + padw = 1 + sequence = [nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2, True)] + nf_mult = 1 + nf_mult_prev = 1 + for n in range(1, n_layers): # gradually increase the number of filters + nf_mult_prev = nf_mult + nf_mult = min(2 ** n, 8) + sequence += [ + nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=2, padding=padw, bias=use_bias), + norm_layer(ndf * nf_mult), + nn.LeakyReLU(0.2, True) + ] + + nf_mult_prev = nf_mult + nf_mult = min(2 ** n_layers, 8) + sequence += [ + nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=1, padding=padw, bias=use_bias), + norm_layer(ndf * nf_mult), + nn.LeakyReLU(0.2, True) + ] + + sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)] # output 1 channel prediction map + self.model = nn.Sequential(*sequence) + + def forward(self, input): + """Standard forward.""" + return self.model(input) + + +class PixelDiscriminator(nn.Module): + """Defines a 1x1 PatchGAN discriminator (pixelGAN)""" + + def __init__(self, input_nc, ndf=64, norm_layer=nn.BatchNorm2d): + """Construct a 1x1 PatchGAN discriminator + + Parameters: + input_nc (int) -- the number of channels in input images + ndf (int) -- the number of filters in the last conv layer + norm_layer -- normalization layer + """ + super(PixelDiscriminator, self).__init__() + if type(norm_layer) == 
class Pix2Pix4DepthModel(BaseModel):
    """pix2pix-style conditional GAN that maps a pair of depth maps to one.

    The generator input ``real_A`` is the channel-wise concatenation of an
    "outer" and an "inner" map; the generator output is ``fake_B``.
    The option defaults set by this class are ``input_nc=2``, ``output_nc=1``,
    ``norm='none'``, ``netG='unet_1024'`` and ``dataset_mode='depthmerge'``.

    pix2pix paper: https://arxiv.org/pdf/1611.07004.pdf
    """

    @staticmethod
    def modify_commandline_options(parser, is_train=True):
        """Add new model-specific options, and rewrite default values for existing options.

        Parameters:
            parser          -- original option parser
            is_train (bool) -- whether training phase or test phase. Training adds
                               ``--lambda_L1`` and sets ``pool_size=0``, ``gan_mode='vanilla'``.

        Returns:
            the modified parser.

        The training objective is: GAN Loss + lambda_L1 * ||G(A)-B||_1
        """
        parser.set_defaults(input_nc=2, output_nc=1, norm='none',
                            netG='unet_1024', dataset_mode='depthmerge')
        if is_train:
            parser.set_defaults(pool_size=0, gan_mode='vanilla')
            parser.add_argument('--lambda_L1', type=float, default=1000, help='weight for L1 loss')
        return parser

    def __init__(self, opt):
        """Initialize the model.

        Parameters:
            opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions
        """
        BaseModel.__init__(self, opt)
        # Names of the losses to report; each maps to an attribute loss_<name>.
        self.loss_names = ['G_GAN', 'G_L1', 'D_real', 'D_fake']

        # Names of the images to save/display and of the networks to save/load.
        # At test time only the generator and its output are used.
        if self.isTrain:
            self.visual_names = ['outer', 'inner', 'fake_B', 'real_B']
            self.model_names = ['G', 'D']
        else:
            self.visual_names = ['fake_B']
            self.model_names = ['G']

        # Generator. Every argument after the channel counts is hard-coded here
        # rather than read from opt.
        self.netG = networks.define_G(opt.input_nc, opt.output_nc, 64, 'unet_1024', 'none',
                                      False, 'normal', 0.02, self.gpu_ids)

        if self.isTrain:
            # Conditional GANs feed both input and output to the discriminator,
            # so its channel count is input_nc + output_nc.
            self.netD = networks.define_D(opt.input_nc + opt.output_nc, opt.ndf, opt.netD,
                                          opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.gpu_ids)
            # define loss functions
            self.criterionGAN = networks.GANLoss(opt.gan_mode).to(self.device)
            self.criterionL1 = torch.nn.L1Loss()
            # Optimizers use fixed learning rates (opt.lr is not consulted here).
            self.optimizer_G = torch.optim.Adam(self.netG.parameters(), lr=1e-4, betas=(opt.beta1, 0.999))
            self.optimizer_D = torch.optim.Adam(self.netD.parameters(), lr=2e-06, betas=(opt.beta1, 0.999))
            self.optimizers.append(self.optimizer_G)
            self.optimizers.append(self.optimizer_D)

    def set_input_train(self, input):
        """Unpack a data-loader batch and build ``real_A`` (and ``real_B`` when training).

        Parameters:
            input (dict) -- must provide 'data_outer', 'data_inner' and 'image_path';
                            'data_gtfake' is additionally read when ``self.isTrain`` is set.

        Every map is moved to ``self.device`` and resized to 1024x1024 with
        bilinear interpolation.
        """
        self.outer = input['data_outer'].to(self.device)
        self.outer = torch.nn.functional.interpolate(self.outer, (1024, 1024), mode='bilinear', align_corners=False)

        self.inner = input['data_inner'].to(self.device)
        self.inner = torch.nn.functional.interpolate(self.inner, (1024, 1024), mode='bilinear', align_corners=False)

        self.image_paths = input['image_path']

        if self.isTrain:
            self.gtfake = input['data_gtfake'].to(self.device)
            self.gtfake = torch.nn.functional.interpolate(self.gtfake, (1024, 1024), mode='bilinear', align_corners=False)
            self.real_B = self.gtfake

        self.real_A = torch.cat((self.outer, self.inner), 1)

    @staticmethod
    def _rescale_to_unit(tensor):
        """Min-max rescale ``tensor`` to [0, 1]; a constant tensor maps to all zeros."""
        low = torch.min(tensor)
        span = torch.max(tensor) - low
        if span == 0:
            # Avoid 0/0 -> NaN for a constant map: the numerator is already zero,
            # so dividing by one yields zeros.
            span = torch.ones_like(span)
        return (tensor - low) / span

    def set_input(self, outer, inner):
        """Build ``real_A`` from two numpy arrays.

        Each array gains two leading singleton dimensions, is min-max rescaled
        to [0, 1], mapped to [-1, 1] by ``normalize`` and the two are
        concatenated along dim 1 (outer first) on ``self.device``.

        NOTE(review): presumably ``outer``/``inner`` are 2-D (H, W) maps of the
        same size -- confirm against the caller.
        """
        inner = torch.from_numpy(inner).unsqueeze(0).unsqueeze(0)
        outer = torch.from_numpy(outer).unsqueeze(0).unsqueeze(0)

        inner = self.normalize(self._rescale_to_unit(inner))
        outer = self.normalize(self._rescale_to_unit(outer))

        self.real_A = torch.cat((outer, inner), 1).to(self.device)

    def normalize(self, input):
        """Return ``input * 2 - 1`` (maps [0, 1] onto [-1, 1])."""
        input = input * 2
        input = input - 1
        return input

    def forward(self):
        """Run forward pass: ``fake_B = netG(real_A)``."""
        self.fake_B = self.netG(self.real_A)  # G(A)

    def backward_D(self):
        """Calculate GAN loss for the discriminator"""
        # Fake; stop backprop to the generator by detaching fake_B
        # we use conditional GANs; we need to feed both input and output to the discriminator
        fake_AB = torch.cat((self.real_A, self.fake_B), 1)
        pred_fake = self.netD(fake_AB.detach())
        self.loss_D_fake = self.criterionGAN(pred_fake, False)
        # Real
        real_AB = torch.cat((self.real_A, self.real_B), 1)
        pred_real = self.netD(real_AB)
        self.loss_D_real = self.criterionGAN(pred_real, True)
        # combine loss and calculate gradients
        self.loss_D = (self.loss_D_fake + self.loss_D_real) * 0.5
        self.loss_D.backward()

    def backward_G(self):
        """Calculate GAN and L1 loss for the generator"""
        # First, G(A) should fake the discriminator
        fake_AB = torch.cat((self.real_A, self.fake_B), 1)
        pred_fake = self.netD(fake_AB)
        self.loss_G_GAN = self.criterionGAN(pred_fake, True)
        # Second, G(A) = B
        self.loss_G_L1 = self.criterionL1(self.fake_B, self.real_B) * self.opt.lambda_L1
        # combine loss and calculate gradients
        self.loss_G = self.loss_G_L1 + self.loss_G_GAN
        self.loss_G.backward()

    def optimize_parameters(self):
        """Run one training step: forward pass, then update D, then update G."""
        self.forward()                            # compute fake images: G(A)
        # update D
        self.set_requires_grad(self.netD, True)   # enable backprop for D
        self.optimizer_D.zero_grad()              # set D's gradients to zero
        self.backward_D()                         # calculate gradients for D
        self.optimizer_D.step()                   # update D's weights
        # update G
        self.set_requires_grad(self.netD, False)  # D requires no gradients when optimizing G
        self.optimizer_G.zero_grad()              # set G's gradients to zero
        self.backward_G()                         # calculate gradients for G
        self.optimizer_G.step()                   # update G's weights
class BaseOptions():
    """This class defines options used during both training and test time.

    It also implements several helper functions such as parsing, printing, and saving the options.
    It also gathers additional options defined in the option setters returned by
    ``pix2pix.models.get_option_setter`` and ``pix2pix.data.get_option_setter``.

    ``self.isTrain`` is read by ``gather_options`` and ``parse`` but is not set
    here; the subclass is expected to set it (e.g. in its ``initialize``).
    """

    def __init__(self):
        """Reset the class; indicates the class hasn't been initialized"""
        self.initialized = False

    def initialize(self, parser):
        """Define the common options that are used in both training and test."""
        # basic parameters
        parser.add_argument('--dataroot', help='path to images (should have subfolders trainA, trainB, valA, valB, etc)')
        parser.add_argument('--name', type=str, default='void', help='mahdi_unet_new, scaled_unet')
        parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU')
        parser.add_argument('--checkpoints_dir', type=str, default='./pix2pix/checkpoints', help='models are saved here')
        # model parameters
        parser.add_argument('--model', type=str, default='cycle_gan', help='chooses which model to use. [cycle_gan | pix2pix | test | colorization]')
        parser.add_argument('--input_nc', type=int, default=2, help='# of input image channels: 3 for RGB and 1 for grayscale')
        parser.add_argument('--output_nc', type=int, default=1, help='# of output image channels: 3 for RGB and 1 for grayscale')
        parser.add_argument('--ngf', type=int, default=64, help='# of gen filters in the last conv layer')
        parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in the first conv layer')
        parser.add_argument('--netD', type=str, default='basic', help='specify discriminator architecture [basic | n_layers | pixel]. The basic model is a 70x70 PatchGAN. n_layers allows you to specify the layers in the discriminator')
        parser.add_argument('--netG', type=str, default='resnet_9blocks', help='specify generator architecture [resnet_9blocks | resnet_6blocks | unet_256 | unet_128]')
        parser.add_argument('--n_layers_D', type=int, default=3, help='only used if netD==n_layers')
        parser.add_argument('--norm', type=str, default='instance', help='instance normalization or batch normalization [instance | batch | none]')
        parser.add_argument('--init_type', type=str, default='normal', help='network initialization [normal | xavier | kaiming | orthogonal]')
        parser.add_argument('--init_gain', type=float, default=0.02, help='scaling factor for normal, xavier and orthogonal.')
        parser.add_argument('--no_dropout', action='store_true', help='no dropout for the generator')
        # dataset parameters
        parser.add_argument('--dataset_mode', type=str, default='unaligned', help='chooses how datasets are loaded. [unaligned | aligned | single | colorization]')
        parser.add_argument('--direction', type=str, default='AtoB', help='AtoB or BtoA')
        parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly')
        parser.add_argument('--num_threads', default=4, type=int, help='# threads for loading data')
        parser.add_argument('--batch_size', type=int, default=1, help='input batch size')
        parser.add_argument('--load_size', type=int, default=672, help='scale images to this size')
        parser.add_argument('--crop_size', type=int, default=672, help='then crop to this size')
        parser.add_argument('--max_dataset_size', type=int, default=10000, help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.')
        parser.add_argument('--preprocess', type=str, default='resize_and_crop', help='scaling and cropping of images at load time [resize_and_crop | crop | scale_width | scale_width_and_crop | none]')
        parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data augmentation')
        parser.add_argument('--display_winsize', type=int, default=256, help='display window size for both visdom and HTML')
        # additional parameters
        parser.add_argument('--epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
        # The default is an int (not the string '0') so that get_default() and
        # the parsed value compare equal in print_options.
        parser.add_argument('--load_iter', type=int, default=0, help='which iteration to load? if load_iter > 0, the code will load models by iter_[load_iter]; otherwise, the code will load models by [epoch]')
        parser.add_argument('--verbose', action='store_true', help='if specified, print more debugging information')
        parser.add_argument('--suffix', default='', type=str, help='customized suffix: opt.name = opt.name + suffix: e.g., {model}_{netG}_size{load_size}')

        # options shared with the host depth-estimation pipeline
        parser.add_argument('--data_dir', type=str, required=False,
                            help='input files directory images can be .png .jpg .tiff')
        parser.add_argument('--output_dir', type=str, required=False,
                            help='result dir. result depth will be png. vides are JMPG as avi')
        parser.add_argument('--savecrops', type=int, required=False)
        parser.add_argument('--savewholeest', type=int, required=False)
        parser.add_argument('--output_resolution', type=int, required=False,
                            help='0 for no restriction 1 for resize to input size')
        parser.add_argument('--net_receptive_field_size', type=int, required=False)
        parser.add_argument('--pix2pixsize', type=int, required=False)
        parser.add_argument('--generatevideo', type=int, required=False)
        parser.add_argument('--depthNet', type=int, required=False, help='0: midas 1:strurturedRL')
        parser.add_argument('--R0', action='store_true')
        parser.add_argument('--R20', action='store_true')
        parser.add_argument('--Final', action='store_true')
        parser.add_argument('--colorize_results', action='store_true')
        parser.add_argument('--max_res', type=float, default=np.inf)

        self.initialized = True
        return parser

    def gather_options(self):
        """Build the parser and return the parsed options.

        The basic options come from ``initialize``; model-specific and
        dataset-specific options are then added by the option setters looked up
        from ``opt.model`` and ``opt.dataset_mode``.

        A fresh parser is built on every call. Previously the parser was only
        created while ``self.initialized`` was False, so a second call failed
        with UnboundLocalError.

        Unknown command-line arguments are tolerated (``parse_known_args``).
        The returned namespace is the one parsed before the dataset option
        setter runs, so dataset-specific options are registered on
        ``self.parser`` but are not present in the result.
        """
        parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        parser = self.initialize(parser)

        # get the basic options
        opt, _ = parser.parse_known_args()

        # modify model-related parser options
        model_option_setter = pix2pix.models.get_option_setter(opt.model)
        parser = model_option_setter(parser, self.isTrain)
        opt, _ = parser.parse_known_args()  # parse again with new defaults

        # modify dataset-related parser options
        dataset_option_setter = pix2pix.data.get_option_setter(opt.dataset_mode)
        parser = dataset_option_setter(parser, self.isTrain)

        # save the parser; deliberately not parser.parse_args(), which would
        # reject arguments this parser does not know about
        self.parser = parser
        return opt

    def print_options(self, opt):
        """Print and save options

        It will print both current options and default values (if different).
        It will save options into a text file: [checkpoints_dir]/[name]/[phase]_opt.txt
        """
        message = ''
        message += '----------------- Options ---------------\n'
        for k, v in sorted(vars(opt).items()):
            comment = ''
            default = self.parser.get_default(k)
            if v != default:
                comment = '\t[default: %s]' % str(default)
            message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment)
        message += '----------------- End -------------------'
        print(message)

        # save to the disk
        expr_dir = os.path.join(opt.checkpoints_dir, opt.name)
        util.mkdirs(expr_dir)
        file_name = os.path.join(expr_dir, '{}_opt.txt'.format(opt.phase))
        with open(file_name, 'wt') as opt_file:
            opt_file.write(message)
            opt_file.write('\n')

    def parse(self):
        """Parse our options, apply the name suffix, and convert gpu ids to a list of ints.

        Returns the option namespace, also stored as ``self.opt``. Negative gpu
        ids (e.g. -1 for CPU) are dropped from ``opt.gpu_ids``.
        """
        opt = self.gather_options()
        opt.isTrain = self.isTrain   # train or test

        # process opt.suffix
        if opt.suffix:
            opt.name = opt.name + '_' + opt.suffix.format(**vars(opt))

        # set gpu ids; blank tokens (e.g. from a trailing comma) are skipped
        gpu_ids = []
        for token in opt.gpu_ids.split(','):
            if not token.strip():
                continue
            gpu_id = int(token)
            if gpu_id >= 0:
                gpu_ids.append(gpu_id)
        opt.gpu_ids = gpu_ids

        self.opt = opt
        return self.opt
+ """ + + def initialize(self, parser): + parser = BaseOptions.initialize(self, parser) + # visdom and HTML visualization parameters + parser.add_argument('--display_freq', type=int, default=2500, help='frequency of showing training results on screen') + parser.add_argument('--display_ncols', type=int, default=4, help='if positive, display all images in a single visdom web panel with certain number of images per row.') + parser.add_argument('--display_id', type=int, default=1, help='window id of the web display') + parser.add_argument('--display_server', type=str, default="http://localhost", help='visdom server of the web display') + parser.add_argument('--display_env', type=str, default='main', help='visdom display environment name (default is "main")') + parser.add_argument('--display_port', type=int, default=8097, help='visdom port of the web display') + parser.add_argument('--update_html_freq', type=int, default=1000, help='frequency of saving training results to html') + parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console') + parser.add_argument('--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/') + # network saving and loading parameters + parser.add_argument('--save_latest_freq', type=int, default=5000, help='frequency of saving the latest results') + parser.add_argument('--save_epoch_freq', type=int, default=10, help='frequency of saving checkpoints at the end of epochs') + parser.add_argument('--save_by_iter', action='store_true', help='whether saves model by iteration') + parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model') + parser.add_argument('--epoch_count', type=int, default=1, help='the starting epoch count, we save the model by , +, ...') + parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc') + # training parameters + 
parser.add_argument('--n_epochs', type=int, default=100, help='number of epochs with the initial learning rate') + parser.add_argument('--n_epochs_decay', type=int, default=100, help='number of epochs to linearly decay learning rate to zero') + parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam') + parser.add_argument('--lr', type=float, default=0.0001, help='initial learning rate for adam') + parser.add_argument('--gan_mode', type=str, default='lsgan', help='the type of GAN objective. [vanilla| lsgan | wgangp]. vanilla GAN loss is the cross-entropy objective used in the original GAN paper.') + parser.add_argument('--pool_size', type=int, default=50, help='the size of image buffer that stores previously generated images') + parser.add_argument('--lr_policy', type=str, default='linear', help='learning rate policy. [linear | step | plateau | cosine]') + parser.add_argument('--lr_decay_iters', type=int, default=50, help='multiply by a gamma every lr_decay_iters iterations') + + self.isTrain = True + return parser diff --git a/pix2pix/test.py b/pix2pix/test.py new file mode 100644 index 0000000000000000000000000000000000000000..450f77bd03d3093bb39a90f2a1a05276cbcef6d8 --- /dev/null +++ b/pix2pix/test.py @@ -0,0 +1,97 @@ +"""General-purpose test script for image-to-image translation. + +Once you have trained your model with train.py, you can use this script to test the model. +It will load a saved model from '--checkpoints_dir' and save the results to '--results_dir'. + +It first creates model and dataset given the option. It will hard-code some parameters. +It then runs inference for '--num_test' images and save results to an HTML file. 
if __name__ == '__main__':
    opt = TestOptions().parse()  # get test options
    # hard-code some parameters for test
    opt.num_threads = 0        # test code only supports num_threads = 1
    opt.batch_size = 1         # test code only supports batch_size = 1
    opt.serial_batches = True  # disable data shuffling; comment this line if results on randomly chosen images are needed.
    opt.no_flip = True         # no flip; comment this line if results on flipped images are needed.
    opt.display_id = -1        # no visdom display
    dataset = create_dataset(opt)  # create a dataset given opt.dataset_mode and other options
    model = create_model(opt)      # create a model given opt.model and other options
    model.setup(opt)               # regular setup: load and print networks; create schedulers

    # The web directory is only reported, never written to.
    # NOTE(review): no '--results_dir' option is visible in the option classes,
    # so fall back to 'results' instead of failing with AttributeError.
    results_root = getattr(opt, 'results_dir', 'results')
    web_dir = os.path.join(results_root, opt.name, '{}_{}'.format(opt.phase, opt.epoch))
    if opt.load_iter > 0:  # load_iter is 0 by default
        web_dir = '{:s}_iter{:d}'.format(web_dir, opt.load_iter)
    print('creating web directory', web_dir)

    normalize_coef = np.float32(2 ** 16)

    # The output directory does not depend on the image, so create it once.
    save_dirname = os.path.join('results', 'mahdi_pix2pix_unet_l1_basic', 'test_latest')
    os.makedirs(save_dirname, exist_ok=True)

    def _to_unit_numpy(tensor):
        """Move a tensor to CPU, squeeze it, and map its values from [-1, 1] to [0, 1]."""
        return (torch.squeeze(tensor.cpu()).numpy() + 1) / 2

    # test with eval mode. This only affects layers like batchnorm and dropout.
    model.eval()
    for i, data in enumerate(dataset):
        model.set_input_train(data)            # unpack data from data loader
        model.test()                           # run inference
        visuals = model.get_current_visuals()  # get image results
        img_path = model.get_image_paths()     # get image paths
        filename = os.path.basename(img_path[0])
        print('processing (%04d)-th image... %s' % (i, filename))

        # NOTE(review): the model's test-time visual_names may not include
        # 'inner'; fall back to the attribute populated by set_input_train.
        inner_tensor = visuals['inner'] if 'inner' in visuals else model.inner
        inner = _to_unit_numpy(inner_tensor)
        out = _to_unit_numpy(visuals['fake_B'])

        # Smooth the prediction using the inner map as the guide image.
        out = GuidedFilter(inner, out, 64, 0).smooth.astype('float32')

        # Clip before quantising: values outside [0, 1] would otherwise wrap
        # around in the uint16 cast.
        out = np.clip(out, 0.0, 1.0)
        out = out * (normalize_coef - 1)
        out = out.astype('uint16')
        out = Image.fromarray(out)
        out = out.convert('I;16')

        out.save(os.path.join(save_dirname, filename))
if __name__ == '__main__':
    opt = TrainOptions().parse()   # get training options
    dataset = create_dataset(opt)  # create a dataset given opt.dataset_mode and other options
    dataset_size = len(dataset)    # get the number of images in the dataset.
    print('The number of training images = %d' % dataset_size)

    model = create_model(opt)      # create a model given opt.model and other options
    model.setup(opt)               # regular setup: load and print networks; create schedulers
    visualizer = Visualizer(opt)   # create a visualizer that display/save images and plots

    last_epoch = opt.n_epochs + opt.n_epochs_decay
    # outer loop for different epochs
    for epoch in range(opt.epoch_count, last_epoch + 1):
        epoch_start_time = time.time()  # timer for entire epoch
        iter_data_time = time.time()    # timer for data loading per iteration
        epoch_iter = 0                  # the number of training iterations in current epoch, reset to 0 every epoch
        visualizer.reset()              # reset the visualizer
        model.update_learning_rate()    # update learning rates in the beginning of every epoch.

        for data in dataset:            # inner loop within one epoch
            iter_start_time = time.time()  # timer for computation per iteration

            epoch_iter += opt.batch_size
            model.set_input_train(data)    # unpack data from dataset and apply preprocessing
            model.optimize_parameters()    # calculate loss functions, get gradients, update network weights

            # ">=" rather than "==": when batch_size does not divide the dataset
            # size, the counter steps past dataset_size on the last batch.
            epoch_done = epoch_iter >= dataset_size

            if epoch_done:
                model.compute_visuals()
                visualizer.display_current_results(model.get_current_visuals(), epoch, True)

            # print training losses and save logging information to the disk
            if epoch_iter % 500 == 0 or epoch_done:
                losses = model.get_current_losses()
                t_data = iter_start_time - iter_data_time
                t_comp = (time.time() - iter_start_time) / opt.batch_size
                visualizer.print_current_losses(epoch, epoch_iter, losses, t_comp, t_data)

            # Restart the data-loading timer so t_data covers only the wait for
            # the next batch instead of the whole epoch so far.
            iter_data_time = time.time()

        if epoch % opt.save_epoch_freq == 0:  # cache our model every <save_epoch_freq> epochs
            print('saving the model at the end of epoch %d' % epoch)
            model.save_networks('latest')
            model.save_networks(epoch)

        print('End of epoch %d / %d \t Time Taken: %d sec' % (epoch, last_epoch, time.time() - epoch_start_time))
0000000000000000000000000000000000000000..1594e22ee1de7a2ffa0af685adf4e1660d8d5c42 Binary files /dev/null and b/pix2pix/util/__pycache__/__init__.cpython-311.pyc differ diff --git a/pix2pix/util/__pycache__/__init__.cpython-312.pyc b/pix2pix/util/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aeceb9e3ef076537087d6f528af61eac2abdd913 Binary files /dev/null and b/pix2pix/util/__pycache__/__init__.cpython-312.pyc differ diff --git a/pix2pix/util/__pycache__/guidedfilter.cpython-310.pyc b/pix2pix/util/__pycache__/guidedfilter.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..07e590a4dd1a29b915c0203f0c3982edbaadcda2 Binary files /dev/null and b/pix2pix/util/__pycache__/guidedfilter.cpython-310.pyc differ diff --git a/pix2pix/util/__pycache__/guidedfilter.cpython-312.pyc b/pix2pix/util/__pycache__/guidedfilter.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..449098eb43e76255a43d2062f60d61b315181734 Binary files /dev/null and b/pix2pix/util/__pycache__/guidedfilter.cpython-312.pyc differ diff --git a/pix2pix/util/__pycache__/util.cpython-310.pyc b/pix2pix/util/__pycache__/util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a845472c96cf003fbe6da95a6e857fee8955cffc Binary files /dev/null and b/pix2pix/util/__pycache__/util.cpython-310.pyc differ diff --git a/pix2pix/util/__pycache__/util.cpython-311.pyc b/pix2pix/util/__pycache__/util.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..82972ae3c76fbb009490980cd2a46377cb4c7e02 Binary files /dev/null and b/pix2pix/util/__pycache__/util.cpython-311.pyc differ diff --git a/pix2pix/util/__pycache__/util.cpython-312.pyc b/pix2pix/util/__pycache__/util.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0424be9beb1403936b063b6f148db6405c5fe2ee Binary files /dev/null and 
b/pix2pix/util/__pycache__/util.cpython-312.pyc differ diff --git a/pix2pix/util/get_data.py b/pix2pix/util/get_data.py new file mode 100644 index 0000000000000000000000000000000000000000..1efa19f2de3f39213c0a433d9570c8f7bb928709 --- /dev/null +++ b/pix2pix/util/get_data.py @@ -0,0 +1,110 @@ +from __future__ import print_function +import os +import tarfile +import requests +from warnings import warn +from zipfile import ZipFile +from bs4 import BeautifulSoup +from os.path import abspath, isdir, join, basename + + +class GetData(object): + """A Python script for downloading CycleGAN or pix2pix datasets. + + Parameters: + technique (str) -- One of: 'cyclegan' or 'pix2pix'. + verbose (bool) -- If True, print additional information. + + Examples: + >>> from util.get_data import GetData + >>> gd = GetData(technique='cyclegan') + >>> new_data_path = gd.get(save_path='./datasets') # options will be displayed. + + Alternatively, You can use bash scripts: 'scripts/download_pix2pix_model.sh' + and 'scripts/download_cyclegan_model.sh'. 
+ """ + + def __init__(self, technique='cyclegan', verbose=True): + url_dict = { + 'pix2pix': 'http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/', + 'cyclegan': 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets' + } + self.url = url_dict.get(technique.lower()) + self._verbose = verbose + + def _print(self, text): + if self._verbose: + print(text) + + @staticmethod + def _get_options(r): + soup = BeautifulSoup(r.text, 'lxml') + options = [h.text for h in soup.find_all('a', href=True) + if h.text.endswith(('.zip', 'tar.gz'))] + return options + + def _present_options(self): + r = requests.get(self.url) + options = self._get_options(r) + print('Options:\n') + for i, o in enumerate(options): + print("{0}: {1}".format(i, o)) + choice = input("\nPlease enter the number of the " + "dataset above you wish to download:") + return options[int(choice)] + + def _download_data(self, dataset_url, save_path): + if not isdir(save_path): + os.makedirs(save_path) + + base = basename(dataset_url) + temp_save_path = join(save_path, base) + + with open(temp_save_path, "wb") as f: + r = requests.get(dataset_url) + f.write(r.content) + + if base.endswith('.tar.gz'): + obj = tarfile.open(temp_save_path) + elif base.endswith('.zip'): + obj = ZipFile(temp_save_path, 'r') + else: + raise ValueError("Unknown File Type: {0}.".format(base)) + + self._print("Unpacking Data...") + obj.extractall(save_path) + obj.close() + os.remove(temp_save_path) + + def get(self, save_path, dataset=None): + """ + + Download a dataset. + + Parameters: + save_path (str) -- A directory to save the data to. + dataset (str) -- (optional). A specific dataset to download. + Note: this must include the file extension. + If None, options will be presented for you + to choose from. + + Returns: + save_path_full (str) -- the absolute path to the downloaded data. 
class GuidedFilter():
    """Guided image filter computed with box (moving-sum) filters.

    The filter is evaluated eagerly in the constructor and the result is
    stored in ``self.smooth``.

    Parameters:
        source (2-D array)    -- guidance image ``I``
        reference (2-D array) -- image ``p`` to be filtered; same shape as ``source``
        r (int)               -- window radius; every window is (2r+1) x (2r+1)
        eps (float)           -- regularisation term added to the local variance of ``I``

    Raises:
        ValueError -- if the inputs are not 2-D, differ in shape, or are smaller
                      than one window (2r+1) in either dimension
    """

    def __init__(self, source, reference, r=64, eps=0.05**2):
        self.source = source
        self.reference = reference
        self.r = r
        self.eps = eps

        self.smooth = self.guidedfilter(self.source, self.reference, self.r, self.eps)

    @staticmethod
    def _as_float(arr):
        """Return ``arr`` as an ndarray; non-float dtypes are promoted to float64."""
        arr = np.asarray(arr)
        if not np.issubdtype(arr.dtype, np.inexact):
            # Integer/bool sums and products would overflow or be truncated when
            # written back into an array of the same small dtype.
            arr = arr.astype(np.float64)
        return arr

    def boxfilter(self, img, r):
        """Sum ``img`` over the (2r+1) x (2r+1) window centred on every pixel.

        Windows are truncated at the image border, so border pixels sum fewer
        samples than interior ones.

        Parameters:
            img (2-D array) -- input image
            r (int)         -- window radius

        Returns an array with the same shape as ``img``.
        """
        img = self._as_float(img)
        if img.ndim != 2:
            raise ValueError("boxfilter expects a 2-D array, got %d dimension(s)" % img.ndim)
        (rows, cols) = img.shape
        if r < 0 or rows < 2 * r + 1 or cols < 2 * r + 1:
            # The slice arithmetic below assumes at least one full window fits; smaller
            # inputs used to broadcast silently or fail with an opaque shape error.
            raise ValueError(
                "image of shape %s is too small for radius %s (need at least %s x %s)"
                % ((rows, cols), r, 2 * r + 1, 2 * r + 1))

        imDst = np.zeros_like(img)

        # Vertical pass: differences of the cumulative sum along axis 0.
        imCum = np.cumsum(img, 0)
        imDst[0:r + 1, :] = imCum[r:2 * r + 1, :]
        imDst[r + 1:rows - r, :] = imCum[2 * r + 1:rows, :] - imCum[0:rows - 2 * r - 1, :]
        imDst[rows - r:rows, :] = np.tile(imCum[rows - 1, :], [r, 1]) - imCum[rows - 2 * r - 1:rows - r - 1, :]

        # Horizontal pass on the vertically summed image.
        imCum = np.cumsum(imDst, 1)
        imDst[:, 0:r + 1] = imCum[:, r:2 * r + 1]
        imDst[:, r + 1:cols - r] = imCum[:, 2 * r + 1:cols] - imCum[:, 0:cols - 2 * r - 1]
        imDst[:, cols - r:cols] = np.tile(imCum[:, cols - 1], [r, 1]).T - imCum[:, cols - 2 * r - 1:cols - r - 1]

        return imDst

    def guidedfilter(self, I, p, r, eps):
        """Filter ``p`` using ``I`` as guidance and return the result ``q``.

        Per window the linear model ``q = a * I + b`` is fitted with
        ``a = cov(I, p) / (var(I) + eps)`` and ``b = mean(p) - a * mean(I)``;
        the coefficients are then averaged over all windows covering a pixel.

        Parameters:
            I (2-D array) -- guidance image
            p (2-D array) -- image to filter; same shape as ``I``
            r (int)       -- window radius
            eps (float)   -- regularisation added to the local variance
        """
        I = self._as_float(I)
        p = self._as_float(p)
        if I.shape != p.shape:
            raise ValueError(
                "source and reference must have the same shape, got %s and %s" % (I.shape, p.shape))

        (rows, cols) = I.shape
        N = self.boxfilter(np.ones([rows, cols]), r)  # number of samples in each (truncated) window

        meanI = self.boxfilter(I, r) / N
        meanP = self.boxfilter(p, r) / N
        meanIp = self.boxfilter(I * p, r) / N
        covIp = meanIp - meanI * meanP

        meanII = self.boxfilter(I * I, r) / N
        varI = meanII - meanI * meanI

        # With eps == 0 a perfectly flat window has zero variance and 0/0 would give NaN.
        # There the slope is 0 (q falls back to the local mean of p), so leave ``a`` at 0.
        denom = varI + eps
        a = np.zeros_like(covIp)
        np.divide(covIp, denom, out=a, where=denom != 0)
        b = meanP - a * meanI

        meanA = self.boxfilter(a, r) / N
        meanB = self.boxfilter(b, r) / N

        q = meanA * I + meanB
        return q
link)): + img(style="width:%dpx" % width, src=os.path.join('images', im)) + br() + p(txt) + + def save(self): + """save the current content to the HMTL file""" + html_file = '%s/index.html' % self.web_dir + f = open(html_file, 'wt') + f.write(self.doc.render()) + f.close() + + +if __name__ == '__main__': # we show an example usage here. + html = HTML('web/', 'test_html') + html.add_header('hello world') + + ims, txts, links = [], [], [] + for n in range(4): + ims.append('image_%d.png' % n) + txts.append('text_%d' % n) + links.append('image_%d.png' % n) + html.add_images(ims, txts, links) + html.save() diff --git a/pix2pix/util/image_pool.py b/pix2pix/util/image_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..3f57d7fb08b58f4484d777b0dfbab58c63adeea0 --- /dev/null +++ b/pix2pix/util/image_pool.py @@ -0,0 +1,54 @@ +import random +import torch + + +class ImagePool(): + """This class implements an image buffer that stores previously generated images. + + This buffer enables us to update discriminators using a history of generated images + rather than the ones produced by the latest generators. + """ + + def __init__(self, pool_size): + """Initialize the ImagePool class + + Parameters: + pool_size (int) -- the size of image buffer, if pool_size=0, no buffer will be created + """ + self.pool_size = pool_size + if self.pool_size > 0: # create an empty pool + self.num_imgs = 0 + self.images = [] + + def query(self, images): + """Return an image from the pool. + + Parameters: + images: the latest generated images from the generator + + Returns images from the buffer. + + By 50/100, the buffer will return input images. + By 50/100, the buffer will return images previously stored in the buffer, + and insert the current images to the buffer. 
def tensor2im(input_image, imtype=np.uint16):
    """Convert a tensor with values in [-1, 1] into a numpy image array.

    Parameters:
        input_image (tensor or ndarray) -- the input image
        imtype (type)                   -- the desired dtype of the returned numpy array

    Returns:
        For a tensor: the squeezed data mapped from [-1, 1] onto the full range of
        ``imtype`` (0..65535 for the default uint16) and cast to ``imtype``.
        For an ndarray: the array cast to ``imtype`` without rescaling.
        Any other object is returned unchanged.
    """
    if isinstance(input_image, np.ndarray):
        return input_image.astype(imtype)
    if not isinstance(input_image, torch.Tensor):
        return input_image

    image_numpy = torch.squeeze(input_image.detach()).cpu().numpy()  # convert it into a numpy array

    dtype = np.dtype(imtype)
    if np.issubdtype(dtype, np.integer):
        # Scale to the range of the requested type instead of always using 16 bits, and clip so
        # that values slightly outside [-1, 1] saturate rather than wrap around in the cast.
        top = np.iinfo(dtype).max
        image_numpy = np.clip((image_numpy + 1) / 2.0 * top, 0, top)
    else:
        # Non-integer targets keep the historical 16-bit scaling.
        image_numpy = (image_numpy + 1) / 2.0 * (2**16 - 1)
    return image_numpy.astype(imtype)


def diagnose_network(net, name='network'):
    """Print the mean, over parameters, of the mean absolute gradient.

    Parameters without a gradient are ignored; if none has a gradient, 0.0 is printed.

    Parameters:
        net (torch network) -- Torch network
        name (str)          -- the name of the network, printed before the value
    """
    mean = 0.0
    count = 0
    for param in net.parameters():
        if param.grad is not None:
            mean += torch.mean(torch.abs(param.grad.data))
            count += 1
    if count > 0:
        mean = mean / count
    print(name)
    print(mean)


def save_image(image_numpy, image_path, aspect_ratio=1.0):
    """Save a numpy image to the disk as a 16-bit ('I;16') image.

    Parameters:
        image_numpy (numpy array) -- input numpy array
        image_path (str)          -- the path of the image
        aspect_ratio (float)      -- accepted for interface compatibility; currently unused
                                     (no resizing is performed)
    """
    image_pil = Image.fromarray(image_numpy)
    image_pil = image_pil.convert('I;16')
    image_pil.save(image_path)


def print_numpy(x, val=True, shp=False):
    """Print the mean, min, max, median, and std of a numpy array, and optionally its shape.

    Parameters:
        x (numpy array) -- the array to describe
        val (bool)      -- if print the statistics of the numpy array
        shp (bool)      -- if print the shape of the numpy array
    """
    x = x.astype(np.float64)
    if shp:
        print('shape,', x.shape)
    if val:
        x = x.flatten()
        print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % (
            np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x)))


def mkdirs(paths):
    """Create empty directories if they don't exist.

    Parameters:
        paths (str, or list/tuple of str) -- a single directory path or several of them
    """
    if isinstance(paths, (list, tuple)):
        for path in paths:
            mkdir(path)
    else:
        mkdir(paths)


def mkdir(path):
    """Create a single empty directory (including parents) if it doesn't exist.

    Parameters:
        path (str) -- a single directory path
    """
    if not os.path.exists(path):
        # exist_ok closes the race where another process creates the directory
        # between the existence check above and this call.
        os.makedirs(path, exist_ok=True)
class Visualizer():
    """Saves intermediate result images to an HTML page and logs training losses.

    Images go through the project's 'util' helpers and the 'HTML' wrapper (see html.py);
    losses are echoed to the console and appended to a 'loss_log.txt' file.
    """

    def __init__(self, opt):
        """Initialize the Visualizer class

        Parameters:
            opt -- stores all the experiment flags; needs to be a subclass of BaseOptions
        Step 1: Cache the training/test options
        Step 2: If HTML output is enabled, create the web and image directories
        Step 3: Open the loss log in append mode and write a timestamped header
        """
        self.opt = opt  # cache the option
        self.display_id = opt.display_id
        self.use_html = opt.isTrain and not opt.no_html
        self.win_size = opt.display_winsize
        self.name = opt.name
        self.port = opt.display_port
        self.saved = False

        experiment_dir = os.path.join(opt.checkpoints_dir, opt.name)
        if self.use_html:
            # The HTML page lives at <checkpoints_dir>/<name>/web/, images under web/images/.
            self.web_dir = os.path.join(experiment_dir, 'web')
            self.img_dir = os.path.join(self.web_dir, 'images')
            print('create web directory %s...' % self.web_dir)
            util.mkdirs([self.web_dir, self.img_dir])

        # Logging file that stores the training losses.
        self.log_name = os.path.join(experiment_dir, 'loss_log.txt')
        header = '================ Training Loss (%s) ================\n' % time.strftime("%c")
        with open(self.log_name, "a") as log_file:
            log_file.write(header)

    def reset(self):
        """Reset the self.saved status"""
        self.saved = False

    def create_visdom_connections(self):
        """Start a Visdom server on port < self.port > as a background shell command."""
        cmd = sys.executable + ' -m visdom.server -p %d &>/dev/null &' % self.port
        print('\n\nCould not connect to Visdom server. \n Trying to start a server....')
        print('Command: %s' % cmd)
        Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)

    def display_current_results(self, visuals, epoch, save_result):
        """Save the current results as images and rebuild the HTML overview page.

        Nothing happens when HTML output is disabled, or when the results were
        already saved since the last reset() and save_result is False.

        Parameters:
            visuals (OrderedDict) - - dictionary of images to save
            epoch (int) - - the current epoch
            save_result (bool) - - if save the current results even when already saved
        """
        if not self.use_html:
            return
        if self.saved and not save_result:
            return
        self.saved = True

        # Write every visual of this epoch to the image directory.
        for label, image in visuals.items():
            file_name = 'epoch%.3d_%s.png' % (epoch, label)
            util.save_image(util.tensor2im(image), os.path.join(self.img_dir, file_name))

        # Rebuild the page from scratch, newest epoch first.
        webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, refresh=1)
        labels = list(visuals)
        for n in range(epoch, 0, -1):
            webpage.add_header('epoch [%d]' % n)
            ims = ['epoch%.3d_%s.png' % (n, label) for label in labels]
            txts = list(labels)
            links = list(ims)
            webpage.add_images(ims, txts, links, width=self.win_size)
        webpage.save()

    # NOTE: a visdom-based plot_current_losses used to live here as commented-out code;
    # print_current_losses takes 'losses' in the same (name, float) format.
    def print_current_losses(self, epoch, iters, losses, t_comp, t_data):
        """print current losses on console; also save the losses to the disk

        Parameters:
            epoch (int) -- current epoch
            iters (int) -- current training iteration during this epoch
            losses (OrderedDict) -- training losses stored in the format of (name, float) pairs
            t_comp (float) -- computational time, printed under 'time'
            t_data (float) -- data loading time, printed under 'data'
        """
        prefix = '(epoch: %d, iters: %d, time: %.3f, data: %.3f) ' % (epoch, iters, t_comp, t_data)
        message = prefix + ''.join('%s: %.3f ' % (k, v) for k, v in losses.items())

        print(message)  # print the message
        with open(self.log_name, "a") as log_file:
            log_file.write('%s\n' % message)  # save the message
class Script(scripts.Script):
    """Script entry that runs the depthmap generation funnel on freshly generated images."""

    def title(self):
        return SCRIPT_NAME

    def show(self, is_img2img):
        return True

    def ui(self, is_img2img):
        gr.HTML()  # Work around a Gradio bug
        with gr.Column(variant='panel'):
            gr.HTML()  # Work around a Gradio bug
            ret = common_ui.main_ui_panel(False)
            ret += ret.enkey_tail()
        return ret.enkey_body()

    # run from script in txt2img or img2img
    def run(self, p, *inputs):
        """Generate images with ``p``, feed them to the depthmap funnel and append the results.

        Every PIL image yielded by the funnel is appended to ``processed.images`` and,
        when the "save_outputs" input is set, written to ``p.outpath_samples``.
        Returns the ``processed`` object.
        """
        from modules import processing
        from modules.processing import create_infotext

        inputs = GradioComponentBundle.enkey_to_dict(inputs)

        # sd process
        processed = processing.process_images(p)
        processed.sampler = p.sampler  # for create_infotext
        processed.tiling = p.tiling  # for create_infotext

        # The first image is the grid when several images were produced and grids are enabled.
        has_grid = len(processed.images) > 1 and shared.opts.return_grid
        inputimages = list(processed.images[1:]) if has_grid else list(processed.images)

        gen_obj = core_generation_funnel(p.outpath_samples, inputimages, None, None, inputs, backbone.gather_ops())

        for input_i, kind, result in gen_obj:
            if not isinstance(result, Image.Image):
                continue

            # get generation parameters
            # TODO: could reuse
            info = None
            if hasattr(processed, 'all_prompts') and shared.opts.enable_pnginfo:
                info = create_infotext(
                    p, processed.all_prompts, processed.all_seeds, processed.all_subseeds, "", 0, input_i)

            processed.images.append(result)
            if not inputs["save_outputs"]:
                continue
            try:
                suffix = "" if kind == "depth" else f"{kind}"
                backbone.save_image(result, path=p.outpath_samples, basename="", seed=processed.all_seeds[input_i],
                                    prompt=processed.all_prompts[input_i], extension=shared.opts.samples_format,
                                    info=info,
                                    p=processed,
                                    suffix=suffix)
            except Exception as e:
                text = str(e)
                if 'image has wrong mode' in text or 'I;16' in text:
                    # Saving failed because of the image mode: report it and carry on.
                    print('Catched exception: image has wrong mode!')
                    traceback.print_exc()
                else:
                    raise e
        return processed


# TODO: some of them may be put into the main ui pane
# TODO: allow in standalone mode
def on_ui_settings():
    """Register the extension's options in the shared settings, in display order."""
    section = ('depthmap-script', "Depthmap extension")

    # (option name, default value, description)
    options = (
        ('keepmodels', False, "Do not unload depth and pix2pix models."),
        ('boost_rmax', 1600, "Maximum wholesize for boost (Rmax)"),
        ('marigold_ensembles', 5, "How many ensembles to use for Marigold"),
        ('marigold_steps', 10, "How many denoising steps to use for Marigold"),
        ('save_ply', False, "Save additional PLY file with 3D inpainted mesh."),
        ('show_3d', True, "Enable showing 3D Meshes in output tab. (Experimental)"),
        ('show_3d_inpaint', True, "Also show 3D Inpainted Mesh in 3D Mesh output tab. (Experimental)"),
        ('mesh_maxsize', 2048, "Max size for generating simple mesh."),
        ('gen_heatmap_from_ui', False, "Show an option to generate HeatMap in the UI"),
        ('extra_stereomodes', False, "Enable more possible outputs for stereoimage generation"),
    )
    for name, default_value, description in options:
        shared.opts.add_option(
            f"depthmap_script_{name}", shared.OptionInfo(default_value, description, section=section))


from modules import script_callbacks
script_callbacks.on_ui_settings(on_ui_settings)
script_callbacks.on_ui_tabs(lambda: [(common_ui.on_ui_tabs(), "Depth", "depthmap_interface")])
def encode_to_base64(image):
    """Return ``image`` as a base64 string.

    Strings are assumed to be encoded already and are returned unchanged; PIL images
    and numpy arrays are encoded through ``api.encode_pil_to_base64``. Any other
    input yields an empty string.
    """
    # isinstance (not ``type(x) is``) so that subclasses are accepted too, e.g. the
    # format-specific PIL image classes returned when an image is opened from a file.
    if isinstance(image, str):
        return image
    if isinstance(image, Image.Image):
        return api.encode_pil_to_base64(image)
    if isinstance(image, np.ndarray):
        return encode_np_to_base64(image)
    return ""


def encode_np_to_base64(image):
    """Convert a numpy array to a PIL image and return it base64-encoded."""
    pil = Image.fromarray(image)
    return api.encode_pil_to_base64(pil)


def to_base64_PIL(encoding: str):
    """Decode a base64-encoded image string into a PIL image with 8-bit (uint8) data.

    Despite the name, this goes from base64 to PIL, not the other way round.
    """
    return Image.fromarray(np.array(api.decode_base64_to_image(encoding)).astype('uint8'))
[encode_to_base64(result)] + + return {"images": results_based, "info": "Success"} + + @app.post("/depth/generate/video") + async def process_video( + depth_input_images: List[str] = Body([], title='Input Images'), + options: Dict[str, object] = Body("options", title='Generation options'), + ): + if len(depth_input_images) == 0: + raise HTTPException(status_code=422, detail="No images supplied") + print(f"Processing {str(len(depth_input_images))} images trough the API") + + # You can use either these strings, or integers + available_models = { + 'res101': 0, + 'dpt_beit_large_512': 1, #midas 3.1 + 'dpt_beit_large_384': 2, #midas 3.1 + 'dpt_large_384': 3, #midas 3.0 + 'dpt_hybrid_384': 4, #midas 3.0 + 'midas_v21': 5, + 'midas_v21_small': 6, + 'zoedepth_n': 7, #indoor + 'zoedepth_k': 8, #outdoor + 'zoedepth_nk': 9, + 'marigold_v1': 10, + 'depth_anything': 11, + 'depth_anything_v2_small': 12, + 'depth_anything_v2_base': 13, + 'depth_anything_v2_large': 14 + } + + model_type = options["model_type"] + + model_id = None + if isinstance(model_type, str): + # Check if the string is in the available_models dictionary + if model_type in available_models: + model_id = available_models[model_type] + else: + available_strings = list(available_models.keys()) + raise HTTPException(status_code=400, detail={'error': 'Invalid model string', 'available_models': available_strings}) + elif isinstance(model_type, int): + model_id = model_type + else: + raise HTTPException(status_code=400, detail={'error': 'Invalid model parameter type'}) + + options["model_type"] = model_id + + video_parameters = options["video_parameters"] + + required_params = ["vid_numframes", "vid_fps", "vid_traj", "vid_shift", "vid_border", "dolly", "vid_format", "vid_ssaa", "output_filename"] + + missing_params = [param for param in required_params if param not in video_parameters] + + if missing_params: + raise HTTPException(status_code=400, detail={'error': f"Missing required parameter(s): {', 
'.join(missing_params)}"}) + + vid_numframes = video_parameters["vid_numframes"] + vid_fps = video_parameters["vid_fps"] + vid_traj = video_parameters["vid_traj"] + vid_shift = video_parameters["vid_shift"] + vid_border = video_parameters["vid_border"] + dolly = video_parameters["dolly"] + vid_format = video_parameters["vid_format"] + vid_ssaa = int(video_parameters["vid_ssaa"]) + + output_filename = video_parameters["output_filename"] + output_path = os.path.dirname(output_filename) + basename, extension = os.path.splitext(os.path.basename(output_filename)) + + # Comparing video_format with the extension + if vid_format != extension[1:]: + raise HTTPException(status_code=400, detail={'error': f"Video format '{vid_format}' does not match with the extension '{extension}'."}) + + pil_images = [] + for input_image in depth_input_images: + pil_images.append(to_base64_PIL(input_image)) + outpath = backbone.get_outpath() + + mesh_fi_filename = video_parameters.get('mesh_fi_filename', None) + + if mesh_fi_filename and os.path.exists(mesh_fi_filename): + mesh_fi = mesh_fi_filename + print("Loaded existing mesh from: ", mesh_fi) + else: + # If there is no mesh file generate it. + options["GEN_INPAINTED_MESH"] = True + + gen_obj = core_generation_funnel(outpath, pil_images, None, None, options) + + mesh_fi = None + for count, type, result in gen_obj: + if type == 'inpainted_mesh': + mesh_fi = result + break + + if mesh_fi: + print("Created mesh in: ", mesh_fi) + else: + raise HTTPException(status_code=400, detail={'error': "The mesh has not been created"}) + + run_makevideo(mesh_fi, vid_numframes, vid_fps, vid_traj, vid_shift, vid_border, dolly, vid_format, vid_ssaa, output_path, basename) + + return {"info": "Success"} + + +try: + import modules.script_callbacks as script_callbacks + if backbone.get_cmd_opt('api', False): + script_callbacks.on_app_started(depth_api) + print("Started the depthmap API. 
DO NOT HOST PUBLICLY - SECURITY RISKS!") +except: + print('DepthMap API could not start') diff --git a/src/__pycache__/backbone.cpython-310.pyc b/src/__pycache__/backbone.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa8aa0a55f4d31d696fdb48f582c894105401c8c Binary files /dev/null and b/src/__pycache__/backbone.cpython-310.pyc differ diff --git a/src/__pycache__/backbone.cpython-311.pyc b/src/__pycache__/backbone.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ffb9bcf776b7a5827baa39ced5da5fa4c3f68fa Binary files /dev/null and b/src/__pycache__/backbone.cpython-311.pyc differ diff --git a/src/__pycache__/backbone.cpython-312.pyc b/src/__pycache__/backbone.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..719458ea883aec0a1c462d27dbf3b16ba7414e23 Binary files /dev/null and b/src/__pycache__/backbone.cpython-312.pyc differ diff --git a/src/__pycache__/common_constants.cpython-310.pyc b/src/__pycache__/common_constants.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..574998fa6f82e253f71385bef192bb4445e0420d Binary files /dev/null and b/src/__pycache__/common_constants.cpython-310.pyc differ diff --git a/src/__pycache__/common_constants.cpython-311.pyc b/src/__pycache__/common_constants.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6f2d0ad57ab0c696f684293eb2c0ce2d301adfd Binary files /dev/null and b/src/__pycache__/common_constants.cpython-311.pyc differ diff --git a/src/__pycache__/common_constants.cpython-312.pyc b/src/__pycache__/common_constants.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..adb6c32c4a8d9aeb7a8651ca7877500d36ea3bc1 Binary files /dev/null and b/src/__pycache__/common_constants.cpython-312.pyc differ diff --git a/src/__pycache__/common_ui.cpython-310.pyc b/src/__pycache__/common_ui.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..8b50cd86f2387f246001c3a2c8dd29b59f1f5b79 Binary files /dev/null and b/src/__pycache__/common_ui.cpython-310.pyc differ diff --git a/src/__pycache__/common_ui.cpython-311.pyc b/src/__pycache__/common_ui.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a29a7595f0b4c2035b46c5d3693f4c183ef6797b Binary files /dev/null and b/src/__pycache__/common_ui.cpython-311.pyc differ diff --git a/src/__pycache__/common_ui.cpython-312.pyc b/src/__pycache__/common_ui.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..00bee2e4e526adfba0f77b68c3014619e618a91a Binary files /dev/null and b/src/__pycache__/common_ui.cpython-312.pyc differ diff --git a/src/__pycache__/core.cpython-310.pyc b/src/__pycache__/core.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1fdac8844a644104912c95ca910bffc6392bffab Binary files /dev/null and b/src/__pycache__/core.cpython-310.pyc differ diff --git a/src/__pycache__/core.cpython-311.pyc b/src/__pycache__/core.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54a262257c7e99e1942d03eb971baa72e13f6645 Binary files /dev/null and b/src/__pycache__/core.cpython-311.pyc differ diff --git a/src/__pycache__/core.cpython-312.pyc b/src/__pycache__/core.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21c92f46bdc46062cf634f0e5c73a1eb2a56afd0 Binary files /dev/null and b/src/__pycache__/core.cpython-312.pyc differ diff --git a/src/__pycache__/depthmap_generation.cpython-310.pyc b/src/__pycache__/depthmap_generation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..41fceacb0ce073c59d84165dade90b84519e9da2 Binary files /dev/null and b/src/__pycache__/depthmap_generation.cpython-310.pyc differ diff --git a/src/__pycache__/depthmap_generation.cpython-311.pyc b/src/__pycache__/depthmap_generation.cpython-311.pyc new file 
mode 100644 index 0000000000000000000000000000000000000000..fe71e526514b50a414fd8c3f361b61bbd8599494 Binary files /dev/null and b/src/__pycache__/depthmap_generation.cpython-311.pyc differ diff --git a/src/__pycache__/depthmap_generation.cpython-312.pyc b/src/__pycache__/depthmap_generation.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5afd4c820aa6c80868b792c37435d52260b8222a Binary files /dev/null and b/src/__pycache__/depthmap_generation.cpython-312.pyc differ diff --git a/src/__pycache__/gradio_args_transport.cpython-310.pyc b/src/__pycache__/gradio_args_transport.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b0536f85cd1b1c32c1860738bfc08c19702189e Binary files /dev/null and b/src/__pycache__/gradio_args_transport.cpython-310.pyc differ diff --git a/src/__pycache__/gradio_args_transport.cpython-312.pyc b/src/__pycache__/gradio_args_transport.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4edbffea552df51e7e801ae189457b49950018fc Binary files /dev/null and b/src/__pycache__/gradio_args_transport.cpython-312.pyc differ diff --git a/src/__pycache__/misc.cpython-310.pyc b/src/__pycache__/misc.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9308ff24ad2b49f479dd466d9b3b029d68ddbba0 Binary files /dev/null and b/src/__pycache__/misc.cpython-310.pyc differ diff --git a/src/__pycache__/misc.cpython-311.pyc b/src/__pycache__/misc.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5cee818acc51be723f1f218afa1292dd0b67a82b Binary files /dev/null and b/src/__pycache__/misc.cpython-311.pyc differ diff --git a/src/__pycache__/misc.cpython-312.pyc b/src/__pycache__/misc.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..55adc8ea7053b5c4f5a1b8ea0713e7c9106c5264 Binary files /dev/null and b/src/__pycache__/misc.cpython-312.pyc differ diff --git 
a/src/__pycache__/normalmap_generation.cpython-310.pyc b/src/__pycache__/normalmap_generation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..62f5af84c7d23d2f3186e76d743aa1c1bec9fc1c Binary files /dev/null and b/src/__pycache__/normalmap_generation.cpython-310.pyc differ diff --git a/src/__pycache__/normalmap_generation.cpython-311.pyc b/src/__pycache__/normalmap_generation.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7477a6606680abb395a005ef04abfa74a6ec9170 Binary files /dev/null and b/src/__pycache__/normalmap_generation.cpython-311.pyc differ diff --git a/src/__pycache__/normalmap_generation.cpython-312.pyc b/src/__pycache__/normalmap_generation.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a1737d7090b6851126bc06e2d63f230fd3372416 Binary files /dev/null and b/src/__pycache__/normalmap_generation.cpython-312.pyc differ diff --git a/src/__pycache__/stereoimage_generation.cpython-310.pyc b/src/__pycache__/stereoimage_generation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f9b11bfb80ae590f397a1e7baf0379e39bbbde68 Binary files /dev/null and b/src/__pycache__/stereoimage_generation.cpython-310.pyc differ diff --git a/src/__pycache__/stereoimage_generation.cpython-311.pyc b/src/__pycache__/stereoimage_generation.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..19015ea8c68830af4c8d3eb9f9daa2dab3526b29 Binary files /dev/null and b/src/__pycache__/stereoimage_generation.cpython-311.pyc differ diff --git a/src/__pycache__/stereoimage_generation.cpython-312.pyc b/src/__pycache__/stereoimage_generation.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e519e47bf9df055964654640d87d06b1b3d4141 Binary files /dev/null and b/src/__pycache__/stereoimage_generation.cpython-312.pyc differ diff --git a/src/__pycache__/video_mode.cpython-310.pyc 
b/src/__pycache__/video_mode.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..029c58ba505d55a9fb817b4d8f25f759b88b645f Binary files /dev/null and b/src/__pycache__/video_mode.cpython-310.pyc differ diff --git a/src/__pycache__/video_mode.cpython-311.pyc b/src/__pycache__/video_mode.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f0cc0d99d92a0785c6b61c168d2db2d0f71fdd16 Binary files /dev/null and b/src/__pycache__/video_mode.cpython-311.pyc differ diff --git a/src/__pycache__/video_mode.cpython-312.pyc b/src/__pycache__/video_mode.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..750e63484912127b10977e0c5bbf77e5835deeea Binary files /dev/null and b/src/__pycache__/video_mode.cpython-312.pyc differ diff --git a/src/backbone.py b/src/backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..dcf997f44725faa034bb4fe838a55bd6475c7938 --- /dev/null +++ b/src/backbone.py @@ -0,0 +1,148 @@ +# DepthMap can be run inside stable-diffusion-webui, but also separately. +# All the stable-diffusion-webui stuff that the DepthMap relies on +# must be resided in this file (or in the scripts folder). +import pathlib +from datetime import datetime +import enum +import sys + + +class BackboneType(enum.Enum): + WEBUI = 1 + STANDALONE = 2 + + +try: + # stable-diffusion-webui backbone + from modules.images import save_image # Should fail if not on stable-diffusion-webui + from modules.devices import torch_gc # TODO: is this really sufficient? 
+ from modules.images import get_next_sequence_number + from modules.call_queue import wrap_gradio_gpu_call + from modules.shared import listfiles + + def get_opt(name, default): + from modules.shared import opts + if hasattr(opts, name): + return opts.__getattr__(name) + return default + + def get_cmd_opt(name, default): + """Get command line argument""" + from modules.shared import cmd_opts + if hasattr(cmd_opts, name): + return cmd_opts.__getattribute__(name) + return default + + def gather_ops(): + """Parameters for depthmap generation""" + ops = {} + for s in ['boost_rmax', 'precision', 'no_half', 'marigold_ensembles', 'marigold_steps']: + c = get_opt('depthmap_script_' + s, None) + if c is None: + c = get_cmd_opt(s, None) + if c is not None: + ops[s] = c + # sanitize for integers. + for s in ['marigold_ensembles', 'marigold_steps']: + if s in ops: + ops[s] = int(ops[s]) + return ops + + + def get_outpath(): + """Get path where results are saved by default""" + path = get_opt('outdir_samples', None) + if path is None or len(path) == 0: + path = get_opt('outdir_extras_samples', None) + assert path is not None and len(path) > 0 + return path + + + def unload_sd_model(): + from modules import shared, devices + if shared.sd_model is not None: + if shared.sd_model.cond_stage_model is not None: + shared.sd_model.cond_stage_model.to(devices.cpu) + if shared.sd_model.first_stage_model is not None: + shared.sd_model.first_stage_model.to(devices.cpu) + # Maybe something else??? + + + def reload_sd_model(): + from modules import shared, devices + if shared.sd_model is not None: + if shared.sd_model.cond_stage_model is not None: + shared.sd_model.cond_stage_model.to(devices.device) + if shared.sd_model.first_stage_model: + shared.sd_model.first_stage_model.to(devices.device) + # Maybe something else??? 
+ + def get_hide_dirs(): + import modules.shared + return modules.shared.hide_dirs + + USED_BACKBONE = BackboneType.WEBUI +except: + # Standalone backbone + print( # " DepthMap did not detect stable-diffusion-webui; launching with the standalone backbone.\n" + " The standalone mode is not on par with the stable-diffusion-webui mode.\n" + " Some features may be missing or work differently. Please report bugs.\n") + + def save_image(image, path, basename, **kwargs): + import os + os.makedirs(path, exist_ok=True) + if 'suffix' not in kwargs or len(kwargs['suffix']) == 0: + kwargs['suffix'] = '' + else: + kwargs['suffix'] = f"-{kwargs['suffix']}" + format = get_opt('samples_format', kwargs['extension']) + fullfn = os.path.join( + path, f"{basename}-{get_next_sequence_number(path, basename)}{kwargs['suffix']}.{format}") + image.save(fullfn, format=format) + + def torch_gc(): + # TODO: is this really sufficient? + import torch + if torch.cuda.is_available(): + with torch.cuda.device('cuda'): + torch.cuda.empty_cache() + torch.cuda.ipc_collect() + + launched_at = int(datetime.now().timestamp()) + backbone_current_seq_number = 0 + + # Make sure to preserve the function signature when calling! 
+ def get_next_sequence_number(outpath, basename): + global backbone_current_seq_number + backbone_current_seq_number += 1 + return int(f"{launched_at}{backbone_current_seq_number:04}") + + def wrap_gradio_gpu_call(f): return f # Displaying various stats is not supported + + def listfiles(dirname): + import os + filenames = [os.path.join(dirname, x) for x in sorted(os.listdir(dirname)) if not x.startswith(".")] + return [file for file in filenames if os.path.isfile(file)] + + def get_opt(name, default): return default # Configuring is not supported + + + def get_cmd_opt(name, default): return default # Configuring is not supported + + def gather_ops(): # Configuring is not supported + return {'boost_rmax': 1600, + 'precision': 'autocast', + 'no_half': False, + 'marigold_ensembles': 5, + 'marigold_steps': 12} + + def get_outpath(): return str(pathlib.Path('.', 'outputs')) + + def unload_sd_model(): pass # Not needed + + def reload_sd_model(): pass # Not needed + + def get_hide_dirs(): return {} # Directories will not be hidden from traversal (except when starts with the dot) + + + USED_BACKBONE = BackboneType.STANDALONE diff --git a/src/common_constants.py b/src/common_constants.py new file mode 100644 index 0000000000000000000000000000000000000000..4b645dd0637d80598241f3842d6b76dfaec9a618 --- /dev/null +++ b/src/common_constants.py @@ -0,0 +1,66 @@ +import enum + + +class GenerationOptions(enum.Enum): + """This Enum provides the options that are used in the usual generation + (that is, consumed by the core_generation_funnel). + Please use this to avoid typos. 
Also, this enum provides default values for these options.""" + def __new__(cls, *args, **kwds): + value = len(cls.__members__) + 1 + obj = object.__new__(cls) + obj._value_ = value + return obj + + def __init__(self, default_value=None, *args): + """Saves default value as a member (called "df") of a member of this enum""" + self.df = default_value + + COMPUTE_DEVICE = "GPU" + MODEL_TYPE = "Depth Anything v2 Base" # Will become enum element + BOOST = False + NET_SIZE_MATCH = False + NET_WIDTH = 448 + NET_HEIGHT = 448 + TILING_MODE = False + + DO_OUTPUT_DEPTH = True + OUTPUT_DEPTH_INVERT = False + OUTPUT_DEPTH_COMBINE = False + OUTPUT_DEPTH_COMBINE_AXIS = "Horizontal" # Format (str) is subject to change + DO_OUTPUT_DEPTH_PREDICTION = False # Hidden, do not use, subject to change + + CLIPDEPTH = False + CLIPDEPTH_MODE = "Range" + CLIPDEPTH_FAR = 0.0 + CLIPDEPTH_NEAR = 1.0 + + GEN_STEREO = False + STEREO_MODES = ["left-right", "red-cyan-anaglyph"] + STEREO_DIVERGENCE = 2.5 + STEREO_SEPARATION = 0.0 + STEREO_FILL_ALGO = "polylines_sharp" + STEREO_OFFSET_EXPONENT = 1.0 + STEREO_BALANCE = 0.0 + + GEN_NORMALMAP = False + NORMALMAP_PRE_BLUR = False + NORMALMAP_PRE_BLUR_KERNEL = 3 + NORMALMAP_SOBEL = True + NORMALMAP_SOBEL_KERNEL = 3 + NORMALMAP_POST_BLUR = False + NORMALMAP_POST_BLUR_KERNEL = 3 + NORMALMAP_INVERT = False + + GEN_HEATMAP = False + + GEN_SIMPLE_MESH = False + SIMPLE_MESH_OCCLUDE = True + SIMPLE_MESH_SPHERICAL = False + + GEN_INPAINTED_MESH = False + GEN_INPAINTED_MESH_DEMOS = False + + GEN_REMBG = False + SAVE_BACKGROUND_REMOVAL_MASKS = False # Legacy, will be reworked + PRE_DEPTH_BACKGROUND_REMOVAL = False # Legacy, will be reworked + REMBG_MODEL = "u2net" diff --git a/src/common_ui.py b/src/common_ui.py new file mode 100644 index 0000000000000000000000000000000000000000..0fd46101cf525ce2bd33305f022bb976dbe66a6a --- /dev/null +++ b/src/common_ui.py @@ -0,0 +1,595 @@ +import traceback +from pathlib import Path +import gradio as gr +from PIL import Image + 
+from src import backbone, video_mode +from src.core import core_generation_funnel, unload_models, run_makevideo +from src.depthmap_generation import ModelHolder +from src.gradio_args_transport import GradioComponentBundle +from src.misc import * +from src.common_constants import GenerationOptions as go + +# Ugly workaround to fix gradio tempfile issue +def ensure_gradio_temp_directory(): + try: + import tempfile + path = os.path.join(tempfile.gettempdir(), 'gradio') + if not (os.path.exists(path)): + os.mkdir(path) + except Exception as e: + traceback.print_exc() + + +ensure_gradio_temp_directory() + + +def main_ui_panel(is_depth_tab): + inp = GradioComponentBundle() + # TODO: Greater visual separation + with gr.Blocks(): + with gr.Row() as cur_option_root: + inp -= 'depthmap_gen_row_0', cur_option_root + inp += go.COMPUTE_DEVICE, gr.Radio(label="Compute on", choices=['GPU', 'CPU'], value='GPU') + # TODO: Should return value instead of index. Maybe Enum should be used? + inp += go.MODEL_TYPE, gr.Dropdown(label="Model", + choices=['res101', 'dpt_beit_large_512 (midas 3.1)', + 'dpt_beit_large_384 (midas 3.1)', 'dpt_large_384 (midas 3.0)', + 'dpt_hybrid_384 (midas 3.0)', + 'midas_v21', 'midas_v21_small', + 'zoedepth_n (indoor)', 'zoedepth_k (outdoor)', 'zoedepth_nk', + 'Marigold v1', 'Depth Anything', 'Depth Anything v2 Small', + 'Depth Anything v2 Base', 'Depth Anything v2 Large'], + value='Depth Anything v2 Base', type="index") + with gr.Box() as cur_option_root: + inp -= 'depthmap_gen_row_1', cur_option_root + with gr.Row(): + inp += go.BOOST, gr.Checkbox(label="BOOST", + info="Generate depth map parts in a mosaic fashion - very slow", + value=False) + inp += go.NET_SIZE_MATCH, gr.Checkbox(label="Match net size to input size", + info="Net size affects quality, performance and VRAM usage") + with gr.Row() as options_depend_on_match_size: + inp += go.NET_WIDTH, gr.Slider(minimum=64, maximum=2048, step=64, label='Net width') + inp += go.NET_HEIGHT, 
gr.Slider(minimum=64, maximum=2048, step=64, label='Net height') + with gr.Row(): + inp += go.TILING_MODE, gr.Checkbox( + label='Tiling mode', info='Reduces seams that appear if the depthmap is tiled into a grid' + ) + + with gr.Box() as cur_option_root: + inp -= 'depthmap_gen_row_2', cur_option_root + with gr.Row(): + with gr.Group(): # 50% of width + inp += "save_outputs", gr.Checkbox(label="Save Outputs", value=True) + with gr.Group(): # 50% of width + inp += go.DO_OUTPUT_DEPTH, gr.Checkbox(label="Output DepthMap") + inp += go.OUTPUT_DEPTH_INVERT, gr.Checkbox(label="Invert (black=near, white=far)") + with gr.Row() as options_depend_on_output_depth_1: + inp += go.OUTPUT_DEPTH_COMBINE, gr.Checkbox( + label="Combine input and depthmap into one image") + inp += go.OUTPUT_DEPTH_COMBINE_AXIS, gr.Radio( + label="Combine axis", choices=['Vertical', 'Horizontal'], type="value", visible=False) + + with gr.Box() as cur_option_root: + inp -= 'depthmap_gen_row_3', cur_option_root + with gr.Row(): + inp += go.CLIPDEPTH, gr.Checkbox(label="Clip and renormalize DepthMap") + inp += go.CLIPDEPTH_MODE,\ + gr.Dropdown(label="Mode", choices=['Range', 'Outliers'], type="value", visible=False) + with gr.Row(visible=False) as clip_options_row_1: + inp += go.CLIPDEPTH_FAR, gr.Slider(minimum=0, maximum=1, step=0.001, label='Far clip') + inp += go.CLIPDEPTH_NEAR, gr.Slider(minimum=0, maximum=1, step=0.001, label='Near clip') + + with gr.Box(): + with gr.Row(): + inp += go.GEN_STEREO, gr.Checkbox(label="Generate stereoscopic (3D) image(s)") + with gr.Column(visible=False) as stereo_options: + with gr.Row(): + inp += go.STEREO_MODES, gr.CheckboxGroup( + ["left-right", "right-left", "top-bottom", "bottom-top", "red-cyan-anaglyph", + "left-only", "only-right", "cyan-red-reverseanaglyph" + ][0:8 if backbone.get_opt('depthmap_script_extra_stereomodes', False) else 5], label="Output") + with gr.Row(): + inp += go.STEREO_DIVERGENCE, gr.Slider(minimum=0.05, maximum=15.005, step=0.01, + 
label='Divergence (3D effect)') + inp += go.STEREO_SEPARATION, gr.Slider(minimum=-5.0, maximum=5.0, step=0.01, + label='Separation (moves images apart)') + with gr.Row(): + inp += go.STEREO_FILL_ALGO, gr.Dropdown(label="Gap fill technique", + choices=['none', 'naive', 'naive_interpolating', 'polylines_soft', + 'polylines_sharp'], + type="value") + inp += go.STEREO_OFFSET_EXPONENT, gr.Slider(label="Magic exponent", minimum=1, maximum=2, step=1) + inp += go.STEREO_BALANCE, gr.Slider(minimum=-1.0, maximum=1.0, step=0.05, + label='Balance between eyes') + + with gr.Box(): + with gr.Row(): + inp += go.GEN_NORMALMAP, gr.Checkbox(label="Generate NormalMap") + with gr.Column(visible=False) as normalmap_options: + with gr.Row(): + inp += go.NORMALMAP_PRE_BLUR, gr.Checkbox(label="Smooth before calculating normals") + inp += go.NORMALMAP_PRE_BLUR_KERNEL, gr.Slider(minimum=1, maximum=31, step=2, label='Pre-smooth kernel size', visible=False) + inp.add_rule(go.NORMALMAP_PRE_BLUR_KERNEL, 'visible-if', go.NORMALMAP_PRE_BLUR) + with gr.Row(): + inp += go.NORMALMAP_SOBEL, gr.Checkbox(label="Sobel gradient") + inp += go.NORMALMAP_SOBEL_KERNEL, gr.Slider(minimum=1, maximum=31, step=2, label='Sobel kernel size') + inp.add_rule(go.NORMALMAP_SOBEL_KERNEL, 'visible-if', go.NORMALMAP_SOBEL) + with gr.Row(): + inp += go.NORMALMAP_POST_BLUR, gr.Checkbox(label="Smooth after calculating normals") + inp += go.NORMALMAP_POST_BLUR_KERNEL, gr.Slider(minimum=1, maximum=31, step=2, label='Post-smooth kernel size', visible=False) + inp.add_rule(go.NORMALMAP_POST_BLUR_KERNEL, 'visible-if', go.NORMALMAP_POST_BLUR) + with gr.Row(): + inp += go.NORMALMAP_INVERT, gr.Checkbox(label="Invert") + + if backbone.get_opt('depthmap_script_gen_heatmap_from_ui', False): + with gr.Box(): + with gr.Row(): + inp += go.GEN_HEATMAP, gr.Checkbox(label="Generate HeatMap") + + with gr.Box(): + with gr.Column(): + inp += go.GEN_SIMPLE_MESH, gr.Checkbox(label="Generate simple 3D mesh") + with gr.Column(visible=False) as 
mesh_options: + with gr.Row(): + gr.HTML(value="Generates fast, accurate only with ZoeDepth models and no boost, no custom maps.") + with gr.Row(): + inp += go.SIMPLE_MESH_OCCLUDE, gr.Checkbox(label="Remove occluded edges") + inp += go.SIMPLE_MESH_SPHERICAL, gr.Checkbox(label="Equirectangular projection") + + if is_depth_tab: + with gr.Box(): + with gr.Column(): + inp += go.GEN_INPAINTED_MESH, gr.Checkbox( + label="Generate 3D inpainted mesh") + with gr.Column(visible=False) as inpaint_options_row_0: + gr.HTML("Generation is sloooow. Required for generating videos from mesh.") + inp += go.GEN_INPAINTED_MESH_DEMOS, gr.Checkbox( + label="Generate 4 demo videos with 3D inpainted mesh.") + gr.HTML("More options for generating video can be found in the Generate video tab.") + + with gr.Box(): + # TODO: it should be clear from the UI that there is an option of the background removal + # that does not use the model selected above + with gr.Row(): + inp += go.GEN_REMBG, gr.Checkbox(label="Remove background") + with gr.Column(visible=False) as bgrem_options: + with gr.Row(): + inp += go.SAVE_BACKGROUND_REMOVAL_MASKS, gr.Checkbox(label="Save the foreground masks") + inp += go.PRE_DEPTH_BACKGROUND_REMOVAL, gr.Checkbox(label="Pre-depth background removal") + with gr.Row(): + inp += go.REMBG_MODEL, gr.Dropdown( + label="Rembg Model", type="value", + choices=['u2net', 'u2netp', 'u2net_human_seg', 'silueta', "isnet-general-use", "isnet-anime"]) + + with gr.Box(): + gr.HTML(f"{SCRIPT_FULL_NAME}
") + gr.HTML("Information, comment and share @ " + "https://github.com/thygate/stable-diffusion-webui-depthmap-script") + + def update_default_net_size(model_type): + w, h = ModelHolder.get_default_net_size(model_type) + return inp[go.NET_WIDTH].update(value=w), inp[go.NET_HEIGHT].update(value=h) + + inp[go.MODEL_TYPE].change( + fn=update_default_net_size, + inputs=inp[go.MODEL_TYPE], + outputs=[inp[go.NET_WIDTH], inp[go.NET_HEIGHT]] + ) + + inp[go.BOOST].change( # Go boost! Wroom!.. + fn=lambda a, b: (inp[go.NET_SIZE_MATCH].update(visible=not a), + options_depend_on_match_size.update(visible=not a and not b)), + inputs=[inp[go.BOOST], inp[go.NET_SIZE_MATCH]], + outputs=[inp[go.NET_SIZE_MATCH], options_depend_on_match_size] + ) + inp.add_rule(options_depend_on_match_size, 'visible-if-not', go.NET_SIZE_MATCH) + inp[go.TILING_MODE].change( # Go boost! Wroom!.. + fn=lambda a: ( + inp[go.BOOST].update(value=False), inp[go.NET_SIZE_MATCH].update(value=True) + ) if a else (inp[go.BOOST].update(), inp[go.NET_SIZE_MATCH].update()), + inputs=[inp[go.TILING_MODE]], + outputs=[inp[go.BOOST], inp[go.NET_SIZE_MATCH]] + ) + + inp.add_rule(options_depend_on_output_depth_1, 'visible-if', go.DO_OUTPUT_DEPTH) + inp.add_rule(go.OUTPUT_DEPTH_INVERT, 'visible-if', go.DO_OUTPUT_DEPTH) + inp.add_rule(go.OUTPUT_DEPTH_COMBINE_AXIS, 'visible-if', go.OUTPUT_DEPTH_COMBINE) + inp.add_rule(go.CLIPDEPTH_MODE, 'visible-if', go.CLIPDEPTH) + inp.add_rule(clip_options_row_1, 'visible-if', go.CLIPDEPTH) + + inp[go.CLIPDEPTH_FAR].change( + fn=lambda a, b: a if b < a else b, + inputs=[inp[go.CLIPDEPTH_FAR], inp[go.CLIPDEPTH_NEAR]], + outputs=[inp[go.CLIPDEPTH_NEAR]], + show_progress=False + ) + inp[go.CLIPDEPTH_NEAR].change( + fn=lambda a, b: a if b > a else b, + inputs=[inp[go.CLIPDEPTH_NEAR], inp[go.CLIPDEPTH_FAR]], + outputs=[inp[go.CLIPDEPTH_FAR]], + show_progress=False + ) + + inp.add_rule(stereo_options, 'visible-if', go.GEN_STEREO) + inp.add_rule(normalmap_options, 'visible-if', 
go.GEN_NORMALMAP) + inp.add_rule(mesh_options, 'visible-if', go.GEN_SIMPLE_MESH) + if is_depth_tab: + inp.add_rule(inpaint_options_row_0, 'visible-if', go.GEN_INPAINTED_MESH) + inp.add_rule(bgrem_options, 'visible-if', go.GEN_REMBG) + + return inp + +def open_folder_action(): + # Adapted from stable-diffusion-webui + f = backbone.get_outpath() + if backbone.get_cmd_opt('hide_ui_dir_config', False): + return + if not os.path.exists(f) or not os.path.isdir(f): + raise Exception("Couldn't open output folder") # .isdir is security-related, do not remove! + import platform + import subprocess as sp + path = os.path.normpath(f) + if platform.system() == "Windows": + os.startfile(path) + elif platform.system() == "Darwin": + sp.Popen(["open", path]) + elif "microsoft-standard-WSL2" in platform.uname().release: + sp.Popen(["wsl-open", path]) + else: + sp.Popen(["xdg-open", path]) + + +def depthmap_mode_video(inp): + gr.HTML(value="Single video mode allows generating videos from videos. Please " + "keep in mind that all the frames of the video need to be processed - therefore it is important to " + "pick settings so that the generation is not too slow. 
For the best results, " + "use a zoedepth model, since they provide the highest level of coherency between frames.") + inp += gr.File(elem_id='depthmap_vm_input', label="Video or animated file", + file_count="single", interactive=True, type="file") + inp += gr.Checkbox(elem_id="depthmap_vm_custom_checkbox", + label="Use custom/pregenerated DepthMap video", value=False) + inp += gr.Dropdown(elem_id="depthmap_vm_smoothening_mode", label="Smoothening", + type="value", choices=['none', 'experimental'], value='experimental') + inp += gr.File(elem_id='depthmap_vm_custom', file_count="single", + interactive=True, type="file", visible=False) + with gr.Row(): + inp += gr.Checkbox(elem_id='depthmap_vm_compress_checkbox', label="Compress colorvideos?", value=False) + inp += gr.Slider(elem_id='depthmap_vm_compress_bitrate', label="Bitrate (kbit)", visible=False, + minimum=1000, value=15000, maximum=50000, step=250) + + inp.add_rule('depthmap_vm_custom', 'visible-if', 'depthmap_vm_custom_checkbox') + inp.add_rule('depthmap_vm_smoothening_mode', 'visible-if-not', 'depthmap_vm_custom_checkbox') + inp.add_rule('depthmap_vm_compress_bitrate', 'visible-if', 'depthmap_vm_compress_checkbox') + + return inp + + +custom_css = """ +#depthmap_vm_input {height: 75px} +#depthmap_vm_custom {height: 75px} +""" + + +def on_ui_tabs(): + inp = GradioComponentBundle() + with gr.Blocks(analytics_enabled=False, title="DepthMap", css=custom_css) as depthmap_interface: + with gr.Row(equal_height=False): + with gr.Column(variant='panel'): + inp += 'depthmap_mode', gr.HTML(visible=False, value='0') + with gr.Tabs(): + with gr.TabItem('Single Image') as depthmap_mode_0: + with gr.Group(): + with gr.Row(): + inp += gr.Image(label="Source", source="upload", interactive=True, type="pil", + elem_id="depthmap_input_image") + # TODO: depthmap generation settings should disappear when using this + inp += gr.File(label="Custom DepthMap", file_count="single", interactive=True, + type="file", 
elem_id='custom_depthmap_img', visible=False) + inp += gr.Checkbox(elem_id="custom_depthmap", label="Use custom DepthMap", value=False) + with gr.TabItem('Batch Process') as depthmap_mode_1: + inp += gr.File(elem_id='image_batch', label="Batch Process", file_count="multiple", + interactive=True, type="file") + with gr.TabItem('Batch from Directory') as depthmap_mode_2: + inp += gr.Textbox(elem_id="depthmap_batch_input_dir", label="Input directory", + **backbone.get_hide_dirs(), + placeholder="A directory on the same machine where the server is running.") + inp += gr.Textbox(elem_id="depthmap_batch_output_dir", label="Output directory", + **backbone.get_hide_dirs(), + placeholder="Leave blank to save images to the default path.") + gr.HTML("Files in the output directory may be overwritten.") + inp += gr.Checkbox(elem_id="depthmap_batch_reuse", + label="Skip generation and use (edited/custom) depthmaps " + "in output directory when a file already exists.", + value=True) + with gr.TabItem('Single Video') as depthmap_mode_3: + inp = depthmap_mode_video(inp) + submit = gr.Button('Generate', elem_id="depthmap_generate", variant='primary') + inp |= main_ui_panel(True) # Main panel is inserted here + unloadmodels = gr.Button('Unload models', elem_id="depthmap_unloadmodels") + + with gr.Column(variant='panel'): + with gr.Tabs(elem_id="mode_depthmap_output"): + with gr.TabItem('Depth Output'): + with gr.Group(): + result_images = gr.Gallery(label='Output', show_label=False, + elem_id=f"depthmap_gallery", columns=4) + with gr.Column(): + html_info = gr.HTML() + folder_symbol = '\U0001f4c2' # 📂 + gr.Button(folder_symbol, visible=not backbone.get_cmd_opt('hide_ui_dir_config', False)).click( + fn=lambda: open_folder_action(), inputs=[], outputs=[], + ) + + with gr.TabItem('3D Mesh'): + with gr.Group(): + result_depthmesh = gr.Model3D(label="3d Mesh", clear_color=[1.0, 1.0, 1.0, 1.0]) + with gr.Row(): + # loadmesh = gr.Button('Load') + clearmesh = gr.Button('Clear') + + with 
gr.TabItem('Generate video'): + # generate video + with gr.Group(): + with gr.Row(): + gr.Markdown("Generate video from inpainted(!) mesh.") + with gr.Row(): + depth_vid = gr.Video(interactive=False) + with gr.Column(): + vid_html_info_x = gr.HTML() + vid_html_info = gr.HTML() + fn_mesh = gr.Textbox(label="Input Mesh (.ply | .obj)", **backbone.get_hide_dirs(), + placeholder="A file on the same machine where " + "the server is running.") + with gr.Row(): + vid_numframes = gr.Textbox(label="Number of frames", value="300") + vid_fps = gr.Textbox(label="Framerate", value="40") + vid_format = gr.Dropdown(label="Format", choices=['mp4', 'webm'], value='mp4', + type="value", elem_id="video_format") + vid_ssaa = gr.Dropdown(label="SSAA", choices=['1', '2', '3', '4'], value='3', + type="value", elem_id="video_ssaa") + with gr.Row(): + vid_traj = gr.Dropdown(label="Trajectory", + choices=['straight-line', 'double-straight-line', 'circle'], + value='double-straight-line', type="index", + elem_id="video_trajectory") + vid_shift = gr.Textbox(label="Translate: x, y, z", value="-0.015, 0.0, -0.05") + vid_border = gr.Textbox(label="Crop: top, left, bottom, right", + value="0.03, 0.03, 0.05, 0.03") + vid_dolly = gr.Checkbox(label="Dolly", value=False, elem_classes="smalltxt") + with gr.Row(): + submit_vid = gr.Button('Generate Video', elem_id="depthmap_generatevideo", + variant='primary') + + inp += inp.enkey_tail() + + depthmap_mode_0.select(lambda: '0', None, inp['depthmap_mode']) + depthmap_mode_1.select(lambda: '1', None, inp['depthmap_mode']) + depthmap_mode_2.select(lambda: '2', None, inp['depthmap_mode']) + depthmap_mode_3.select(lambda: '3', None, inp['depthmap_mode']) + + def custom_depthmap_change_fn(mode, zero_on, three_on): + hide = mode == '0' and zero_on or mode == '3' and three_on + return inp['custom_depthmap_img'].update(visible=hide), \ + inp['depthmap_gen_row_0'].update(visible=not hide), \ + inp['depthmap_gen_row_1'].update(visible=not hide), \ + 
inp['depthmap_gen_row_3'].update(visible=not hide), not hide + custom_depthmap_change_els = ['depthmap_mode', 'custom_depthmap', 'depthmap_vm_custom_checkbox'] + for el in custom_depthmap_change_els: + inp[el].change( + fn=custom_depthmap_change_fn, + inputs=[inp[el] for el in custom_depthmap_change_els], + outputs=[inp[st] for st in [ + 'custom_depthmap_img', 'depthmap_gen_row_0', 'depthmap_gen_row_1', 'depthmap_gen_row_3', + go.DO_OUTPUT_DEPTH]]) + + unloadmodels.click( + fn=unload_models, + inputs=[], + outputs=[] + ) + + clearmesh.click( + fn=lambda: None, + inputs=[], + outputs=[result_depthmesh] + ) + + submit.click( + fn=backbone.wrap_gradio_gpu_call(run_generate), + inputs=inp.enkey_body(), + outputs=[ + result_images, + fn_mesh, + result_depthmesh, + html_info + ] + ) + + submit_vid.click( + fn=backbone.wrap_gradio_gpu_call(run_makevideo), + inputs=[ + fn_mesh, + vid_numframes, + vid_fps, + vid_traj, + vid_shift, + vid_border, + vid_dolly, + vid_format, + vid_ssaa + ], + outputs=[ + depth_vid, + vid_html_info_x, + vid_html_info + ] + ) + + return depthmap_interface + + +def format_exception(e: Exception): + traceback.print_exc() + msg = '

' + 'ERROR: ' + str(e) + '

' + '\n' + if 'out of GPU memory' in msg: + pass + elif "torch.hub.load('facebookresearch/dinov2'," in traceback.format_exc(): + msg += ('

To use Depth Anything integration in WebUI mode, please add "--disable-safe-unpickle" to the command line flags. ' + 'Alternatively, use Standalone mode. This is a known issue.') + elif "Error(s) in loading state_dict " in traceback.format_exc(): + msg += ('

There was issue during loading the model.' + 'Please add "--disable-safe-unpickle" to the command line flags. This is a known issue.') + elif 'out of GPU memory' not in msg: + msg += \ + 'Please report this issue ' \ + f'here. ' \ + 'Make sure to provide the full stacktrace: \n' + msg += '' + traceback.format_exc() + '' + return msg + + +def run_generate(*inputs): + inputs = GradioComponentBundle.enkey_to_dict(inputs) + depthmap_mode = inputs['depthmap_mode'] + depthmap_batch_input_dir = inputs['depthmap_batch_input_dir'] + image_batch = inputs['image_batch'] + depthmap_input_image = inputs['depthmap_input_image'] + depthmap_batch_output_dir = inputs['depthmap_batch_output_dir'] + depthmap_batch_reuse = inputs['depthmap_batch_reuse'] + custom_depthmap = inputs['custom_depthmap'] + custom_depthmap_img = inputs['custom_depthmap_img'] + + inputimages = [] + inputdepthmaps = [] # Allow supplying custom depthmaps + inputnames = [] # Also keep track of original file names + + if depthmap_mode == '3': + try: + custom_depthmap = inputs['depthmap_vm_custom'] \ + if inputs['depthmap_vm_custom_checkbox'] else None + colorvids_bitrate = inputs['depthmap_vm_compress_bitrate'] \ + if inputs['depthmap_vm_compress_checkbox'] else None + ret = video_mode.gen_video( + inputs['depthmap_vm_input'], backbone.get_outpath(), inputs, custom_depthmap, colorvids_bitrate, + inputs['depthmap_vm_smoothening_mode']) + return [], None, None, ret + except Exception as e: + ret = format_exception(e) + return [], None, None, ret + + if depthmap_mode == '2' and depthmap_batch_output_dir != '': + outpath = depthmap_batch_output_dir + else: + outpath = backbone.get_outpath() + + if depthmap_mode == '0': # Single image + if depthmap_input_image is None: + return [], None, None, "Please select an input image" + inputimages.append(depthmap_input_image) + inputnames.append(None) + if custom_depthmap: + if custom_depthmap_img is None: + return [], None, None, \ + "Custom depthmap is not specified. 
Please either supply it or disable this option." + inputdepthmaps.append(Image.open(os.path.abspath(custom_depthmap_img.name))) + else: + inputdepthmaps.append(None) + if depthmap_mode == '1': # Batch Process + if image_batch is None: + return [], None, None, "Please select input images", "" + for img in image_batch: + image = Image.open(os.path.abspath(img.name)) + inputimages.append(image) + inputnames.append(os.path.splitext(img.orig_name)[0]) + print(f'{len(inputimages)} images will be processed') + elif depthmap_mode == '2': # Batch from Directory + # TODO: There is a RAM leak when we process batches, I can smell it! Or maybe it is gone. + assert not backbone.get_cmd_opt('hide_ui_dir_config', False), '--hide-ui-dir-config option must be disabled' + if depthmap_batch_input_dir == '': + return [], None, None, "Please select an input directory." + if depthmap_batch_input_dir == depthmap_batch_output_dir: + return [], None, None, "Please pick different directories for batch processing." + image_list = backbone.listfiles(depthmap_batch_input_dir) + for path in image_list: + try: + inputimages.append(Image.open(path)) + inputnames.append(path) + + custom_depthmap = None + if depthmap_batch_reuse: + basename = Path(path).stem + # Custom names are not used in samples directory + if outpath != backbone.get_opt('outdir_extras_samples', None): + # Possible filenames that the custom depthmaps may have + name_candidates = [f'{basename}-0000.{backbone.get_opt("samples_format", "png")}', # current format + f'{basename}.png', # human-intuitive format + f'{Path(path).name}'] # human-intuitive format (worse) + for fn_cand in name_candidates: + path_cand = os.path.join(outpath, fn_cand) + if os.path.isfile(path_cand): + custom_depthmap = Image.open(os.path.abspath(path_cand)) + break + inputdepthmaps.append(custom_depthmap) + except Exception as e: + print(f'Failed to load {path}, ignoring. 
Exception: {str(e)}') + inputdepthmaps_n = len([1 for x in inputdepthmaps if x is not None]) + print(f'{len(inputimages)} images will be processed, {inputdepthmaps_n} existing depthmaps will be reused') + + gen_obj = core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inputs, backbone.gather_ops()) + + # Saving images + img_results = [] + results_total = 0 + inpainted_mesh_fi = mesh_simple_fi = None + msg = "" # Empty string is never returned + while True: + try: + input_i, type, result = next(gen_obj) + results_total += 1 + except StopIteration: + # TODO: return more info + msg = '

Successfully generated

' if results_total > 0 else \ + '

Successfully generated nothing - please check the settings and try again

' + break + except Exception as e: + msg = format_exception(e) + break + if type == 'simple_mesh': + mesh_simple_fi = result + continue + if type == 'inpainted_mesh': + inpainted_mesh_fi = result + continue + if not isinstance(result, Image.Image): + print(f'This is not supposed to happen! Somehow output type {type} is not supported! Input_i: {input_i}.') + continue + img_results += [(input_i, type, result)] + + if inputs["save_outputs"]: + try: + basename = 'depthmap' + if depthmap_mode == '2' and inputnames[input_i] is not None: + if outpath != backbone.get_opt('outdir_extras_samples', None): + basename = Path(inputnames[input_i]).stem + suffix = "" if type == "depth" else f"{type}" + backbone.save_image(result, path=outpath, basename=basename, seed=None, + prompt=None, extension=backbone.get_opt('samples_format', 'png'), short_filename=True, + no_prompt=True, grid=False, pnginfo_section_name="extras", + suffix=suffix) + except Exception as e: + if not ('image has wrong mode' in str(e) or 'I;16' in str(e)): + raise e + print('Catched exception: image has wrong mode!') + traceback.print_exc() + + # Deciding what mesh to display (and if) + display_mesh_fi = None + if backbone.get_opt('depthmap_script_show_3d', True): + display_mesh_fi = mesh_simple_fi + if backbone.get_opt('depthmap_script_show_3d_inpaint', True): + if inpainted_mesh_fi is not None and len(inpainted_mesh_fi) > 0: + display_mesh_fi = inpainted_mesh_fi + return map(lambda x: x[2], img_results), inpainted_mesh_fi, display_mesh_fi, msg.replace('\n', '
') diff --git a/src/core.py b/src/core.py new file mode 100644 index 0000000000000000000000000000000000000000..a831115ee7eaa1609b2fa15519706bb380850918 --- /dev/null +++ b/src/core.py @@ -0,0 +1,773 @@ +from pathlib import Path +from PIL import Image + +try: + from tqdm import trange +except: + from builtins import range as trange + +import torch, gc +import cv2 +import os.path +import numpy as np +import copy +import platform +import math + +# Our code +from src.misc import * +from src.common_constants import GenerationOptions as go +from src.common_constants import * +from src.stereoimage_generation import create_stereoimages +from src.normalmap_generation import create_normalmap +from src.depthmap_generation import ModelHolder +from src import backbone + +try: + # 3d-photo-inpainting imports + from inpaint.mesh import write_mesh, read_mesh, output_3d_photo + from inpaint.networks import Inpaint_Color_Net, Inpaint_Depth_Net, Inpaint_Edge_Net + from inpaint.utils import path_planning + from inpaint.bilateral_filtering import sparse_bilateral_filtering +except Exception as e: + print('Impaint import failed. Impaint will not work.') + import traceback + traceback.print_exc() + +global video_mesh_data, video_mesh_fn +video_mesh_data = None +video_mesh_fn = None + +model_holder = ModelHolder() + + +def convert_to_i16(arr): + # Single channel, 16 bit image. This loses some precision! 
+ # uint16 conversion uses round-down, therefore values should be [0; 2**16) + numbytes = 2 + max_val = (2 ** (8 * numbytes)) + out = np.clip(arr * max_val + 0.0001, 0, max_val - 0.1) # -0.1 from above is needed to avoid overflowing + return out.astype("uint16") + +def convert_i16_to_rgb(image, like): + # three channel, 8 bits per channel image + output = np.zeros_like(like) + output[:, :, 0] = image / 256.0 + output[:, :, 1] = image / 256.0 + output[:, :, 2] = image / 256.0 + return output + + +class CoreGenerationFunnelInp: + """This class takes a dictionary and creates a core_generation_funnel inp. + Non-applicable parameters are silently discarded (no error)""" + def __init__(self, values): + if isinstance(values, CoreGenerationFunnelInp): + values = values.values + values = {(k.name if isinstance(k, GenerationOptions) else k).lower(): v for k, v in values.items()} + + self.values = {} + for setting in GenerationOptions: + name = setting.name.lower() + self.values[name] = values[name] if name in values else setting.df + + def __getitem__(self, item): + if isinstance(item, GenerationOptions): + return self.values[item.name.lower()] + return self.values[item] + + def __getattr__(self, item): + return self[item] + + +def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp, ops=None): + if len(inputimages) == 0 or inputimages[0] is None: + return + if inputdepthmaps is None or len(inputdepthmaps) == 0: + inputdepthmaps: list[Image] = [None for _ in range(len(inputimages))] + inputdepthmaps_complete = all([x is not None for x in inputdepthmaps]) + + inp = CoreGenerationFunnelInp(inp) + + if ops is None: + ops = backbone.gather_ops() + model_holder.update_settings(**ops) + + # TODO: ideally, run_depthmap should not save meshes - that makes the function not pure + print(SCRIPT_FULL_NAME) + print(f'Backbone: {backbone.USED_BACKBONE.name}') + + backbone.unload_sd_model() + + # TODO: this still should not be here + background_removed_images = [] 
+ # remove on base image before depth calculation + if inp[go.GEN_REMBG]: + if inp[go.PRE_DEPTH_BACKGROUND_REMOVAL]: + inputimages = batched_background_removal(inputimages, inp[go.REMBG_MODEL]) + background_removed_images = inputimages + else: + background_removed_images = batched_background_removal(inputimages, inp[go.REMBG_MODEL]) + + # init torch device + if inp[go.COMPUTE_DEVICE] == 'GPU': + if torch.cuda.is_available(): + device = torch.device("cuda") + else: + print('WARNING: Cuda device was not found, cpu will be used') + device = torch.device("cpu") + else: + device = torch.device("cpu") + print("device: %s" % device) + + # TODO: This should not be here + inpaint_imgs = [] + inpaint_depths = [] + + try: + if not inputdepthmaps_complete: + print("Loading model(s) ..") + model_holder.ensure_models(inp[go.MODEL_TYPE], device, inp[go.BOOST], inp[go.TILING_MODE]) + print("Computing output(s) ..") + # iterate over input images + for count in trange(0, len(inputimages)): + # Convert single channel input (PIL) images to rgb + if inputimages[count].mode == 'I': + inputimages[count].point(lambda p: p * 0.0039063096, mode='RGB') + inputimages[count] = inputimages[count].convert('RGB') + + raw_prediction = None + """Raw prediction, as returned by a model. 
None if input depthmap is used.""" + raw_prediction_invert = False + """True if near=dark on raw_prediction""" + out = None + + if inputdepthmaps is not None and inputdepthmaps[count] is not None: + # use custom depthmap + dp = inputdepthmaps[count] + if isinstance(dp, Image.Image): + if dp.width != inputimages[count].width or dp.height != inputimages[count].height: + try: # LANCZOS may fail on some formats + dp = dp.resize((inputimages[count].width, inputimages[count].height), Image.Resampling.LANCZOS) + except: + dp = dp.resize((inputimages[count].width, inputimages[count].height)) + # Trying desperately to rescale image to [0;1) without actually normalizing it + # Normalizing is avoided, because we want to preserve the scale of the original depthmaps + # (batch mode, video mode). + if len(dp.getbands()) == 1: + out = np.asarray(dp, dtype="float") + out_max = out.max() + if out_max < 256: + bit_depth = 8 + elif out_max < 65536: + bit_depth = 16 + else: + bit_depth = 32 + out /= 2.0 ** bit_depth + else: + out = np.asarray(dp, dtype="float")[:, :, 0] + out /= 256.0 + else: + # Should be in interval [0; 1], values outside of this range will be clipped. 
+ out = np.asarray(dp, dtype="float") + assert inputimages[count].height == out.shape[0], "Custom depthmap height mismatch" + assert inputimages[count].width == out.shape[1], "Custom depthmap width mismatch" + else: + # override net size (size may be different for different images) + if inp[go.NET_SIZE_MATCH]: + # Round up to a multiple of 32 to avoid potential issues + # TODO: buggs for Depth Anything + net_width = (inputimages[count].width + 31) // 32 * 32 + net_height = (inputimages[count].height + 31) // 32 * 32 + else: + net_width = inp[go.NET_WIDTH] + net_height = inp[go.NET_HEIGHT] + raw_prediction, raw_prediction_invert = \ + model_holder.get_raw_prediction(inputimages[count], net_width, net_height) + + # output + if abs(raw_prediction.max() - raw_prediction.min()) > np.finfo("float").eps: + out = np.copy(raw_prediction) + # TODO: some models may output negative values, maybe these should be clamped to zero. + if raw_prediction_invert: + out *= -1 + if inp[go.DO_OUTPUT_DEPTH_PREDICTION]: + yield count, 'depth_prediction', np.copy(out) + if inp[go.CLIPDEPTH]: + if inp[go.CLIPDEPTH_MODE] == 'Range': + out = (out - out.min()) / (out.max() - out.min()) # normalize to [0; 1] + out = np.clip(out, inp[go.CLIPDEPTH_FAR], inp[go.CLIPDEPTH_NEAR]) + elif inp[go.CLIPDEPTH_MODE] == 'Outliers': + fb, nb = np.percentile(out, [inp[go.CLIPDEPTH_FAR] * 100.0, inp[go.CLIPDEPTH_NEAR] * 100.0]) + out = np.clip(out, fb, nb) + out = (out - out.min()) / (out.max() - out.min()) # normalize to [0; 1] + else: + # Regretfully, the depthmap is broken and will be replaced with a black image + out = np.zeros(raw_prediction.shape) + + # Maybe we should not use img_output for everything, since we get better accuracy from + # the raw_prediction. However, it is not always supported. We maybe would like to achieve + # reproducibility, so depthmap of the image should be the same as generating the depthmap one more time. 
+ img_output = convert_to_i16(out) + """Depthmap (near=bright), as uint16""" + + # if 3dinpainting, store maps for processing in second pass + if inp[go.GEN_INPAINTED_MESH]: + inpaint_imgs.append(inputimages[count]) + inpaint_depths.append(img_output) + + # applying background masks after depth + if inp[go.GEN_REMBG]: + print('applying background masks') + background_removed_image = background_removed_images[count] + # maybe a threshold cut would be better on the line below. + background_removed_array = np.array(background_removed_image) + bg_mask = (background_removed_array[:, :, 0] == 0) & (background_removed_array[:, :, 1] == 0) & ( + background_removed_array[:, :, 2] == 0) & (background_removed_array[:, :, 3] <= 0.2) + img_output[bg_mask] = 0 # far value + + yield count, 'background_removed', background_removed_image + + if inp[go.SAVE_BACKGROUND_REMOVAL_MASKS]: + bg_array = (1 - bg_mask.astype('int8')) * 255 + mask_array = np.stack((bg_array, bg_array, bg_array, bg_array), axis=2) + mask_image = Image.fromarray(mask_array.astype(np.uint8)) + + yield count, 'foreground_mask', mask_image + + # A weird quirk: if user tries to save depthmap, whereas custom depthmap is used, + # custom depthmap will be outputed + if inp[go.DO_OUTPUT_DEPTH]: + img_depth = cv2.bitwise_not(img_output) if inp[go.OUTPUT_DEPTH_INVERT] else img_output + if inp[go.OUTPUT_DEPTH_COMBINE]: + axis = 1 if inp[go.OUTPUT_DEPTH_COMBINE_AXIS] == 'Horizontal' else 0 + img_concat = Image.fromarray(np.concatenate( + (inputimages[count], convert_i16_to_rgb(img_depth, inputimages[count])), + axis=axis)) + yield count, 'concat_depth', img_concat + else: + yield count, 'depth', Image.fromarray(img_depth) + + if inp[go.GEN_STEREO]: + # print("Generating stereoscopic image(s)..") + stereoimages = create_stereoimages( + inputimages[count], img_output, + inp[go.STEREO_DIVERGENCE], inp[go.STEREO_SEPARATION], + inp[go.STEREO_MODES], + inp[go.STEREO_BALANCE], inp[go.STEREO_OFFSET_EXPONENT], 
inp[go.STEREO_FILL_ALGO]) + for c in range(0, len(stereoimages)): + yield count, inp[go.STEREO_MODES][c], stereoimages[c] + + if inp[go.GEN_NORMALMAP]: + normalmap = create_normalmap( + img_output, + inp[go.NORMALMAP_PRE_BLUR_KERNEL] if inp[go.NORMALMAP_PRE_BLUR] else None, + inp[go.NORMALMAP_SOBEL_KERNEL] if inp[go.NORMALMAP_SOBEL] else None, + inp[go.NORMALMAP_POST_BLUR_KERNEL] if inp[go.NORMALMAP_POST_BLUR] else None, + inp[go.NORMALMAP_INVERT] + ) + yield count, 'normalmap', normalmap + + if inp[go.GEN_HEATMAP]: + from dzoedepth.utils.misc import colorize + heatmap = Image.fromarray(colorize(img_output, cmap='inferno')) + yield count, 'heatmap', heatmap + + # gen mesh + if inp[go.GEN_SIMPLE_MESH]: + print(f"\nGenerating (occluded) mesh ..") + basename = 'depthmap' + meshsimple_fi = get_uniquefn(outpath, basename, 'obj', 'simple') + + depthi = raw_prediction if raw_prediction is not None else out + depthi_min, depthi_max = depthi.min(), depthi.max() + # try to map output to sensible values for non zoedepth models, boost, or custom maps + if inp[go.MODEL_TYPE] not in [7, 8, 9] or inp[go.BOOST] or inputdepthmaps[count] is not None: + # invert if midas + if inp[go.MODEL_TYPE] > 0 or inputdepthmaps[count] is not None: # TODO: Weird + depthi = depthi_max - depthi + depthi_min + depth_max = depthi.max() + depth_min = depthi.min() + # make positive + if depthi_min < 0: + depthi = depthi - depthi_min + depth_max = depthi.max() + depth_min = depthi.min() + # scale down + if depthi.max() > 10.0: + depthi = 4.0 * (depthi - depthi_min) / (depthi_max - depthi_min) + # offset + depthi = depthi + 1.0 + + mesh = create_mesh(inputimages[count], depthi, keep_edges=not inp[go.SIMPLE_MESH_OCCLUDE], + spherical=(inp[go.SIMPLE_MESH_SPHERICAL])) + mesh.export(meshsimple_fi) + yield count, 'simple_mesh', meshsimple_fi + + print("Computing output(s) done.") + except Exception as e: + import traceback + if 'out of memory' in str(e).lower(): + print(str(e)) + suggestion = "out of GPU 
memory, could not generate depthmap! " \ + "Here are some suggestions to work around this issue:\n" + if inp[go.BOOST]: + suggestion += " * Disable BOOST (generation will be faster, but the depthmap will be less detailed)\n" + if backbone.USED_BACKBONE != backbone.BackboneType.STANDALONE: + suggestion += " * Run DepthMap in the standalone mode - without launching the SD WebUI\n" + if device != torch.device("cpu"): + suggestion += " * Select CPU as the processing device (this will be slower)\n" + if inp[go.MODEL_TYPE] != 6: + suggestion +=\ + " * Use a different model (generally, more memory-consuming models produce better depthmaps)\n" + if not inp[go.BOOST]: + suggestion += " * Reduce net size (this could reduce quality)\n" + print('Fail.\n') + raise Exception(suggestion) + else: + print('Fail.\n') + raise e + finally: + if backbone.get_opt('depthmap_script_keepmodels', True): + model_holder.offload() # Swap to CPU memory + else: + model_holder.unload_models() + gc.collect() + backbone.torch_gc() + + # TODO: This should not be here + if inp[go.GEN_INPAINTED_MESH]: + try: + mesh_fi = run_3dphoto(device, inpaint_imgs, inpaint_depths, inputnames, outpath, + inp[go.GEN_INPAINTED_MESH_DEMOS], + 1, "mp4") + yield 0, 'inpainted_mesh', mesh_fi + except Exception as e: + print(f'{str(e)}, some issue with generating inpainted mesh') + + backbone.reload_sd_model() + print("All done.\n") + + +def get_uniquefn(outpath, basename, ext, suffix=''): + basecount = backbone.get_next_sequence_number(outpath, basename) + if basecount > 0: + basecount -= 1 + if suffix != '': + suffix = f'-{suffix}' # Dash is important for selecting unique filenames (see get_next_sequence_number) + for i in range(500): + fullfn = os.path.join(outpath, f"{basename}-{basecount + i:04}{suffix}.{ext}") + if not os.path.exists(fullfn): + return fullfn + return f"{basename}-99999{suffix}.{ext}" # Failback, should never be executed + + +def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, 
gen_inpainted_mesh_demos, vid_ssaa, vid_format): + mesh_fi = '' + try: + print("Running 3D Photo Inpainting .. ") + edgemodel_path = './models/3dphoto/edge_model.pth' + depthmodel_path = './models/3dphoto/depth_model.pth' + colormodel_path = './models/3dphoto/color_model.pth' + # create paths to model if not present + os.makedirs('./models/3dphoto/', exist_ok=True) + + ensure_file_downloaded( + edgemodel_path, + ["https://huggingface.co/spaces/Epoching/3D_Photo_Inpainting/resolve/e389e564fd2a55cf/checkpoints/edge-model.pth", + "https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/edge-model.pth"], + "b1d768bd008ad5fe9f540004f870b8c3d355e4939b2009aa4db493fd313217c9") + ensure_file_downloaded( + depthmodel_path, + ["https://huggingface.co/spaces/Epoching/3D_Photo_Inpainting/resolve/e389e564fd2a55cf/checkpoints/depth-model.pth", + "https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/depth-model.pth"], + "2d0e63e89a22762ddfa8bc8c9f8c992e5532b140123274ffc6e4171baa1b76f8") + ensure_file_downloaded( + colormodel_path, + ["https://huggingface.co/spaces/Epoching/3D_Photo_Inpainting/resolve/e389e564fd2a55cf/checkpoints/color-model.pth", + "https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/color-model.pth"], + "383c9b1db70097907a6f9c8abb0303e7056f50d5456a36f34ab784592b8b2c20" + ) + + print("Loading edge model ..") + depth_edge_model = Inpaint_Edge_Net(init_weights=True) + depth_edge_weight = torch.load(edgemodel_path, map_location=torch.device(device)) + depth_edge_model.load_state_dict(depth_edge_weight) + depth_edge_model = depth_edge_model.to(device) + depth_edge_model.eval() + print("Loading depth model ..") + depth_feat_model = Inpaint_Depth_Net() + depth_feat_weight = torch.load(depthmodel_path, map_location=torch.device(device)) + depth_feat_model.load_state_dict(depth_feat_weight, strict=True) + depth_feat_model = depth_feat_model.to(device) + depth_feat_model.eval() + depth_feat_model = depth_feat_model.to(device) + print("Loading rgb model ..") + 
rgb_model = Inpaint_Color_Net() + rgb_feat_weight = torch.load(colormodel_path, map_location=torch.device(device)) + rgb_model.load_state_dict(rgb_feat_weight) + rgb_model.eval() + rgb_model = rgb_model.to(device) + + config = {} + config["gpu_ids"] = 0 + config['extrapolation_thickness'] = 60 + config['extrapolate_border'] = True + config['depth_threshold'] = 0.04 + config['redundant_number'] = 12 + config['ext_edge_threshold'] = 0.002 + config['background_thickness'] = 70 + config['context_thickness'] = 140 + config['background_thickness_2'] = 70 + config['context_thickness_2'] = 70 + config['log_depth'] = True + config['depth_edge_dilate'] = 10 + config['depth_edge_dilate_2'] = 5 + config['largest_size'] = 512 + config['repeat_inpaint_edge'] = True + config['ply_fmt'] = "bin" + + config['save_ply'] = backbone.get_opt('depthmap_script_save_ply', False) + config['save_obj'] = True + + if device == torch.device("cpu"): + config["gpu_ids"] = -1 + + for count in trange(0, len(img_rgb)): + basename = 'depthmap' + if inputnames is not None: + if inputnames[count] is not None: + p = Path(inputnames[count]) + basename = p.stem + + mesh_fi = get_uniquefn(outpath, basename, 'obj') + + print(f"\nGenerating inpainted mesh .. (go make some coffee) ..") + + # from inpaint.utils.get_MiDaS_samples + W = img_rgb[count].width + H = img_rgb[count].height + int_mtx = np.array([[max(H, W), 0, W // 2], [0, max(H, W), H // 2], [0, 0, 1]]).astype(np.float32) + if int_mtx.max() > 1: + int_mtx[0, :] = int_mtx[0, :] / float(W) + int_mtx[1, :] = int_mtx[1, :] / float(H) + + # how inpaint.utils.read_MiDaS_depth() imports depthmap + disp = img_depth[count].astype(np.float32) + disp = disp - disp.min() + disp = cv2.blur(disp / disp.max(), ksize=(3, 3)) * disp.max() + disp = (disp / disp.max()) * 3.0 + depth = 1. 
/ np.maximum(disp, 0.05) + + # rgb input + img = np.asarray(img_rgb[count]) + if len(img.shape) > 2 and img.shape[2] == 4: + # convert the image from RGBA2RGB + img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR) + + # run sparse bilateral filter + config['sparse_iter'] = 5 + config['filter_size'] = [7, 7, 5, 5, 5] + config['sigma_s'] = 4.0 + config['sigma_r'] = 0.5 + vis_photos, vis_depths = sparse_bilateral_filtering(depth.copy(), img.copy(), config, + num_iter=config['sparse_iter'], spdb=False) + depth = vis_depths[-1] + + # bilat_fn = os.path.join(outpath, basename +'_bilatdepth.png') + # cv2.imwrite(bilat_fn, depth) + + rt_info = write_mesh(img, + depth, + int_mtx, + mesh_fi, + config, + rgb_model, + depth_edge_model, + depth_edge_model, + depth_feat_model) + + if rt_info is not False and gen_inpainted_mesh_demos: + run_3dphoto_videos(mesh_fi, basename, outpath, 300, 40, + [0.03, 0.03, 0.05, 0.03], + ['double-straight-line', 'double-straight-line', 'circle', 'circle'], + [0.00, 0.00, -0.015, -0.015], + [0.00, 0.00, -0.015, -0.00], + [-0.05, -0.05, -0.05, -0.05], + ['dolly-zoom-in', 'zoom-in', 'circle', 'swing'], False, vid_format, vid_ssaa) + + backbone.torch_gc() + + finally: + del rgb_model + rgb_model = None + del depth_edge_model + depth_edge_model = None + del depth_feat_model + depth_feat_model = None + backbone.torch_gc() + + return mesh_fi + + +def run_3dphoto_videos(mesh_fi, basename, outpath, num_frames, fps, crop_border, traj_types, x_shift_range, + y_shift_range, z_shift_range, video_postfix, vid_dolly, vid_format, vid_ssaa): + import vispy + try: + if platform.system() == 'Windows': + vispy.use(app='PyQt5') + elif platform.system() == 'Darwin': + vispy.use('PyQt6') + else: + vispy.use(app='egl') + except: + import traceback + print(traceback.format_exc()) + print('Trying an alternative...') + for u in ['PyQt5', 'PyQt6', 'egl']: + try: + vispy.use(app=u) + break + except: + print(f'On {u}') + print(traceback.format_exc()) + # Honestly, I don't know if it 
actually helps at all + + # read ply + global video_mesh_data, video_mesh_fn + if video_mesh_fn is None or video_mesh_fn != mesh_fi: + try: + del video_mesh_data + except: + print("del video_mesh_data failed") + video_mesh_fn = mesh_fi + video_mesh_data = read_mesh(mesh_fi) + + verts, colors, faces, Height, Width, hFov, vFov, mean_loc_depth = video_mesh_data + + original_w = output_w = W = Width + original_h = output_h = H = Height + int_mtx = np.array([[max(H, W), 0, W // 2], [0, max(H, W), H // 2], [0, 0, 1]]).astype(np.float32) + if int_mtx.max() > 1: + int_mtx[0, :] = int_mtx[0, :] / float(W) + int_mtx[1, :] = int_mtx[1, :] / float(H) + + config = {} + config['video_folder'] = outpath + config['num_frames'] = num_frames + config['fps'] = fps + config['crop_border'] = crop_border + config['traj_types'] = traj_types + config['x_shift_range'] = x_shift_range + config['y_shift_range'] = y_shift_range + config['z_shift_range'] = z_shift_range + config['video_postfix'] = video_postfix + config['ssaa'] = vid_ssaa + + # from inpaint.utils.get_MiDaS_samples + generic_pose = np.eye(4) + assert len(config['traj_types']) == len(config['x_shift_range']) == \ + len(config['y_shift_range']) == len(config['z_shift_range']) == len(config['video_postfix']), \ + "The number of elements in 'traj_types', 'x_shift_range', 'y_shift_range', 'z_shift_range' and \ + 'video_postfix' should be equal." + tgt_pose = [[generic_pose * 1]] + tgts_poses = [] + for traj_idx in range(len(config['traj_types'])): + tgt_poses = [] + sx, sy, sz = path_planning(config['num_frames'], config['x_shift_range'][traj_idx], + config['y_shift_range'][traj_idx], + config['z_shift_range'][traj_idx], path_type=config['traj_types'][traj_idx]) + for xx, yy, zz in zip(sx, sy, sz): + tgt_poses.append(generic_pose * 1.) 
+ tgt_poses[-1][:3, -1] = np.array([xx, yy, zz]) + tgts_poses += [tgt_poses] + tgt_pose = generic_pose * 1 + + # seems we only need the depthmap to calc mean_loc_depth, which is only used when doing 'dolly' + # width and height are already in the ply file in the comments .. + # might try to add the mean_loc_depth to it too + # did just that + # mean_loc_depth = img_depth[img_depth.shape[0]//2, img_depth.shape[1]//2] + + print("Generating videos ..") + + normal_canvas, all_canvas = None, None + videos_poses, video_basename = copy.deepcopy(tgts_poses), basename + top = (original_h // 2 - int_mtx[1, 2] * output_h) + left = (original_w // 2 - int_mtx[0, 2] * output_w) + down, right = top + output_h, left + output_w + border = [int(xx) for xx in [top, down, left, right]] + normal_canvas, all_canvas, fn_saved = output_3d_photo(verts.copy(), colors.copy(), faces.copy(), + copy.deepcopy(Height), copy.deepcopy(Width), + copy.deepcopy(hFov), copy.deepcopy(vFov), + copy.deepcopy(tgt_pose), config['video_postfix'], + copy.deepcopy(generic_pose), + copy.deepcopy(config['video_folder']), + None, copy.deepcopy(int_mtx), config, None, + videos_poses, video_basename, original_h, original_w, + border=border, depth=None, normal_canvas=normal_canvas, + all_canvas=all_canvas, + mean_loc_depth=mean_loc_depth, dolly=vid_dolly, + fnExt=vid_format) + return fn_saved + +def run_makevideo(fn_mesh, vid_numframes, vid_fps, vid_traj, vid_shift, vid_border, dolly, vid_format, vid_ssaa, + outpath=None, basename=None): + if len(fn_mesh) == 0 or not os.path.exists(fn_mesh): + raise Exception("Could not open mesh.") + + vid_ssaa = int(vid_ssaa) + + # traj type + if vid_traj == 0: + vid_traj = ['straight-line'] + elif vid_traj == 1: + vid_traj = ['double-straight-line'] + elif vid_traj == 2: + vid_traj = ['circle'] + + num_fps = int(vid_fps) + num_frames = int(vid_numframes) + shifts = vid_shift.split(',') + if len(shifts) != 3: + raise Exception("Translate requires 3 elements.") + x_shift_range = 
[float(shifts[0])] + y_shift_range = [float(shifts[1])] + z_shift_range = [float(shifts[2])] + + borders = vid_border.split(',') + if len(borders) != 4: + raise Exception("Crop Border requires 4 elements.") + crop_border = [float(borders[0]), float(borders[1]), float(borders[2]), float(borders[3])] + + if not outpath: + outpath = backbone.get_outpath() + + if not basename: + # output path and filename mess .. + basename = Path(fn_mesh).stem + + # unique filename + basecount = backbone.get_next_sequence_number(outpath, basename) + if basecount > 0: basecount = basecount - 1 + fullfn = None + for i in range(500): + fn = f"{basecount + i:05}" if basename == '' else f"{basename}-{basecount + i:04}" + fullfn = os.path.join(outpath, f"{fn}_." + vid_format) + if not os.path.exists(fullfn): + break + basename = Path(fullfn).stem + basename = basename[:-1] + + print("Loading mesh ..") + + fn_saved = run_3dphoto_videos(fn_mesh, basename, outpath, num_frames, num_fps, crop_border, vid_traj, x_shift_range, + y_shift_range, z_shift_range, [''], dolly, vid_format, vid_ssaa) + + return fn_saved[-1], fn_saved[-1], '' + +def unload_models(): + model_holder.unload_models() + + +# TODO: code borrowed from the internet to be marked as such and to reside in separate files + +def batched_background_removal(inimages, model_name): + from rembg import new_session, remove + print('creating background masks') + outimages = [] + + # model path and name + bg_model_dir = Path.joinpath(Path().resolve(), "models/rem_bg") + os.makedirs(bg_model_dir, exist_ok=True) + os.environ["U2NET_HOME"] = str(bg_model_dir) + + # starting a session + background_removal_session = new_session(model_name) + for count in range(0, len(inimages)): + bg_remove_img = np.array(remove(inimages[count], session=background_removal_session)) + outimages.append(Image.fromarray(bg_remove_img)) + # The line below might be redundant + del background_removal_session + return outimages + + +def pano_depth_to_world_points(depth): + 
""" + 360 depth to world points + given 2D depth is an equirectangular projection of a spherical image + Treat depth as radius + longitude : -pi to pi + latitude : -pi/2 to pi/2 + """ + + # Convert depth to radius + radius = depth.flatten() + + lon = np.linspace(-np.pi, np.pi, depth.shape[1]) + lat = np.linspace(-np.pi / 2, np.pi / 2, depth.shape[0]) + + lon, lat = np.meshgrid(lon, lat) + lon = lon.flatten() + lat = lat.flatten() + + # Convert to cartesian coordinates + x = radius * np.cos(lat) * np.cos(lon) + y = radius * np.cos(lat) * np.sin(lon) + z = radius * np.sin(lat) + + pts3d = np.stack([x, y, z], axis=1) + + return pts3d + + +def depth_edges_mask(depth): + """Returns a mask of edges in the depth map. + Args: + depth: 2D numpy array of shape (H, W) with dtype float32. + Returns: + mask: 2D numpy array of shape (H, W) with dtype bool. + """ + # Compute the x and y gradients of the depth map. + depth_dx, depth_dy = np.gradient(depth) + # Compute the gradient magnitude. + depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2) + # Compute the edge mask. 
+ mask = depth_grad > 0.05 + return mask + + +def create_mesh(image, depth, keep_edges=False, spherical=False): + import trimesh + from dzoedepth.utils.geometry import depth_to_points, create_triangles + maxsize = backbone.get_opt('depthmap_script_mesh_maxsize', 2048) + + # limit the size of the input image + image.thumbnail((maxsize, maxsize)) + + if not spherical: + pts3d = depth_to_points(depth[None]) + else: + pts3d = pano_depth_to_world_points(depth) + + pts3d = pts3d.reshape(-1, 3) + + verts = pts3d.reshape(-1, 3) + image = np.array(image) + if keep_edges: + triangles = create_triangles(image.shape[0], image.shape[1]) + else: + triangles = create_triangles(image.shape[0], image.shape[1], mask=~depth_edges_mask(depth)) + colors = image.reshape(-1, 3) + + mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors) + + # rotate 90deg over X when spherical + if spherical: + angle = math.pi / 2 + direction = [1, 0, 0] + center = [0, 0, 0] + rot_matrix = trimesh.transformations.rotation_matrix(angle, direction, center) + mesh.apply_transform(rot_matrix) + + return mesh diff --git a/src/depthmap_generation.py b/src/depthmap_generation.py new file mode 100644 index 0000000000000000000000000000000000000000..2a78c2c3445382c207197cabdf6f4d525bfa6c07 --- /dev/null +++ b/src/depthmap_generation.py @@ -0,0 +1,1220 @@ +import gc +import os.path +from operator import getitem + +import cv2 +import numpy as np +import skimage.measure +from PIL import Image +import torch +from torchvision.transforms import Compose, transforms + +# midas imports +from dmidas.dpt_depth import DPTDepthModel +from dmidas.midas_net import MidasNet +from dmidas.midas_net_custom import MidasNet_small +from dmidas.transforms import Resize, NormalizeImage, PrepareForNet +# zoedepth +from dzoedepth.models.builder import build_model +from dzoedepth.utils.config import get_config +# AdelaiDepth/LeReS imports +from lib.multi_depth_model_woauxi import RelDepthModel +from lib.net_tools 
import strip_prefix_if_present  # tail of the `from lib.net_tools` statement that begins on the previous line
from pix2pix.models.pix2pix4depth_model import Pix2Pix4DepthModel
# Marigold
from dmarigold.marigold import MarigoldPipeline
# pix2pix/merge net imports
from pix2pix.options.test_options import TestOptions
# depthanyting v2
try:
    from ddepth_anything_v2 import DepthAnythingV2
except Exception:  # best-effort import; was a bare `except:` that also swallowed KeyboardInterrupt
    print('depth_anything_v2 import failed... somehow')

# Our code
from src.misc import *
from src import backbone

global depthmap_device


class ModelHolder:
    """Holds the currently loaded depth model and, when boost is enabled, the pix2pix merge network.

    Several methods read attributes that ``__init__`` does not create (``no_half``, ``precision``,
    ``marigold_ensembles``, ``marigold_steps``, ``boost_rmax``); presumably they are injected
    through ``update_settings`` before use -- verify against the caller.
    """

    def __init__(self):
        self.depth_model = None
        self.pix2pix_model = None
        self.depth_model_type = None
        self.device = None  # Target device, the model may be swapped from VRAM into RAM.
        self.offloaded = False  # True means current device is not the target device

        # Extra stuff
        self.resize_mode = None
        self.normalization = None
        self.tiling_mode = False

    def update_settings(self, **kvargs):
        """Set every keyword argument as an attribute on this object."""
        # Opens the pandora box
        for k, v in kvargs.items():
            setattr(self, k, v)

    def ensure_models(self, model_type, device: torch.device, boost: bool, tiling_mode: bool = False):
        """Make sure the requested configuration is loaded, reloading only when something changed.

        A ``model_type`` of -1 or None unloads everything and returns.
        """
        # TODO: could make it more granular
        if model_type == -1 or model_type is None:
            self.unload_models()
            return
        # Certain optimisations are irreversible and not device-agnostic, thus changing device requires reloading
        if (
                model_type != self.depth_model_type or
                boost != (self.pix2pix_model is not None) or
                device != self.device or
                tiling_mode != self.tiling_mode
        ):
            self.unload_models()
            self.load_models(model_type, device, boost, tiling_mode)
        self.reload()

    def load_models(self, model_type, device: torch.device, boost: bool, tiling_mode: bool = False):
        """Load the depth model selected by ``model_type`` and, if ``boost``, the pix2pix network.

        Args:
            model_type: Integer model index (0 = LeReS res101, 1-6 = MiDaS variants,
                7-9 = ZoeDepth, 10 = Marigold, 11 = Depth Anything, 12-14 = Depth Anything V2).
            device: Target torch device.
            boost: When True, the pix2pix merge network is loaded as well.
            tiling_mode: When True, Conv1d/Conv2d layers are switched to circular padding.

        Raises:
            ValueError: If ``model_type`` is not one of the supported indices.
        """

        # TODO: we need to at least try to find models downloaded by other plugins (e.g. controlnet)

        # model path and name
        # ZoeDepth and Marigold do not use this
        model_dir = "./models/midas"
        if model_type == 0:
            model_dir = "./models/leres"
        if model_type == 11:
            model_dir = "./models/depth_anything"
        if model_type in [12, 13, 14]:
            model_dir = "./models/depth_anything_v2"

        # create paths to model if not present
        os.makedirs(model_dir, exist_ok=True)
        os.makedirs('./models/pix2pix', exist_ok=True)

        print("Loading model weights from ", end=" ")

        resize_mode = "minimal"
        normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

        model = None
        if model_type == 0:  # "res101"
            model_path = f"{model_dir}/res101.pth"
            print(model_path)
            ensure_file_downloaded(
                model_path,
                ["https://cloudstor.aarnet.edu.au/plus/s/lTIJF4vrvHCAI31/download",
                 "https://huggingface.co/lllyasviel/Annotators/resolve/5bc80eec2b4fddbb/res101.pth",
                 ],
                "1d696b2ef3e8336b057d0c15bc82d2fecef821bfebe5ef9d7671a5ec5dde520b")
            if device != torch.device('cpu'):
                checkpoint = torch.load(model_path)
            else:
                checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
            model = RelDepthModel(backbone='resnext101')
            model.load_state_dict(strip_prefix_if_present(checkpoint['depth_model'], "module."), strict=True)
            del checkpoint
            backbone.torch_gc()

        elif model_type == 1:  # "dpt_beit_large_512" midas 3.1
            model_path = f"{model_dir}/dpt_beit_large_512.pt"
            print(model_path)
            ensure_file_downloaded(model_path,
                                   "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_512.pt")
            model = DPTDepthModel(
                path=model_path,
                backbone="beitl16_512",
                non_negative=True,
            )
            resize_mode = "minimal"
            normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

        elif model_type == 2:  # "dpt_beit_large_384" midas 3.1
            model_path = f"{model_dir}/dpt_beit_large_384.pt"
            print(model_path)
            ensure_file_downloaded(model_path,
                                   "https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_384.pt")
            model = DPTDepthModel(
                path=model_path,
                backbone="beitl16_384",
                non_negative=True,
            )
            resize_mode = "minimal"
            normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

        elif model_type == 3:  # "dpt_large_384" midas 3.0
            model_path = f"{model_dir}/dpt_large-midas-2f21e586.pt"
            print(model_path)
            ensure_file_downloaded(model_path,
                                   "https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt")
            model = DPTDepthModel(
                path=model_path,
                backbone="vitl16_384",
                non_negative=True,
            )
            resize_mode = "minimal"
            normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

        elif model_type == 4:  # "dpt_hybrid_384" midas 3.0
            model_path = f"{model_dir}/dpt_hybrid-midas-501f0c75.pt"
            print(model_path)
            ensure_file_downloaded(model_path,
                                   "https://github.com/intel-isl/DPT/releases/download/1_0/dpt_hybrid-midas-501f0c75.pt")
            model = DPTDepthModel(
                path=model_path,
                backbone="vitb_rn50_384",
                non_negative=True,
            )
            resize_mode = "minimal"
            normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

        elif model_type == 5:  # "midas_v21"
            model_path = f"{model_dir}/midas_v21-f6b98070.pt"
            print(model_path)
            ensure_file_downloaded(model_path,
                                   "https://github.com/AlexeyAB/MiDaS/releases/download/midas_dpt/midas_v21-f6b98070.pt")
            model = MidasNet(model_path, non_negative=True)
            resize_mode = "upper_bound"
            normalization = NormalizeImage(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            )

        elif model_type == 6:  # "midas_v21_small"
            model_path = f"{model_dir}/midas_v21_small-70d6b9c8.pt"
            print(model_path)
            ensure_file_downloaded(model_path,
                                   "https://github.com/AlexeyAB/MiDaS/releases/download/midas_dpt/midas_v21_small-70d6b9c8.pt")
            model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3", exportable=True,
                                   non_negative=True, blocks={'expand': True})
            resize_mode = "upper_bound"
            normalization = NormalizeImage(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            )

        # When loading, zoedepth models will report the default net size.
        # It will be overridden by the generation settings.
        elif model_type == 7:  # zoedepth_n
            print("zoedepth_n\n")
            conf = get_config("zoedepth", "infer")
            model = build_model(conf)

        elif model_type == 8:  # zoedepth_k
            print("zoedepth_k\n")
            conf = get_config("zoedepth", "infer", config_version="kitti")
            model = build_model(conf)

        elif model_type == 9:  # zoedepth_nk
            print("zoedepth_nk\n")
            conf = get_config("zoedepth_nk", "infer")
            model = build_model(conf)

        elif model_type == 10:  # Marigold v1
            model_path = "Bingxin/Marigold"
            print(model_path)
            dtype = torch.float32 if self.no_half else torch.float16
            model = MarigoldPipeline.from_pretrained(model_path, torch_dtype=dtype)
            try:
                import xformers
                model.enable_xformers_memory_efficient_attention()
            except Exception:
                pass  # run without xformers

        elif model_type == 11:  # depth_anything
            from depth_anything.dpt import DPT_DINOv2
            # This will download the model... to some place
            model = (
                DPT_DINOv2(
                    encoder="vitl",
                    features=256,
                    out_channels=[256, 512, 1024, 1024],
                    localhub=False,
                ).to(device).eval()
            )
            model_path = f"{model_dir}/depth_anything_vitl14.pth"
            ensure_file_downloaded(model_path,
                                   "https://huggingface.co/spaces/LiheYoung/Depth-Anything/resolve/main/checkpoints/depth_anything_vitl14.pth")
            # Load onto the CPU first (as the V2 branch does) so the checkpoint does not depend on
            # the device it was saved from; load_state_dict copies into the already-placed model.
            model.load_state_dict(torch.load(model_path, map_location='cpu'))

        elif model_type in [12, 13, 14]:  # depth_anything_v2 small, base, large
            letter = {12: 's', 13: 'b', 14: 'l'}[model_type]
            word = {12: 'Small', 13: 'Base', 14: 'Large'}[model_type]
            model_path = f"{model_dir}/depth_anything_v2_vit{letter}.pth"
            ensure_file_downloaded(model_path,
                                   f"https://huggingface.co/depth-anything/Depth-Anything-V2-{word}/resolve/main/depth_anything_v2_vit{letter}.pth")
            model_configs = {'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
                             'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
                             'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
                             'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}}
            model = DepthAnythingV2(**model_configs[f'vit{letter}'])
            model.load_state_dict(torch.load(model_path, map_location='cpu'))
        # 15 is reserved for Depth Anything V2 Giant

        if model is None:
            # Previously this surfaced later as an AttributeError on None.
            raise ValueError(f"Unsupported depth model type: {model_type!r}")

        if tiling_mode:
            # Hijacking the model: modules() walks the model itself and all of its descendants.
            # The exact-type match of the original is kept, so subclasses are left untouched.
            for layer in model.modules():
                if type(layer) in (torch.nn.Conv2d, torch.nn.Conv1d):
                    layer.padding_mode = 'circular'

        if model_type in range(0, 10):
            model.eval()  # prepare for evaluation
        # optimize
        if device == torch.device("cuda"):
            if model_type in [0, 1, 2, 3, 4, 5, 6]:
                model = model.to(memory_format=torch.channels_last)  # TODO: weird
            if not self.no_half:
                # Marigold can be done
                # TODO: Fix for zoedepth_n - it completely trips and generates black images
                if model_type in [1, 2, 3, 4, 5, 6, 8, 9, 11] and not boost:
                    model = model.half()
                if model_type in [12, 13, 14]:
                    model.depth_head.half()
                    model.pretrained.half()
        model.to(device)  # to correct device

        self.depth_model = model
        self.depth_model_type = model_type
        self.resize_mode = resize_mode
        self.normalization = normalization
        self.tiling_mode = tiling_mode

        self.device = device

        if boost:
            # sfu.ca unfortunately is not very reliable, we use a mirror just in case
            ensure_file_downloaded(
                './models/pix2pix/latest_net_G.pth',
                ["https://huggingface.co/lllyasviel/Annotators/resolve/9a7d84251d487d11/latest_net_G.pth",
                 "https://sfu.ca/~yagiz/CVPR21/latest_net_G.pth"],
                '50ec735d74ed6499562d898f41b49343e521808b8dae589aa3c2f5c9ac9f7462')
            opt = TestOptions().parse()
            if device == torch.device('cpu'):
                opt.gpu_ids = []
            self.pix2pix_model = Pix2Pix4DepthModel(opt)
            self.pix2pix_model.save_dir = './models/pix2pix'
            self.pix2pix_model.load_networks('latest')
            self.pix2pix_model.eval()

        backbone.torch_gc()

    @staticmethod
    def get_default_net_size(model_type):
        """Return the default ``[width, height]`` net size for ``model_type`` ([512, 512] if unknown)."""
        # Have you ever wondered why so many things in so many code repositories are not optimal?
        # For example, this here is a set of int:tuple. Why wouldn't it be a set of enum:tuple?
        # Or even better, why won't every model be defined separately with all its necessary values and constants in one place? And why one line of this comment is much longer than the other ones?!
        # Why won't the models indexed by enum elements, not integers?
        # The answer is as definite as it is horrifying: tech depth.
        # This here is a prime example of how tech debt piles up: one slightly iffy decision a long time ago,
        # then nothing is done with it for quite some time, stuff starts depending on it, more stuff is added.
        # The old code blocks are like jenga blocks that are experiencing ever-increasing pressure,
        # in tower that (just as code) grows to infinity. And noone wants to knock out the jenga.
        # Noone wants to spend hours of their life fixing it - because adding new features is more exciting.
        # Once merely a suboptimal thing, that worked perfectly at a time, turns into this monster that slowly
        # takes your sanity away. It's not that it ambushes you directly - like a hungry moskquito it knows that
        # being too annoying will warrant immediate action and smashing. Instead, it bothers you just a
        # couple of sound decibels and droplets of blood less than necessary for you to actually go and deal with it.
        # And mind you, this is one buffed maskito: well, actually it got beefed up with time.
        # Now it is just a giant mockyto monster. Noone wants to fight it because it is scary,
        # and thus this threshold of pain is much higher. Don't repeat our mistakes: fight the giant mojito monsters and
        # don't let them spread!
        sizes = {
            0: [448, 448],
            1: [512, 512],
            2: [384, 384],
            3: [384, 384],
            4: [384, 384],
            5: [384, 384],
            6: [256, 256],
            7: [384, 512],
            8: [384, 768],
            9: [384, 512],
            10: [768, 768],
            11: [518, 518],
            12: [518, 518],
            13: [518, 518],
            14: [518, 518]
        }
        return sizes.get(model_type, [512, 512])

    def offload(self):
        """Move to RAM to conserve VRAM"""
        if self.device != torch.device('cpu') and not self.offloaded:
            self.move_models_to(torch.device('cpu'))
            self.offloaded = True

    def reload(self):
        """Undoes offload"""
        if self.offloaded:
            self.move_models_to(self.device)
            self.offloaded = False

    def move_models_to(self, device):
        """Move the depth model to ``device``; the pix2pix model is left where it is."""
        if self.depth_model is not None:
            self.depth_model.to(device)
        if self.pix2pix_model is not None:
            pass
            # TODO: pix2pix offloading not implemented

    def unload_models(self):
        """Drop both models, free memory and reset the type/device/offload bookkeeping."""
        if self.depth_model is not None or self.pix2pix_model is not None:
            del self.depth_model
            self.depth_model = None
            del self.pix2pix_model
            self.pix2pix_model = None
            gc.collect()
            backbone.torch_gc()

        self.depth_model_type = None
        self.device = None
        # Nothing is loaded any more, so a stale "offloaded" flag must not survive into the next load.
        self.offloaded = False

    def get_raw_prediction(self, input, net_width, net_height):
        """Get prediction from the model currently loaded by the ModelHolder object.
        If boost is enabled, net_width and net_height will be ignored.

        Returns:
            Tuple ``(raw_prediction, raw_prediction_invert)``; the flag is True for model
            types 0, 7, 8, 9 and 10.

        Raises:
            ValueError: If boost is disabled and the loaded model type has no estimator.
        """
        global depthmap_device
        depthmap_device = self.device
        # input image
        img = cv2.cvtColor(np.asarray(input), cv2.COLOR_BGR2RGB) / 255.0
        # compute depthmap
        if self.pix2pix_model is None:
            if self.depth_model_type == 0:
                raw_prediction = estimateleres(img, self.depth_model, net_width, net_height)
            elif self.depth_model_type in [7, 8, 9]:
                raw_prediction = estimatezoedepth(input, self.depth_model, net_width, net_height)
            elif self.depth_model_type in [1, 2, 3, 4, 5, 6]:
                raw_prediction = estimatemidas(img, self.depth_model, net_width, net_height,
                                               self.resize_mode, self.normalization, self.no_half,
                                               self.precision == "autocast")
            elif self.depth_model_type == 10:
                raw_prediction = estimatemarigold(img, self.depth_model, net_width, net_height,
                                                  self.marigold_ensembles, self.marigold_steps)
            elif self.depth_model_type == 11:
                raw_prediction = estimatedepthanything(img, self.depth_model, net_width, net_height)
            elif self.depth_model_type in [12, 13, 14]:
                raw_prediction = estimatedepthanything_v2(img, self.depth_model, net_width, net_height)
            else:
                # Previously this fell through to an UnboundLocalError at the return statement.
                raise ValueError(f"Unsupported depth model type: {self.depth_model_type!r}")
        else:
            raw_prediction = estimateboost(img, self.depth_model, self.depth_model_type, self.pix2pix_model,
                                           self.boost_rmax)
        raw_prediction_invert = self.depth_model_type in [0, 7, 8, 9, 10]
        return raw_prediction, raw_prediction_invert


def estimateleres(img, model, w, h):
    """Run the LeReS model on ``img`` at net size (w, h) and resize the result back to the input size."""
    # leres transform input: reverse the channel order, resize to the net size, add a batch dimension
    rgb_c = img[:, :, ::-1].copy()
    A_resize = cv2.resize(rgb_c, (w, h))
    img_torch = scale_torch(A_resize)[None, :, :, :]

    # compute
    with torch.no_grad():
        if depthmap_device == torch.device("cuda"):
            img_torch = img_torch.cuda()
        prediction = model.depth_model(img_torch)

    prediction = prediction.squeeze().cpu().numpy()
    prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)

    return prediction
def scale_torch(img):
    """
    Convert the image to a float32 torch.tensor; RGB input is also normalized.
    :param img: input rgb is in shape [H, W, C], input depth/disp is in shape [H, W]
    :return: img. [C, H, W]
    """
    if len(img.shape) == 2:
        # A 2D map is never RGB, even when its width happens to be 3 (previously it was
        # mistaken for an RGB image in that case and sent through the RGB normalization).
        return torch.from_numpy(img[np.newaxis, :, :].astype(np.float32))
    if img.shape[2] == 3:
        transform = transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])
        return transform(img.astype(np.float32))
    return torch.from_numpy(img.astype(np.float32))


def estimatezoedepth(img, model, w, h):
    """Run a ZoeDepth model on ``img`` after overriding its resizer to the net size (w, h)."""
    # The resizer stores its size in name-mangled private attributes, hence the `_Resize__` prefix.
    model.core.prep.resizer._Resize__width = w
    model.core.prep.resizer._Resize__height = h
    return model.infer_pil(img)


def estimatemidas(img, model, w, h, resize_mode, normalization, no_half, precision_is_autocast):
    """Run a MiDaS model on ``img`` and return the prediction resized to the input's height and width.

    On CUDA the sample is converted to channels_last and, unless ``no_half`` is set, to half
    precision. Autocast is used only when ``precision_is_autocast`` is set and the device is CUDA.
    """
    import contextlib
    # init transform
    transform = Compose(
        [
            Resize(
                w,
                h,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method=resize_mode,
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            normalization,
            PrepareForNet(),
        ]
    )

    # transform input
    img_input = transform({"image": img})["image"]

    # compute
    on_cuda = depthmap_device == torch.device("cuda")
    precision_scope = torch.autocast if precision_is_autocast and on_cuda else contextlib.nullcontext
    with torch.no_grad(), precision_scope("cuda"):
        sample = torch.from_numpy(img_input).to(depthmap_device).unsqueeze(0)
        if on_cuda:
            sample = sample.to(memory_format=torch.channels_last)
            if not no_half:
                sample = sample.half()
        prediction = model.forward(sample)
        prediction = (
            torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            )
            .squeeze()
            .cpu()
            .numpy()
        )

    return prediction


# TODO: correct values for BOOST
# TODO: "h" is not used
def estimatemarigold(image, model, w, h, marigold_ensembles=5, marigold_steps=12):
    """Run the Marigold pipeline at processing resolution ``w`` and resize the depth to the input size."""
    # This hideous thing should be re-implemented once there is support from the upstream.
    # TODO: re-implement this hideous thing by using features from the upstream
    img = cv2.cvtColor((image * 255.0001).astype('uint8'), cv2.COLOR_BGR2RGB)
    img = Image.fromarray(img)
    with torch.no_grad():
        pipe_out = model(img, processing_res=w, show_progress_bar=False,
                         ensemble_size=marigold_ensembles, denoising_steps=marigold_steps,
                         match_input_res=False)
    return cv2.resize(pipe_out.depth_np, (image.shape[:2][::-1]), interpolation=cv2.INTER_CUBIC)


def estimatedepthanything(image, model, w, h):
    """Run a Depth Anything model on ``image`` and return the depth resized to the input size."""
    import torch.nn.functional as F
    # These local imports deliberately shadow the module-level MiDaS transforms of the same names.
    from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
    transform = Compose(
        [
            Resize(
                width=w // 14 * 14,
                height=h // 14 * 14,
                resize_target=False,
                keep_aspect_ratio=True,
                ensure_multiple_of=14,
                resize_method="lower_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ]
    )

    timage = transform({"image": image})["image"]
    # Match the device and dtype of the model's first parameter.
    timage = torch.from_numpy(timage).unsqueeze(0).to(device=next(model.parameters()).device,
                                                      dtype=next(model.parameters()).dtype)

    with torch.no_grad():
        depth = model(timage)
        depth = F.interpolate(
            depth[None], (image.shape[0], image.shape[1]), mode="bilinear", align_corners=False
        )[0, 0]

    return depth.cpu().numpy()


def estimatedepthanything_v2(image, model, w, h):
    """Run a Depth Anything V2 model on ``image``.

    The incoming ``h`` is ignored: ``w`` is passed to ``model.image2tensor`` and both ``h`` and
    ``w`` are then rebound to the size it returns, which is used for the final resize.
    """
    import torch.nn.functional as F
    # This is an awkward re-conversion, but I believe it should not impact quality
    img = cv2.cvtColor((image * 255.1).astype('uint8'), cv2.COLOR_BGR2RGB)
    with torch.no_grad():
        # Compare to: model.infer_image(img, w)
        image, (h, w) = model.image2tensor(img, w)
        # Casting to correct type, it is the same as type of some model tensor (the one here is arbitrary)
        image_casted = image.type_as(model.pretrained.blocks[0].norm1.weight.data)
        depth = model.forward(image_casted).type_as(image)
        depth = F.interpolate(depth[:, None], (h, w), mode="bilinear", align_corners=True)[0, 0]
    return depth.cpu().numpy()
class ImageandPatchs:
    """An RGB image together with its list of patches and optional base/updated depth estimates.

    Indexing returns a dict describing one patch, with its rect and size scaled by ``scale``.
    """

    def __init__(self, root_dir, name, patchsinfo, rgb_image, scale=1):
        self.root_dir = root_dir
        self.patchsinfo = patchsinfo
        self.name = name
        self.patchs = patchsinfo
        self.scale = scale

        self.rgb_image = cv2.resize(rgb_image, (round(rgb_image.shape[1] * scale), round(rgb_image.shape[0] * scale)),
                                    interpolation=cv2.INTER_CUBIC)

        # Becomes True once both the base and the updated estimate have been set.
        self.do_have_estimate = False
        self.estimation_updated_image = None
        self.estimation_base_image = None

    def __len__(self):
        return len(self.patchs)

    def set_base_estimate(self, est):
        """Store the base estimate; flags estimates as available if the updated one is already set."""
        self.estimation_base_image = est
        if self.estimation_updated_image is not None:
            self.do_have_estimate = True

    def set_updated_estimate(self, est):
        """Store the updated estimate; flags estimates as available if the base one is already set."""
        self.estimation_updated_image = est
        if self.estimation_base_image is not None:
            self.do_have_estimate = True

    def __getitem__(self, index):
        """Return the patch at ``index`` as a dict.

        Keys: 'patch_rgb', 'rect', 'size', 'id'; plus 'patch_whole_estimate_base' and
        'patch_whole_estimate_updated' once both estimates are set.
        """
        patch_id = int(self.patchs[index][0])
        rect = np.array(self.patchs[index][1]['rect'])
        msize = self.patchs[index][1]['size']

        ## applying scale to rect:
        rect = np.round(rect * self.scale)
        rect = rect.astype('int')
        msize = round(msize * self.scale)

        patch_rgb = impatch(self.rgb_image, rect)
        if self.do_have_estimate:
            patch_whole_estimate_base = impatch(self.estimation_base_image, rect)
            patch_whole_estimate_updated = impatch(self.estimation_updated_image, rect)
            return {'patch_rgb': patch_rgb, 'patch_whole_estimate_base': patch_whole_estimate_base,
                    'patch_whole_estimate_updated': patch_whole_estimate_updated, 'rect': rect,
                    'size': msize, 'id': patch_id}
        else:
            return {'patch_rgb': patch_rgb, 'rect': rect, 'size': msize, 'id': patch_id}

    # NOTE(review): print_options and parse read self.parser, self.gather_options and self.isTrain,
    # none of which this class sets -- they look like leftovers from an options class. Kept so the
    # class interface is unchanged; confirm whether they can be removed.
    def print_options(self, opt):
        """Print options

        It will print both current options and default values(if different).
        """
        message = ''
        message += '----------------- Options ---------------\n'
        for k, v in sorted(vars(opt).items()):
            comment = ''
            default = self.parser.get_default(k)
            if v != default:
                comment = '\t[default: %s]' % str(default)
            message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment)
        message += '----------------- End -------------------'
        print(message)
        # Saving the message to [checkpoints_dir]/opt.txt is disabled; the original kept that code
        # only as an inert string literal.

    def parse(self):
        """Parse our options, create checkpoints directory suffix, and set up gpu device."""
        opt = self.gather_options()
        opt.isTrain = self.isTrain  # train or test

        # process opt.suffix
        if opt.suffix:
            suffix = ('_' + opt.suffix.format(**vars(opt))) if opt.suffix != '' else ''
            opt.name = opt.name + suffix

        # self.print_options(opt)

        # set gpu ids: keep only the non-negative ids from the comma-separated string
        str_ids = opt.gpu_ids.split(',')
        opt.gpu_ids = []
        for str_id in str_ids:
            gpu_id = int(str_id)
            if gpu_id >= 0:
                opt.gpu_ids.append(gpu_id)
        # if len(opt.gpu_ids) > 0:
        #     torch.cuda.set_device(opt.gpu_ids[0])

        self.opt = opt
        return self.opt


def impatch(image, rect):
    """Extract the given patch pixels from a given image.

    ``rect`` is indexed as (x, y, width, height); the result is ``image[y:y + height, x:x + width]``.
    """
    w1 = rect[0]
    h1 = rect[1]
    w2 = w1 + rect[2]
    h2 = h1 + rect[3]
    return image[h1:h2, w1:w2]


# A second, byte-identical definition of ImageandPatchs used to follow here; it only re-bound the
# same name to the same class body and has been removed.


def estimateboost(img, model, model_type, pix2pixmodel, whole_size_threshold):
    # (only the first statement of this function lies within this span; the rest follows unchanged)
    pix2pixsize = 1024  # TODO: pix2pixsize and whole_size_threshold to setting?
+ + if model_type == 0: # leres + net_receptive_field_size = 448 + elif model_type == 1: # dpt_beit_large_512 + net_receptive_field_size = 512 + elif model_type == 11: # depth_anything + net_receptive_field_size = 518 + elif model_type in [12, 13, 14]: # depth_anything_v2 + net_receptive_field_size = 518 + else: # other midas # TODO Marigold support + net_receptive_field_size = 384 + patch_netsize = 2 * net_receptive_field_size + # Good luck trying to use zoedepth + + gc.collect() + backbone.torch_gc() + + # Generate mask used to smoothly blend the local pathc estimations to the base estimate. + # It is arbitrarily large to avoid artifacts during rescaling for each crop. + mask_org = generatemask((3000, 3000)) + mask = mask_org.copy() + + # Value x of R_x defined in the section 5 of the main paper. + r_threshold_value = 0.2 + # if R0: + # r_threshold_value = 0 + + input_resolution = img.shape + scale_threshold = 3 # Allows up-scaling with a scale up to 3 + + # Find the best input resolution R-x. The resolution search described in section 5-double estimation of the main paper and section B of the + # supplementary material. + whole_image_optimal_size, patch_scale = calculateprocessingres(img, net_receptive_field_size, r_threshold_value, + scale_threshold, whole_size_threshold) + + print('wholeImage being processed in :', whole_image_optimal_size) + + # Generate the base estimate using the double estimation. + whole_estimate = doubleestimate(img, net_receptive_field_size, whole_image_optimal_size, pix2pixsize, model, + model_type, pix2pixmodel) + + # Compute the multiplier described in section 6 of the main paper to make sure our initial patch can select + # small high-density regions of the image. + factor = max(min(1, 4 * patch_scale * whole_image_optimal_size / whole_size_threshold), 0.2) + print('Adjust factor is:', 1 / factor) + + # Compute the default target resolution. 
+ if img.shape[0] > img.shape[1]: + a = 2 * whole_image_optimal_size + b = round(2 * whole_image_optimal_size * img.shape[1] / img.shape[0]) + else: + a = round(2 * whole_image_optimal_size * img.shape[0] / img.shape[1]) + b = 2 * whole_image_optimal_size + b = int(round(b / factor)) + a = int(round(a / factor)) + + """ + # recompute a, b and saturate to max res. + if max(a,b) > max_res: + print('Default Res is higher than max-res: Reducing final resolution') + if img.shape[0] > img.shape[1]: + a = max_res + b = round(option.max_res * img.shape[1] / img.shape[0]) + else: + a = round(option.max_res * img.shape[0] / img.shape[1]) + b = max_res + b = int(b) + a = int(a) + """ + + img = cv2.resize(img, (b, a), interpolation=cv2.INTER_CUBIC) + + # Extract selected patches for local refinement + base_size = net_receptive_field_size * 2 + patchset = generatepatchs(img, base_size, factor) + + print('Target resolution: ', img.shape) + + # Computing a scale in case user prompted to generate the results as the same resolution of the input. + # Notice that our method output resolution is independent of the input resolution and this parameter will only + # enable a scaling operation during the local patch merge implementation to generate results with the same resolution + # as the input. 
+ """ + if output_resolution == 1: + mergein_scale = input_resolution[0] / img.shape[0] + print('Dynamicly change merged-in resolution; scale:', mergein_scale) + else: + mergein_scale = 1 + """ + # always rescale to input res for now + mergein_scale = input_resolution[0] / img.shape[0] + + imageandpatchs = ImageandPatchs('', '', patchset, img, mergein_scale) + whole_estimate_resized = cv2.resize(whole_estimate, (round(img.shape[1] * mergein_scale), + round(img.shape[0] * mergein_scale)), + interpolation=cv2.INTER_CUBIC) + imageandpatchs.set_base_estimate(whole_estimate_resized.copy()) + imageandpatchs.set_updated_estimate(whole_estimate_resized.copy()) + + print('Resulting depthmap resolution will be :', whole_estimate_resized.shape[:2]) + print('patches to process: ' + str(len(imageandpatchs))) + + # Enumerate through all patches, generate their estimations and refining the base estimate. + for patch_ind in range(len(imageandpatchs)): + + # Get patch information + patch = imageandpatchs[patch_ind] # patch object + patch_rgb = patch['patch_rgb'] # rgb patch + patch_whole_estimate_base = patch['patch_whole_estimate_base'] # corresponding patch from base + rect = patch['rect'] # patch size and location + patch_id = patch['id'] # patch ID + org_size = patch_whole_estimate_base.shape # the original size from the unscaled input + print('\t processing patch', patch_ind, '/', len(imageandpatchs) - 1, '|', rect) + + # We apply double estimation for patches. The high resolution value is fixed to twice the receptive + # field size of the network for patches to accelerate the process. 
+ patch_estimation = doubleestimate(patch_rgb, net_receptive_field_size, patch_netsize, pix2pixsize, model, + model_type, pix2pixmodel) + patch_estimation = cv2.resize(patch_estimation, (pix2pixsize, pix2pixsize), interpolation=cv2.INTER_CUBIC) + patch_whole_estimate_base = cv2.resize(patch_whole_estimate_base, (pix2pixsize, pix2pixsize), + interpolation=cv2.INTER_CUBIC) + + # Merging the patch estimation into the base estimate using our merge network: + # We feed the patch estimation and the same region from the updated base estimate to the merge network + # to generate the target estimate for the corresponding region. + pix2pixmodel.set_input(patch_whole_estimate_base, patch_estimation) + + # Run merging network + pix2pixmodel.test() + visuals = pix2pixmodel.get_current_visuals() + + prediction_mapped = visuals['fake_B'] + prediction_mapped = (prediction_mapped + 1) / 2 + prediction_mapped = prediction_mapped.squeeze().cpu().numpy() + + mapped = prediction_mapped + + # We use a simple linear polynomial to make sure the result of the merge network would match the values of + # base estimate + p_coef = np.polyfit(mapped.reshape(-1), patch_whole_estimate_base.reshape(-1), deg=1) + merged = np.polyval(p_coef, mapped.reshape(-1)).reshape(mapped.shape) + + merged = cv2.resize(merged, (org_size[1], org_size[0]), interpolation=cv2.INTER_CUBIC) + + # Get patch size and location + w1 = rect[0] + h1 = rect[1] + w2 = w1 + rect[2] + h2 = h1 + rect[3] + + # To speed up the implementation, we only generate the Gaussian mask once with a sufficiently large size + # and resize it to our needed size while merging the patches. 
def calculateprocessingres(img, basesize, confidence=0.1, scale_threshold=3, whole_size_threshold=3000):
    """Search for the R_x processing resolution (section 5 of the main paper).

    Parameters:
        img: input rgb image.
        basesize: size of the dilation kernel, equal to the receptive field of the network.
        confidence: value of x in R_x; allowed fraction of pixels that do not get any contextual cue.
        scale_threshold: maximum allowed upscaling of the input image.
        whole_size_threshold: maximum allowed resolution (R_max from section 6 of the main paper).

    Returns:
        A tuple ``(resolution, patch_scale)`` where ``resolution`` is the computed R_x resolution
        (``outputsize_scale * speed_scale`` as an int) and ``patch_scale`` is the K parameter from
        section 6 of the paper.
    """
    # Every image is processed at a smaller size to accelerate the R_x resolution search.
    speed_scale = 32
    image_dim = int(min(img.shape[0:2]))

    gray = rgb2gray(img)
    grad = np.abs(cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)) + np.abs(cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3))
    # NOTE: the interpolation flag must be passed by keyword. The third positional
    # parameter of cv2.resize is `dst`, so a positional flag is silently ignored.
    grad = cv2.resize(grad, (image_dim, image_dim), interpolation=cv2.INTER_AREA)

    # Threshold the gradient map to generate the edge-map as a proxy of the contextual cues.
    m = grad.min()
    M = grad.max()
    middle = m + (0.4 * (M - m))
    grad[grad < middle] = 0
    grad[grad >= middle] = 1

    base_cells = int(basesize / speed_scale)
    quarter_cells = int(basesize / (4 * speed_scale))
    # Dilation kernel with the size of the receptive field.
    kernel = np.ones((base_cells, base_cells), float)
    # Dilation kernel with a quarter of the receptive field, used to compute k
    # as described in section 6 of the main paper.
    kernel2 = np.ones((quarter_cells, quarter_cells), float)

    # Output resolution limit set by the whole_size_threshold and scale_threshold.
    threshold = min(whole_size_threshold, scale_threshold * max(img.shape[:2]))

    outputsize_scale = basesize / speed_scale
    # Fallback for an empty search range (very small inputs): without it the
    # dilation below would read an unbound variable.
    grad_resized = grad
    # A zero step would make range() raise for receptive fields below 2 * speed_scale.
    step = max(1, int(basesize / (2 * speed_scale)))
    for p_size in range(base_cells, int(threshold / speed_scale), step):
        grad_resized = resizewithpool(grad, p_size)
        grad_resized = cv2.resize(grad_resized, (p_size, p_size), interpolation=cv2.INTER_NEAREST)
        grad_resized[grad_resized >= 0.5] = 1
        grad_resized[grad_resized < 0.5] = 0

        dilated = cv2.dilate(grad_resized, kernel, iterations=1)
        # Fraction of pixels that receive no contextual cue at this resolution.
        meanvalue = (1 - dilated).mean()
        if meanvalue > confidence:
            break
        outputsize_scale = p_size

    # The edge map of the last examined resolution (including the one that
    # exceeded `confidence`) is used to estimate the patch scale.
    grad_region = cv2.dilate(grad_resized, kernel2, iterations=1)
    patch_scale = grad_region.mean()

    return int(outputsize_scale * speed_scale), patch_scale
def singleestimate(img, msize, model, net_type):
    """Run one depth estimation of `img` at network input size `msize`.

    The estimator is chosen from `net_type`: 0 -> LeReS, 10 -> Marigold,
    11 -> Depth Anything, 12-14 -> Depth Anything v2, any other value >= 7
    -> ZoeDepth (which is fed a PIL RGB image), everything else -> MiDaS.
    `msize` is passed to the estimator as both of its size arguments.
    """
    if net_type == 0:
        estimator = estimateleres
    elif net_type == 10:
        estimator = estimatemarigold
    elif net_type == 11:
        estimator = estimatedepthanything
    elif net_type in (12, 13, 14):
        estimator = estimatedepthanything_v2
    elif net_type >= 7:
        # np to PIL
        img = Image.fromarray(np.uint8(img * 255)).convert('RGB')
        estimator = estimatezoedepth
    else:
        estimator = estimatemidasBoost
    return estimator(img, model, msize, msize)
def applyGridpatch(blsize, stride, img, box):
    """Lay a regular grid of square patches over `img`.

    Patch centres start at `blsize` and advance by `stride` along both axes,
    stopping before `dimension - blsize`. Columns are the outer loop and rows
    the inner one. Each patch is a square with side ``2 * blsize``.

    Returns:
        A dict keyed by the patch index as a string ('0', '1', ...). Each
        value holds 'rect' = [x, y, width, height], offset by ``box[0]`` and
        ``box[1]``, and 'size' = the patch width.
    """
    n_rows, n_cols = img.shape[0], img.shape[1]
    side = 2 * blsize
    grid = {}
    for centre_x in range(blsize, n_cols - blsize, stride):
        for centre_y in range(blsize, n_rows - blsize, stride):
            rect = [box[0] + (centre_x - blsize), box[1] + (centre_y - blsize), side, side]
            grid[str(len(grid))] = {'rect': rect, 'size': rect[2]}
    return grid
def estimatemidasBoost(img, model, w, h):
    """Estimate depth for `img` with a MiDaS-style model and normalise it.

    The image is resized to at most `w` x `h` (aspect ratio kept, sides a
    multiple of 32), normalised with the mean/std given below, run through
    `model`, and the prediction is resized back to the input resolution.

    Returns:
        A 2-D array with the height and width of `img`, min-max scaled when
        the prediction has a usable value range. For a constant prediction an
        all-zero array of the same shape is returned. (Previously the int
        ``0`` was returned, which breaks callers that resize the result.)
    """
    # init transform
    transform = Compose(
        [
            Resize(
                w,
                h,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ]
    )

    # transform input
    img_input = transform({"image": img})["image"]

    # compute
    with torch.no_grad():
        sample = torch.from_numpy(img_input).to(depthmap_device).unsqueeze(0)
        if depthmap_device == torch.device("cuda"):
            sample = sample.to(memory_format=torch.channels_last)
        prediction = model.forward(sample)

    prediction = prediction.squeeze().cpu().numpy()
    prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)

    # normalization
    depth_min = prediction.min()
    depth_max = prediction.max()

    if depth_max - depth_min > np.finfo("float").eps:
        prediction = (prediction - depth_min) / (depth_max - depth_min)
    else:
        # Keep the array type and shape so downstream resizing still works.
        prediction = np.zeros_like(prediction)

    return prediction
+ Allows adding visibility rules quicker.""" + def __init__(self): + self.internal = {} + self.internal_ignored = {} + + def _raw_assignment(self, key, value, ignored=False): + assert key not in self.internal, f"Already bundled component with name {key}." + assert key not in self.internal_ignored, f"Already bundled component with name {key}." + if not ignored: + self.internal[key] = value + else: + self.internal_ignored[key] = value + + def _append_el(self, thing, ignored=False): + if isinstance(thing, tuple) and len(thing) == 2 and isinstance(thing[1], gr.blocks.Block): + name = thing[0] if isinstance(thing[0], str) else thing[0].name.lower() # .name is for Enums + if hasattr(thing[0], 'df') and thing[0].df is not None: + thing[1].value = thing[0].df + self._raw_assignment(name, thing[1], ignored) + elif isinstance(thing, gr.components.Component) and thing.elem_id is not None: + self._raw_assignment(thing.elem_id, thing, ignored) + else: + raise Exception(f"This object can not be bundled, {str(thing)}") + + def __iadd__(self, els): + """Add an input element that will be packed into a bundle.""" + self._append_el(els, ignored=False) + return self + + def __isub__(self, els): + """Add an element that will not be packed into a bundle, but will be accessible.""" + self._append_el(els, ignored=True) + return self + + def __ior__(self, thing): + """Add an extra bundle into your bundle, so you could have more bundeled items in your bundle.""" + assert isinstance(thing, GradioComponentBundle), "Use += or -= for bundling elements" + for key in list(thing.internal.keys()): + self._raw_assignment(key, thing[key], False) + for key in list(thing.internal_ignored.keys()): + self._raw_assignment(key, thing[key], True) + return self + + def __getitem__(self, key): + """Return the gradio component elem_id""" + if hasattr(key, 'name'): + key = key.name.lower() # for enum elements + if key in self.internal_ignored: + return self.internal_ignored[key] + return self.internal[key] + + 
def enkey_body(self):
    """Return the bundled (non-ignored) components ordered by sorted key.

    This is the list that should be passed into the function called by
    gradio. It uses the same sorted key order as `enkey_tail`.
    """
    components = self.internal
    return [components[name] for name in sorted(components)]
def ensure_file_downloaded(filename, url, sha256_hash_prefix=None):
    """Download `filename` from `url` unless the file already exists.

    Args:
        filename: destination path. If it exists, nothing is done. The hash
            is deliberately not re-checked because that is time-consuming.
        url: a single URL, or a list/tuple of mirror URLs tried in order.
        sha256_hash_prefix: forwarded to ``torch.hub.download_url_to_file``.

    Raises:
        RuntimeError: if no URL produced the file. The last download error,
            if any, is attached as the cause.
    """
    if os.path.exists(filename):
        return

    urls = list(url) if isinstance(url, (list, tuple)) else [url]
    last_error = None
    for cur_url in urls:
        # Imported lazily so the already-downloaded path never pays for it.
        import torch
        print("Downloading", cur_url, "to", filename)
        try:
            torch.hub.download_url_to_file(cur_url, filename, sha256_hash_prefix)
        except Exception as e:  # best effort: remember the failure, try the next mirror
            last_error = e
            print("Download from", cur_url, "failed:", e)
            continue
        if os.path.exists(filename):
            return  # The correct model was downloaded, no need to try more
    raise RuntimeError(f'Download failed. '
                       f'Try again later or manually download the file from {urls} '
                       f'to location {filename}.') from last_error
# Numba is optional: it only accelerates the stereo image loops below.
# Any import-time failure (not just ImportError) falls back to pure Python.
try:
    from numba import njit, prange
except Exception as e:
    print(f"WARINING! Numba failed to import! Stereoimage generation will be much slower! ({str(e)})")
    from builtins import range as prange

    def njit(*args, **kwargs):
        """Pure-Python stand-in for ``numba.njit``.

        Supports both decorator forms, ``@njit`` and ``@njit(option=...)``.
        All options are ignored and the function is returned unchanged.
        """
        if len(args) == 1 and callable(args[0]) and not kwargs:
            return args[0]  # bare ``@njit`` usage

        def decorate(func):
            return func
        return decorate
def apply_stereo_divergence(original_image, depth, divergence, separation, stereo_offset_exponent, fill_technique):
    """Shift the pixels of `original_image` horizontally according to `depth`.

    `depth` is min-max normalised to [0, 1]. `divergence` and `separation`
    are percentages of the image width and are converted to pixels here. The
    work is then delegated to the naive or the polylines implementation,
    depending on `fill_technique`.

    Args:
        original_image: array of shape (height, width, channels).
        depth: array of shape (height, width).
        divergence: strength of the shift, in percent of the image width.
        separation: constant horizontal offset, in percent of the image width.
        stereo_offset_exponent: exponent applied to the normalised depth.
        fill_technique: one of 'none', 'naive', 'naive_interpolating',
            'polylines_soft', 'polylines_sharp'.

    Raises:
        AssertionError: if the depth map and the image differ in size.
        ValueError: if `fill_technique` is not one of the supported values
            (previously ``None`` was returned silently).
    """
    assert original_image.shape[:2] == depth.shape, 'Depthmap and the image must have the same size'

    naive_techniques = ('none', 'naive', 'naive_interpolating')
    polyline_techniques = ('polylines_soft', 'polylines_sharp')
    if fill_technique not in naive_techniques + polyline_techniques:
        raise ValueError(f'Unknown fill technique: {fill_technique!r}')

    depth_min = depth.min()
    depth_max = depth.max()
    depth_range = depth_max - depth_min
    if depth_range > 0:
        normalized_depth = (depth - depth_min) / depth_range
    else:
        # A constant depth map has no relief. Treat every pixel as lying on
        # the far plane instead of dividing by zero and propagating NaNs.
        normalized_depth = np.zeros(depth.shape, dtype=np.float64)
    divergence_px = (divergence / 100.0) * original_image.shape[1]
    separation_px = (separation / 100.0) * original_image.shape[1]

    if fill_technique in naive_techniques:
        return apply_stereo_divergence_naive(
            original_image, normalized_depth, divergence_px, separation_px, stereo_offset_exponent, fill_technique
        )
    return apply_stereo_divergence_polylines(
        original_image, normalized_depth, divergence_px, separation_px, stereo_offset_exponent, fill_technique
    )
@njit(parallel=True)  # fastmath=True does not reasonably improve performance
def apply_stereo_divergence_polylines(
        original_image, normalized_depth, divergence_px: float, separation_px: float, stereo_offset_exponent: float,
        fill_technique: str):
    """Apply divergence by treating every image row as a polyline.

    For each row the function generates the polyline vertices, morphs them
    (applies divergence and separation), and rasterizes the result back into
    pixels. With ``fill_technique == 'polylines_sharp'`` every pixel
    contributes two vertices, 0.45 px either side of its centre; otherwise it
    contributes one vertex.

    Returns an array shaped like `original_image`.
    """
    EPSILON = 1e-7
    PIXEL_HALF_WIDTH = 0.45 if fill_technique == 'polylines_sharp' else 0.0

    h, w, c = original_image.shape
    derived_image = np.zeros_like(original_image)
    for row in prange(h):
        # generating the vertices of the morphed polyline
        # format: new coordinate of the vertex, divergence (closeness), column of pixel that contains the point's color
        # np.float64 is used explicitly: the np.float_ alias was removed in NumPy 2.0.
        pt = np.zeros((5 + 2 * w, 3), dtype=np.float64)
        pt_end: int = 0
        pt[pt_end] = [-1.0 * w, 0.0, 0.0]
        pt_end += 1
        for col in range(0, w):
            coord_d = (normalized_depth[row][col] ** stereo_offset_exponent) * divergence_px
            coord_x = col + 0.5 + coord_d + separation_px
            if PIXEL_HALF_WIDTH < EPSILON:
                pt[pt_end] = [coord_x, abs(coord_d), col]
                pt_end += 1
            else:
                pt[pt_end] = [coord_x - PIXEL_HALF_WIDTH, abs(coord_d), col]
                pt[pt_end + 1] = [coord_x + PIXEL_HALF_WIDTH, abs(coord_d), col]
                pt_end += 2
        pt[pt_end] = [2.0 * w, 0.0, w - 1]
        pt_end += 1

        # generating the segments of the morphed polyline
        # format: coord_x, coord_d, color_i of the first point, then the same for the second point
        sg_end: int = pt_end - 1
        sg = np.zeros((sg_end, 6), dtype=np.float64)
        for i in range(sg_end):
            sg[i] += np.concatenate((pt[i], pt[i + 1]))
        # Informal argument that the morphed polyline does not self-intersect:
        # every vertex moves along coord_x and coord_d in the same fixed proportion
        # (-1 or +1, depending on the sign of divergence_px), so the displacement
        # lines of all vertices are parallel and cannot cross. After rotating the
        # (coord_x, coord_d) plot by 45 or -45 degrees, each vertex lies further
        # right than the previous one, so the polyline cannot self-intersect.

        # sort segments and points using insertion sort
        # has a very good performance in practice, since these are almost sorted to begin with
        for i in range(1, sg_end):
            u = i - 1
            # The bound is checked first so that pt[-1] is never read.
            while 0 <= u and pt[u][0] > pt[u + 1][0]:
                pt[u], pt[u + 1] = np.copy(pt[u + 1]), np.copy(pt[u])
                sg[u], sg[u + 1] = np.copy(sg[u + 1]), np.copy(sg[u])
                u -= 1

        # rasterizing
        # at each point in time we keep track of segments that are "active" (or "current")
        csg = np.zeros((5 * int(abs(divergence_px)) + 25, 6), dtype=np.float64)
        csg_end: int = 0
        sg_pointer: int = 0
        # and index of the point that should be processed next
        pt_i: int = 0
        for col in range(w):  # iterate over regions (that will be rasterized into pixels)
            color = np.full(c, 0.5, dtype=np.float64)  # we start with 0.5 because of how floats are converted to ints
            while pt[pt_i][0] < col:
                pt_i += 1
            pt_i -= 1  # pt_i now points to the dot before the region start
            # Finding segment' parts that contribute color to the region
            while pt[pt_i][0] < col + 1:
                coord_from = max(col, pt[pt_i][0]) + EPSILON
                coord_to = min(col + 1, pt[pt_i + 1][0]) - EPSILON
                significance = coord_to - coord_from
                # the color at center point is the same as the average of color of segment part
                coord_center = coord_from + 0.5 * significance

                # adding segments that now may contribute
                while sg_pointer < sg_end and sg[sg_pointer][0] < coord_center:
                    csg[csg_end] = sg[sg_pointer]
                    sg_pointer += 1
                    csg_end += 1
                # removing segments that will no longer contribute
                csg_i = 0
                while csg_i < csg_end:
                    if csg[csg_i][3] < coord_center:
                        csg[csg_i] = csg[csg_end - 1]
                        csg_end -= 1
                    else:
                        csg_i += 1
                # finding the closest segment (segment with most divergence)
                # note that this segment will be the closest from coord_from right up to coord_to, since there
                # no new segments "appearing" inbetween these two and _the polyline does not self-intersect_
                best_csg_i: int = 0
                if csg_end != 1:
                    best_csg_closeness: float = -EPSILON
                    for csg_i in range(csg_end):
                        ip_k = (coord_center - csg[csg_i][0]) / (csg[csg_i][3] - csg[csg_i][0])
                        # assert 0.0 <= ip_k <= 1.0
                        closeness = (1.0 - ip_k) * csg[csg_i][1] + ip_k * csg[csg_i][4]
                        if best_csg_closeness < closeness and 0.0 < ip_k < 1.0:
                            best_csg_closeness = closeness
                            best_csg_i = csg_i
                # getting the color
                col_l: int = int(csg[best_csg_i][2] + EPSILON)
                col_r: int = int(csg[best_csg_i][5] + EPSILON)
                if col_l == col_r:
                    color += original_image[row][col_l] * significance
                else:
                    ip_k = (coord_center - csg[best_csg_i][0]) / (csg[best_csg_i][3] - csg[best_csg_i][0])
                    color += (original_image[row][col_l] * (1.0 - ip_k) +
                              original_image[row][col_r] * ip_k
                              ) * significance
                pt_i += 1
            derived_image[row][col] = np.asarray(color, dtype=np.uint8)
    return derived_image
maybe_depthvideo=False): + """Takes the filepath, returns (fps, frames). Every frame is a Pillow Image object""" + suffix = pathlib.Path(path).suffix + if suffix.lower() == '.gif': + frames = [] + img = Image.open(path) + for i in range(img.n_frames): + img.seek(i) + frames.append(img.convert('RGB')) + return 1000 / img.info['duration'], frames + if suffix.lower() == '.mts': + import imageio_ffmpeg + import av + container = av.open(path) + frames = [] + for packet in container.demux(video=0): + for frame in packet.decode(): + # Convert the frame to a NumPy array + numpy_frame = frame.to_ndarray(format='rgb24') + # Convert the NumPy array to a Pillow Image + image = Image.fromarray(numpy_frame) + frames.append(image) + fps = float(container.streams.video[0].average_rate) + container.close() + return fps, frames + if suffix.lower() in ['.avi'] and maybe_depthvideo: + try: + import imageio_ffmpeg + # Suppose there are in fact 16 bits per pixel + # If this is not the case, this is not a 16-bit depthvideo, so no need to process it this way + gen = imageio_ffmpeg.read_frames(path, pix_fmt='gray16le', bits_per_pixel=16) + video_info = next(gen) + if video_info['pix_fmt'] == 'gray16le': + width, height = video_info['size'] + frames = [] + for frame in gen: + # Not sure if this is implemented somewhere else + result = np.frombuffer(frame, dtype='uint16') + result.shape = (height, width) # Why does it work? I don't remotely have any idea. 
def frames_to_video(fps, frames, path, name, colorvids_bitrate=None):
    """Write ``frames`` to a video file named ``name`` (plus an extension) inside ``path``.

    Frames whose mode is ``'I;16'`` are written as a 16-bit grayscale FFV1 ``.avi``.
    Any other frames are written with the first container/codec pair that succeeds; when
    ``colorvids_bitrate`` is set the preference order is reversed, so lossy codecs are tried first.

    :param fps: frames per second of the resulting video
    :param frames: non-empty sequence of Pillow images; the first one decides the output kind
    :param path: output directory
    :param name: output file name without extension
    :param colorvids_bitrate: bitrate in kbit/s for lossy codecs, or None to leave it to the encoder
    :raises Exception: if none of the container/codec pairs could be written
    """
    if frames[0].mode == 'I;16':  # depthmap video
        import imageio_ffmpeg
        writer = imageio_ffmpeg.write_frames(
            os.path.join(path, f"{name}.avi"), frames[0].size, 'gray16le', 'gray16le', fps, codec='ffv1',
            macro_block_size=1)
        try:
            writer.send(None)  # the generator must be primed before it accepts frames
            for frame in frames:
                writer.send(np.array(frame))
        finally:
            writer.close()
        return

    from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
    arrs = [np.asarray(frame) for frame in frames]
    clip = ImageSequenceClip(arrs, fps=fps)
    lossless_codecs = ('png', 'rawvideo')
    priority = [('avi', 'png'), ('avi', 'rawvideo'), ('mp4', 'libx264'), ('webm', 'libvpx')]
    if colorvids_bitrate:
        priority = reversed(priority)
    for v_format, codec in priority:
        # Without a requested bitrate pass None - formatting None would yield the invalid value 'Nonek'
        br = f'{colorvids_bitrate}k' if colorvids_bitrate and codec not in lossless_codecs else None
        try:
            clip.write_videofile(os.path.join(path, f"{name}.{v_format}"), codec=codec, bitrate=br)
            break
        except Exception:  # best effort: report and try the next codec, but let KeyboardInterrupt through
            traceback.print_exc()
    else:
        raise Exception('Saving the video failed!')


def process_predicitons(predictions, smoothening='none'):
    """Rescale a sequence of depth predictions with bounds shared by all frames.

    :param predictions: sequence of numpy arrays, one per frame
    :param smoothening: ``'none'`` scales by the global minimum and maximum;
        ``'experimental'`` scales by the 0.5th and 99.5th percentiles of temporally blurred frames
        (the blurred frames only provide the bounds - the frames returned are the rescaled inputs,
        so values outside 0..1 are possible); any other value returns ``predictions`` untouched
    :return: list of rescaled arrays (empty when ``predictions`` is empty)
    """
    def global_scaling(objs, a=None, b=None):
        """Normalizes objs, but uses (a, b) instead of (minimum, maximum) value of objs, if supplied"""
        min_value = a if a is not None else min(obj.min() for obj in objs)
        max_value = b if b is not None else max(obj.max() for obj in objs)
        span = max_value - min_value
        if span == 0:
            # Constant depth: avoid dividing by zero (would produce NaN/inf); frames just get shifted by min_value
            span = 1
        return [(obj - min_value) / span for obj in objs]

    print('Processing generated depthmaps')
    if len(predictions) == 0:
        return []
    # TODO: Detect cuts and process segments separately
    if smoothening == 'none':
        return global_scaling(predictions)
    elif smoothening == 'experimental':
        last = len(predictions) - 1
        weights = (0.10, 0.20, 0.40, 0.20, 0.10)  # Eyeballed it, math person please fix this
        processed = []
        for i in range(len(predictions)):
            # NOTE(review): zeros_like keeps the input dtype, so this presumably expects float frames
            f = np.zeros_like(predictions[i])
            for u, mul in enumerate(weights):
                neighbour = min(max(0, i + (u - 2)), last)  # clamp the window at both ends of the sequence
                f += mul * predictions[neighbour]
            processed.append(f)
        # This could have been deterministic monte carlo... Oh well, this version is faster.
        a, b = np.percentile(np.stack(processed), [0.5, 99.5])
        return global_scaling(predictions, a, b)
    return predictions
Properly processed depth video will be created in the second pass + first_pass_inp[go.DO_OUTPUT_DEPTH.name] = False + + gen_obj = core.core_generation_funnel(None, input_images, None, None, first_pass_inp) + input_depths = [x[2] for x in list(gen_obj)] + input_depths = process_predicitons(input_depths, smoothening) + else: + print('Using custom depthmap video') + cdm_fps, input_depths = open_path_as_images(os.path.abspath(custom_depthmap.name), maybe_depthvideo=True) + assert len(input_depths) == len(input_images), 'Custom depthmap video length does not match input video length' + if input_depths[0].size != input_images[0].size: + print('Warning! Input video size and depthmap video size are not the same!') + + print('Generating output frames') + img_results = list(core.core_generation_funnel(None, input_images, input_depths, None, inp)) + gens = list(set(map(lambda x: x[1], img_results))) + + print('Saving generated frames as video outputs') + for gen in gens: + if gen == 'depth' and custom_depthmap is not None: + # Well, that would be extra stupid, even if user has picked this option for some reason + # (forgot to change the default?) + continue + + imgs = [x[2] for x in img_results if x[1] == gen] + basename = f'{gen}_video' + frames_to_video(fps, imgs, outpath, f"depthmap-{backbone.get_next_sequence_number(outpath, basename)}-{basename}", + colorvids_bitrate) + print('All done. Video(s) saved!') + return '

Videos generated

' if len(gens) > 1 else '

Video generated

' if len(gens) == 1 \ + else '

Nothing generated - please check the settings and try again

'