avans06 committed on
Commit 20b04f8 · 1 Parent(s): 69d49d2

Add display of execution time for the upscale and face enhancer models.

Files changed (2)
  1. app.py +517 -353
  2. utils/dataops.py +38 -55
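The hunks below only show `import time` being added to app.py; the rest of the timing logic sits outside this excerpt. As a minimal sketch of how per-model execution time could be measured and formatted for display (the helper name `run_timed` and the usage lines are illustrative assumptions, not taken from the commit):

```python
import time

def run_timed(label, fn, *args, **kwargs):
    """Run fn(*args, **kwargs) and return (result, human-readable timing string)."""
    start = time.perf_counter()
    result = fn(*args, **kwargs)
    elapsed = time.perf_counter() - start
    return result, f"{label}: {elapsed:.2f}s"

# Hypothetical usage, with upscaler/face_enhancer standing in for the real model objects:
# upscaled, t_upscale = run_timed("upscale", upscaler.enhance, img, outscale=4)
# restored, t_face = run_timed("face enhancer", face_enhancer.enhance, upscaled)
# print(", ".join([t_upscale, t_face]))
```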
app.py CHANGED
@@ -7,6 +7,7 @@ import gradio as gr
7
  import torch
8
  import traceback
9
  import math
 
10
  from collections import defaultdict
11
  from facexlib.utils.misc import download_from_url
12
  from basicsr.utils.realesrganer import RealESRGANer
@@ -58,40 +59,49 @@ Simple yet effective, GPEN outperforms state-of-the-art methods, delivering real
58
  "https://github.com/wzhouxiff/RestoreFormerPlusPlus", "The same as RestoreFormer++ but legacy model"],
59
  }
60
  upscale_models = {
61
- # SRVGGNet
62
  "realesr-general-x4v3.pth": ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth",
63
  "https://github.com/xinntao/Real-ESRGAN/releases/tag/v0.3.0",
64
- """add realesr-general-x4v3 and realesr-general-wdn-x4v3. They are very tiny models for general scenes, and they may be more robust. But as they are tiny models, their performance may be limited."""],
 
65
 
66
  "realesr-animevideov3.pth": ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth",
67
  "https://github.com/xinntao/Real-ESRGAN/releases/tag/v0.2.5.0",
68
- """update the RealESRGAN AnimeVideo-v3 model, which can achieve better results with a faster inference speed."""],
 
69
 
70
  "4xLSDIRCompact.pth": ["https://github.com/Phhofm/models/releases/download/4xLSDIRCompact/4xLSDIRCompact.pth",
71
  "https://github.com/Phhofm/models/releases/tag/4xLSDIRCompact",
72
- """Phhofm: Upscale small good quality photos to 4x their size. This is my first ever released self-trained sisr upscaling model."""],
 
73
 
74
  "4xLSDIRCompactC.pth": ["https://github.com/Phhofm/models/releases/download/4xLSDIRCompactC/4xLSDIRCompactC.pth",
75
  "https://github.com/Phhofm/models/releases/tag/4xLSDIRCompactC",
76
- """Phhofm: 4x photo upscaler that handles jpg compression. Trying to extend my previous model to be able to handle compression (JPG 100-30) by manually altering the training dataset, since 4xLSDIRCompact can't handle compression. Use this instead of 4xLSDIRCompact if your photo has compression (like an image from the web)."""],
 
77
 
78
  "4xLSDIRCompactR.pth": ["https://github.com/Phhofm/models/releases/download/4xLSDIRCompactC/4xLSDIRCompactR.pth",
79
  "https://github.com/Phhofm/models/releases/tag/4xLSDIRCompactC",
80
- """Phhofm: 4x photo upscaler that handles jpg compression, noise and slight blur. Extending my last 4xLSDIRCompact model to Real-ESRGAN, meaning trained on synthetic data instead to handle more kinds of degradations, it should be able to handle compression, noise, and slight blur."""],
 
81
 
82
  "4xLSDIRCompactN.pth": ["https://github.com/Phhofm/models/releases/download/4xLSDIRCompact3/4xLSDIRCompactC3.pth",
83
  "https://github.com/Phhofm/models/releases/tag/4xLSDIRCompact3",
84
- """Phhofm: Upscale good quality input photos to x4 their size. The original 4xLSDIRCompact trained a bit more; it cannot handle degradation.
 
85
  I am releasing the Series 3 from my 4xLSDIRCompact models. In general my suggestion is, if you have good quality input images use 4xLSDIRCompactN3, otherwise try 4xLSDIRCompactC3 which will be able to handle jpg compression and a bit of blur, or then 4xLSDIRCompactCR3, which is an interpolation between C3 and R3 to be able to handle a bit of noise additionally."""],
86
 
87
  "4xLSDIRCompactC3.pth": ["https://github.com/Phhofm/models/releases/download/4xLSDIRCompact3/4xLSDIRCompactC3.pth",
88
  "https://github.com/Phhofm/models/releases/tag/4xLSDIRCompact3",
89
- """Phhofm: Upscale compressed photos to x4 their size. Able to handle JPG compression (30-100).
 
 
90
  I am releasing the Series 3 from my 4xLSDIRCompact models. In general my suggestion is, if you have good quality input images use 4xLSDIRCompactN3, otherwise try 4xLSDIRCompactC3 which will be able to handle jpg compression and a bit of blur, or then 4xLSDIRCompactCR3, which is an interpolation between C3 and R3 to be able to handle a bit of noise additionally."""],
91
 
92
  "4xLSDIRCompactR3.pth": ["https://github.com/Phhofm/models/releases/download/4xLSDIRCompact3/4xLSDIRCompactR3.pth",
93
  "https://github.com/Phhofm/models/releases/tag/4xLSDIRCompact3",
94
- """Phhofm: Upscale (degraded) photos to x4 their size. Trained on synthetic data, meant to handle more degradations.
 
95
  I am releasing the Series 3 from my 4xLSDIRCompact models. In general my suggestion is, if you have good quality input images use 4xLSDIRCompactN3, otherwise try 4xLSDIRCompactC3 which will be able to handle jpg compression and a bit of blur, or then 4xLSDIRCompactCR3, which is an interpolation between C3 and R3 to be able to handle a bit of noise additionally."""],
96
 
97
  "4xLSDIRCompactCR3.pth": ["https://github.com/Phhofm/models/releases/download/4xLSDIRCompact3/4xLSDIRCompactCR3.pth",
@@ -100,125 +110,173 @@ I am releasing the Series 3 from my 4xLSDIRCompact models. In general my suggest
100
 
101
  "2xParimgCompact.pth": ["https://github.com/Phhofm/models/releases/download/2xParimgCompact/2xParimgCompact.pth",
102
  "https://github.com/Phhofm/models/releases/tag/2xParimgCompact",
103
- """Phhofm: A 2x photo upscaling compact model based on Microsoft's ImagePairs. This was one of the earliest models I started training and have now finished for release. As can be seen in the examples, this model will affect colors."""],
 
104
 
105
  "1xExposureCorrection_compact.pth": ["https://github.com/Phhofm/models/releases/download/1xExposureCorrection_compact/1xExposureCorrection_compact.pth",
106
- "https://github.com/Phhofm/models/releases/tag/1xExposureCorrection_compact",
107
- """Phhofm: This model is meant as an experiment to see if compact can be used to train on photos to exposure correct those using the pixel, perceptual, color, color and ldl losses. There is no brightness loss. Still it seems to kinda work."""],
 
108
 
109
  "1xUnderExposureCorrection_compact.pth": ["https://github.com/Phhofm/models/releases/download/1xExposureCorrection_compact/1xUnderExposureCorrection_compact.pth",
110
- "https://github.com/Phhofm/models/releases/tag/1xExposureCorrection_compact",
111
- """Phhofm: This model is meant as an experiment to see if compact can be used to train on underexposed images to exposure correct those using the pixel, perceptual, color, color and ldl losses. There is no brightness loss. Still it seems to kinda work."""],
 
112
 
113
  "1xOverExposureCorrection_compact.pth": ["https://github.com/Phhofm/models/releases/download/1xExposureCorrection_compact/1xOverExposureCorrection_compact.pth",
114
- "https://github.com/Phhofm/models/releases/tag/1xExposureCorrection_compact",
115
- """Phhofm: This model is meant as an experiment to see if compact can be used to train on overexposed images to exposure correct those using the pixel, perceptual, color, color and ldl losses. There is no brightness loss. Still it seems to kinda work."""],
 
116
 
117
  # RRDBNet
118
  "RealESRGAN_x4plus_anime_6B.pth": ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth",
119
  "https://github.com/xinntao/Real-ESRGAN/releases/tag/v0.2.2.4",
120
- """We add RealESRGAN_x4plus_anime_6B.pth, which is optimized for anime images with much smaller model size. More details and comparisons with waifu2x are in anime_model.md"""],
 
121
 
122
  "RealESRGAN_x2plus.pth" : ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth",
123
  "https://github.com/xinntao/Real-ESRGAN/releases/tag/v0.2.1",
124
- """Add RealESRGAN_x2plus.pth model"""],
 
125
 
126
  "RealESRNet_x4plus.pth" : ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth",
127
  "https://github.com/xinntao/Real-ESRGAN/releases/tag/v0.1.1",
128
- """This release is mainly for storing pre-trained models and executable files."""],
 
129
 
130
  "RealESRGAN_x4plus.pth" : ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
131
  "https://github.com/xinntao/Real-ESRGAN/releases/tag/v0.1.0",
132
- """This release is mainly for storing pre-trained models and executable files."""],
 
133
 
134
  # ESRGAN(oldRRDB)
135
  "4x-AnimeSharp.pth": ["https://huggingface.co/utnah/esrgan/resolve/main/4x-AnimeSharp.pth?download=true",
136
  "https://openmodeldb.info/models/4x-AnimeSharp",
137
- """Interpolation between 4x-UltraSharp and 4x-TextSharp-v0.5. Works amazingly on anime. It also upscales text, but it's far better with anime content."""],
 
138
 
139
  "4x_IllustrationJaNai_V1_ESRGAN_135k.pth": ["https://drive.google.com/uc?export=download&confirm=1&id=1qpioSqBkB_IkSBhEAewSSNFt6qgkBimP",
140
  "https://openmodeldb.info/models/4x-IllustrationJaNai-V1-DAT2",
141
- """Purpose: Illustrations, digital art, manga covers
142
- Model for color images including manga covers and color illustrations, digital art, visual novel art, artbooks, and more.
143
  DAT2 version is the highest quality version but also the slowest. See the ESRGAN version for faster performance."""],
144
 
145
  "2x-sudo-RealESRGAN.pth": ["https://objectstorage.us-phoenix-1.oraclecloud.com/n/ax6ygfvpvzka/b/open-modeldb-files/o/2x-sudo-RealESRGAN.pth",
146
  "https://openmodeldb.info/models/2x-sudo-RealESRGAN",
147
- """Pretrained: Pretrained_Model_G: RealESRGAN_x4plus_anime_6B.pth / RealESRGAN_x4plus_anime_6B.pth (sudo_RealESRGAN2x_3.332.758_G.pth)
148
- Tried to make the best 2x model there is for drawings. I think I achieved that.
149
  And yes, it is nearly 3.8 million iterations (probably a record nobody will beat here), took me nearly half a year to train.
150
It can happen that a noisy pattern appears at the edges. You can use padding/crop for that.
151
- I aimed for perceptual quality without zooming in like 400%. Since RealESRGAN is 4x, I downscaled these images with bicubic."""],
 
152
 
153
  "2x-sudo-RealESRGAN-Dropout.pth": ["https://objectstorage.us-phoenix-1.oraclecloud.com/n/ax6ygfvpvzka/b/open-modeldb-files/o/2x-sudo-RealESRGAN-Dropout.pth",
154
  "https://openmodeldb.info/models/2x-sudo-RealESRGAN-Dropout",
155
- """Pretrained: Pretrained_Model_G: RealESRGAN_x4plus_anime_6B.pth / RealESRGAN_x4plus_anime_6B.pth (sudo_RealESRGAN2x_3.332.758_G.pth)
156
- Tried to make the best 2x model there is for drawings. I think I achieved that.
157
  And yes, it is nearly 3.8 million iterations (probably a record nobody will beat here), took me nearly half a year to train.
158
It can happen that a noisy pattern appears at the edges. You can use padding/crop for that.
159
- I aimed for perceptual quality without zooming in like 400%. Since RealESRGAN is 4x, I downscaled these images with bicubic."""],
 
160
 
161
  "4xNomos2_otf_esrgan.pth": ["https://github.com/Phhofm/models/releases/download/4xNomos2_otf_esrgan/4xNomos2_otf_esrgan.pth",
162
  "https://github.com/Phhofm/models/releases/tag/4xNomos2_otf_esrgan",
163
- """Phhofm: Restoration, 4x ESRGAN model for photography, trained using the Real-ESRGAN otf degradation pipeline."""],
 
164
 
165
  "4xNomosWebPhoto_esrgan.pth": ["https://github.com/Phhofm/models/releases/download/4xNomosWebPhoto_esrgan/4xNomosWebPhoto_esrgan.pth",
166
  "https://github.com/Phhofm/models/releases/tag/4xNomosWebPhoto_esrgan",
167
- """Phhofm: Restoration, 4x ESRGAN model for photography, trained with realistic noise, lens blur, jpg and webp re-compression.
 
168
  ESRGAN version of 4xNomosWebPhoto_RealPLKSR, trained on the same dataset and in the same way."""],
169
 
170
  # DATNet
171
  "4xNomos8kDAT.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos8kDAT/4xNomos8kDAT.pth",
172
  "https://openmodeldb.info/models/4x-Nomos8kDAT",
173
- """Phhofm: A 4x photo upscaler with otf jpg compression, blur and resize, trained on musl's Nomos8k_sfw dataset for realistic sr, this time based on the DAT arch, as a finetune on the official 4x DAT model."""],
 
174
 
175
  "4x-DWTP-DS-dat2-v3.pth" : ["https://objectstorage.us-phoenix-1.oraclecloud.com/n/ax6ygfvpvzka/b/open-modeldb-files/o/4x-DWTP-DS-dat2-v3.pth",
176
  "https://openmodeldb.info/models/4x-DWTP-DS-dat2-v3",
177
- """DAT descreentone model, designed to reduce discrepancies on tiles caused by the first version's excessive loss, while no longer removing the paper texture"""],
 
178
 
179
  "4xBHI_dat2_real.pth" : ["https://github.com/Phhofm/models/releases/download/4xBHI_dat2_real/4xBHI_dat2_real.pth",
180
  "https://github.com/Phhofm/models/releases/tag/4xBHI_dat2_real",
181
- """Phhofm: 4x dat2 upscaling model for web and realistic images. It handles realistic noise, some realistic blur, and webp and jpg (re)compression. Trained on my BHI dataset (390'035 training tiles) with degraded LR subset."""],
 
182
 
183
  "4xBHI_dat2_otf.pth" : ["https://github.com/Phhofm/models/releases/download/4xBHI_dat2_otf/4xBHI_dat2_otf.pth",
184
  "https://github.com/Phhofm/models/releases/tag/4xBHI_dat2_otf",
185
- """Phhofm: 4x dat2 upscaling model, trained with the real-esrgan otf pipeline on my bhi dataset. Handles noise and compression."""],
 
186
 
187
  "4xBHI_dat2_multiblur.pth" : ["https://github.com/Phhofm/models/releases/download/4xBHI_dat2_multiblurjpg/4xBHI_dat2_multiblur.pth",
188
  "https://github.com/Phhofm/models/releases/tag/4xBHI_dat2_multiblurjpg",
189
- """Phhofm: 4x dat2 upscaling model, trained with down_up,linear, cubic_mitchell, lanczos, gauss and box scaling algos, some average, gaussian and anisotropic blurs and jpg compression. Trained on my BHI sisr dataset."""],
190
 
191
  "4xBHI_dat2_multiblurjpg.pth" : ["https://github.com/Phhofm/models/releases/download/4xBHI_dat2_multiblurjpg/4xBHI_dat2_multiblurjpg.pth",
192
  "https://github.com/Phhofm/models/releases/tag/4xBHI_dat2_multiblurjpg",
193
- """Phhofm: 4x dat2 upscaling model, trained with down_up,linear, cubic_mitchell, lanczos, gauss and box scaling algos, some average, gaussian and anisotropic blurs and jpg compression. Trained on my BHI sisr dataset."""],
 
194
 
195
  "4x_IllustrationJaNai_V1_DAT2_190k.pth": ["https://drive.google.com/uc?export=download&confirm=1&id=1qpioSqBkB_IkSBhEAewSSNFt6qgkBimP",
196
  "https://openmodeldb.info/models/4x-IllustrationJaNai-V1-DAT2",
197
- """Purpose: Illustrations, digital art, manga covers
198
- Model for color images including manga covers and color illustrations, digital art, visual novel art, artbooks, and more.
199
  DAT2 version is the highest quality version but also the slowest. See the ESRGAN version for faster performance."""],
200
 
201
  "4x-PBRify_UpscalerDAT2_V1.pth": ["https://github.com/Kim2091/Kim2091-Models/releases/download/4x-PBRify_UpscalerDAT2_V1/4x-PBRify_UpscalerDAT2_V1.pth",
202
  "https://github.com/Kim2091/Kim2091-Models/releases/tag/4x-PBRify_UpscalerDAT2_V1",
203
- """Kim2091: Yet another model in the PBRify_Remix series. This is a new upscaler to replace the previous 4x-PBRify_UpscalerSIR-M_V2 model.
 
204
  This model far exceeds the quality of the previous, with far more natural detail generation and better reconstruction of lines and edges."""],
205
 
206
  "4xBHI_dat2_otf_nn.pth": ["https://github.com/Phhofm/models/releases/download/4xBHI_dat2_otf_nn/4xBHI_dat2_otf_nn.pth",
207
  "https://github.com/Phhofm/models/releases/tag/4xBHI_dat2_otf_nn",
208
- """Phhofm: 4x dat2 upscaling model, trained with the real-esrgan otf pipeline but without noise, on my bhi dataset. Handles resizes, and jpg compression."""],
 
209
 
210
  # HAT
211
  "4xNomos8kSCHAT-L.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos8kSCHAT/4xNomos8kSCHAT-L.pth",
212
  "https://openmodeldb.info/models/4x-Nomos8kSCHAT-L",
213
- """Phhofm: 4x photo upscaler with otf jpg compression and blur, trained on musl's Nomos8k_sfw dataset for realistic sr. Since this is a big model, upscaling might take a while."""],
 
214
 
215
  "4xNomos8kSCHAT-S.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos8kSCHAT/4xNomos8kSCHAT-S.pth",
216
  "https://openmodeldb.info/models/4x-Nomos8kSCHAT-S",
217
- """Phhofm: 4x photo upscaler with otf jpg compression and blur, trained on musl's Nomos8k_sfw dataset for realistic sr. HAT-S version/model."""],
 
218
 
219
  "4xNomos8kHAT-L_otf.pth": ["https://github.com/Phhofm/models/releases/download/4xNomos8kHAT-L_otf/4xNomos8kHAT-L_otf.pth",
220
  "https://openmodeldb.info/models/4x-Nomos8kHAT-L-otf",
221
- """Phhofm: 4x photo upscaler trained with otf"""],
 
222
 
223
  "4xBHI_small_hat-l.pth": ["https://github.com/Phhofm/models/releases/download/4xBHI_small_hat-l/4xBHI_small_hat-l.pth",
224
  "https://github.com/Phhofm/models/releases/tag/4xBHI_small_hat-l",
@@ -230,116 +288,134 @@ also included in this release, which might still feel soft if you are used to sh
230
  # RealPLKSR_dysample
231
  "4xHFA2k_ludvae_realplksr_dysample.pth": ["https://github.com/Phhofm/models/releases/download/4xHFA2k_ludvae_realplksr_dysample/4xHFA2k_ludvae_realplksr_dysample.pth",
232
  "https://openmodeldb.info/models/4x-HFA2k-ludvae-realplksr-dysample",
233
- """Phhofm: A Dysample RealPLKSR 4x upscaling model for anime single-image resolution."""],
 
234
 
235
  "4xArtFaces_realplksr_dysample.pth" : ["https://github.com/Phhofm/models/releases/download/4xArtFaces_realplksr_dysample/4xArtFaces_realplksr_dysample.pth",
236
  "https://openmodeldb.info/models/4x-ArtFaces-realplksr-dysample",
237
- """Phhofm: A Dysample RealPLKSR 4x upscaling model for art / painted faces."""],
 
238
 
239
  "4x-PBRify_RPLKSRd_V3.pth" : ["https://github.com/Kim2091/Kim2091-Models/releases/download/4x-PBRify_RPLKSRd_V3/4x-PBRify_RPLKSRd_V3.pth",
240
  "https://github.com/Kim2091/Kim2091-Models/releases/tag/4x-PBRify_RPLKSRd_V3",
241
- """Kim2091: This update brings a new upscaling model, 4x-PBRify_RPLKSRd_V3. This model is roughly 8x faster than the current DAT2 model, while being higher quality.
 
242
  It produces far more natural detail, resolves lines and edges more smoothly, and cleans up compression artifacts better.
243
  As a result of those improvements, PBR is also much improved. It tends to be clearer with less defined artifacts."""],
244
 
245
  "4xNomos2_realplksr_dysample.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos2_realplksr_dysample/4xNomos2_realplksr_dysample.pth",
246
  "https://openmodeldb.info/models/4x-Nomos2-realplksr-dysample",
247
- """Phhofm: A Dysample RealPLKSR 4x upscaling model that was trained with / handles jpg compression down to 70 on the Nomosv2 dataset, preserves DoF.
 
248
This model affects / saturates colors, which can be counteracted a bit by using wavelet color fix, as used in these examples."""],
249
 
250
  # RealPLKSR
251
  "2x-AnimeSharpV2_RPLKSR_Sharp.pth": ["https://github.com/Kim2091/Kim2091-Models/releases/download/2x-AnimeSharpV2_Set/2x-AnimeSharpV2_RPLKSR_Sharp.pth",
252
  "https://github.com/Kim2091/Kim2091-Models/releases/tag/2x-AnimeSharpV2_Set",
253
- """Kim2091: This is my first anime model in years. Hopefully you guys can find a good use-case for it.
 
254
RealPLKSR (Higher quality, slower) Sharp: For heavily degraded sources. Sharp models have issues with depth of field but are best at removing artifacts
255
  """],
256
 
257
  "2x-AnimeSharpV2_RPLKSR_Soft.pth" : ["https://github.com/Kim2091/Kim2091-Models/releases/download/2x-AnimeSharpV2_Set/2x-AnimeSharpV2_RPLKSR_Soft.pth",
258
  "https://github.com/Kim2091/Kim2091-Models/releases/tag/2x-AnimeSharpV2_Set",
259
- """Kim2091: This is my first anime model in years. Hopefully you guys can find a good use-case for it.
 
260
  RealPLKSR (Higher quality, slower) Soft: For cleaner sources. Soft models preserve depth of field but may not remove other artifacts as well"""],
261
 
262
  "4xPurePhoto-RealPLSKR.pth" : ["https://github.com/starinspace/StarinspaceUpscale/releases/download/Models/4xPurePhoto-RealPLSKR.pth",
263
  "https://openmodeldb.info/models/4x-PurePhoto-RealPLSKR",
264
- """Skilled in working with cats, hair, parties, and creating clear images.
 
265
  Also proficient in resizing photos and enlarging large, sharp images.
266
  Can effectively improve images from small sizes as well (300px at smallest on one side, depending on the subject).
267
- Experienced in experimenting with techniques like upscaling with this model twice and \
268
  then reducing it by 50% to enhance details, especially in features like hair or animals."""],
269
 
270
  "2x_Text2HD_v.1-RealPLKSR.pth" : ["https://github.com/starinspace/StarinspaceUpscale/releases/download/Models/2x_Text2HD_v.1-RealPLKSR.pth",
271
  "https://openmodeldb.info/models/2x-Text2HD-v-1",
272
- """Purpose: Upscale text in very low quality to normal quality.
273
- The upscale model is specifically designed to enhance lower-quality text images, \
274
  improving their clarity and readability by upscaling them by 2x.
275
  It excels at processing moderately sized text, effectively transforming it into high-quality, legible scans.
276
- However, the model may encounter challenges when dealing with very small text, \
277
- as its performance is optimized for text of a certain minimum size. For best results, \
278
  input images should contain text that is not excessively small."""],
279
 
280
  "2xVHS2HD-RealPLKSR.pth" : ["https://github.com/starinspace/StarinspaceUpscale/releases/download/Models/2xVHS2HD-RealPLKSR.pth",
281
  "https://openmodeldb.info/models/2x-VHS2HD",
282
- """An advanced VHS recording model designed to enhance video quality by reducing artifacts such as haloing, ghosting, and noise patterns.
 
283
Optimized primarily for PAL resolution (NTSC might work well too)."""],
284
 
285
  "4xNomosWebPhoto_RealPLKSR.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomosWebPhoto_RealPLKSR/4xNomosWebPhoto_RealPLKSR.pth",
286
  "https://openmodeldb.info/models/4x-NomosWebPhoto-RealPLKSR",
287
- """Phhofm: 4x RealPLKSR model for photography, trained with realistic noise, lens blur, jpg and webp re-compression."""],
 
288
 
289
  # DRCT
290
  "4xNomos2_hq_drct-l.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos2_hq_drct-l/4xNomos2_hq_drct-l.pth",
291
  "https://github.com/Phhofm/models/releases/tag/4xNomos2_hq_drct-l",
292
- """Phhofm: A drct-l 4x upscaling model, similar to the 4xNomos2_hq_atd, 4xNomos2_hq_dat2 and 4xNomos2_hq_mosr models, trained on and meant for non-degraded input to give good quality output.
 
293
  """],
294
 
295
  # ATD
296
  "4xNomos2_hq_atd.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos2_hq_atd/4xNomos2_hq_atd.pth",
297
  "https://github.com/Phhofm/models/releases/tag/4xNomos2_hq_atd",
298
- """Phhofm: An atd 4x upscaling model, similar to the 4xNomos2_hq_dat2 or 4xNomos2_hq_mosr models, trained on and meant for non-degraded input to give good quality output.
 
299
  """],
300
 
301
  # MoSR
302
  "4xNomos2_hq_mosr.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos2_hq_mosr/4xNomos2_hq_mosr.pth",
303
  "https://github.com/Phhofm/models/releases/tag/4xNomos2_hq_mosr",
304
- """Phhofm: A 4x MoSR upscaling model, meant for non-degraded input, since this model was trained on non-degraded input to give good quality output.
 
305
  """],
306
 
307
  "2x-AnimeSharpV2_MoSR_Sharp.pth" : ["https://github.com/Kim2091/Kim2091-Models/releases/download/2x-AnimeSharpV2_Set/2x-AnimeSharpV2_MoSR_Sharp.pth",
308
  "https://github.com/Kim2091/Kim2091-Models/releases/tag/2x-AnimeSharpV2_Set",
309
- """Kim2091: This is my first anime model in years. Hopefully you guys can find a good use-case for it.
 
310
MoSR (Lower quality, faster), Sharp: For heavily degraded sources. Sharp models have issues with depth of field but are best at removing artifacts
311
  """],
312
 
313
  "2x-AnimeSharpV2_MoSR_Soft.pth" : ["https://github.com/Kim2091/Kim2091-Models/releases/download/2x-AnimeSharpV2_Set/2x-AnimeSharpV2_MoSR_Soft.pth",
314
  "https://github.com/Kim2091/Kim2091-Models/releases/tag/2x-AnimeSharpV2_Set",
315
- """Kim2091: This is my first anime model in years. Hopefully you guys can find a good use-case for it.
 
316
  MoSR (Lower quality, faster), Soft: For cleaner sources. Soft models preserve depth of field but may not remove other artifacts as well
317
  """],
318
 
319
  # SRFormer
320
  "4xNomos8kSCSRFormer.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos8kSCSRFormer/4xNomos8kSCSRFormer.pth",
321
  "https://github.com/Phhofm/models/releases/tag/4xNomos8kSCSRFormer",
322
- """Phhofm: 4x photo upscaler with otf jpg compression and blur, trained on musl's Nomos8k_sfw dataset for realistic sr.
 
323
  """],
324
 
325
- # "4xFrankendata_FullDegradation_g_460000.pth" : ["https://drive.google.com/uc?export=download&confirm=1&id=1PZrj-8ofxhORv_OgTVSoRt3dYi-BtiDj",
326
- # "https://openmodeldb.info/models/4x-Frankendata-FullDegradation-SRFormer",
327
- # """Description: 4x realistic upscaler that may also work for general purpose usage.
328
- # It was trained with OTF random degradation with a very low to very high range of degradations, including blur, noise, and compression.
329
- # Trained with the same Frankendata dataset that I used for the pretrain model.
330
- # """],
331
-
332
- # "FrankendataPretrainer_SRFormer400K_g.pth" : ["https://drive.google.com/uc?export=download&confirm=1&id=1SaKvpYYIm2Vj2m9GifUMlNCbmkE6JZmr",
333
- # "https://openmodeldb.info/models/4x-FrankendataPretainer-SRFormer",
334
- # """Description: 4x realistic upscaler that may also work for general purpose usage.
335
- # It was trained with OTF random degradation with a very low to very high range of degradations, including blur, noise, and compression.
336
- # Trained with the same Frankendata dataset that I used for the pretrain model.
337
- # """],
338
-
339
- # "1xFrankenfixer_SRFormerLight_g.pth" : ["https://drive.google.com/uc?export=download&confirm=1&id=1UJ0iyFn4IGNhPIgNgrQrBxYsdDloFc9I",
340
- # "https://openmodeldb.info/models/1x-Frankenfixer-SRFormerLight",
341
- # """A 1x model designed to reduce artifacts and restore detail to images upscaled by 4xFrankendata_FullDegradation_SRFormer. It could possibly work with other upscaling models too.
342
- # """],
343
  }
344
 
345
  example_list = ["images/a01.jpg", "images/a02.jpg", "images/a03.jpg", "images/a04.jpg", "images/bus.jpg", "images/zidane.jpg",
@@ -361,7 +437,7 @@ def get_model_type(model_name):
361
  model_type = "RealPLKSR"
362
  elif any(value in model_name.lower() for value in ("realesrgan", "realesrnet")):
363
  model_type = "RRDB"
364
- elif any(value in model_name.lower() for value in ("realesr", "exposurecorrection", "parimgcompact", "lsdircompact")):
365
  model_type = "SRVGG"
366
  elif "esrgan" in model_name.lower():
367
  model_type = "ESRGAN"
@@ -385,6 +461,10 @@ class Upscale:
385
  print(img)
386
  print(face_restoration, upscale_model, scale)
387
  try:
388
  self.scale = scale
389
  self.img_name = os.path.basename(str(img))
390
  self.basename, self.extension = os.path.splitext(self.img_name)
@@ -396,282 +476,297 @@ class Upscale:
396
  img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
397
 
398
  self.h_input, self.w_input = img.shape[0:2]
 
399
 
400
- if face_restoration:
401
- download_from_url(face_models[face_restoration][0], face_restoration, os.path.join("weights", "face"))
402
-
403
  modelInUse = ""
404
  upscale_type = None
 
405
  if upscale_model:
406
  upscale_type, upscale_model = upscale_model.split(", ", 1)
407
  download_from_url(upscale_models[upscale_model][0], upscale_model, os.path.join("weights", "upscale"))
408
  modelInUse = f"_{os.path.splitext(upscale_model)[0]}"
409
 
410
- self.netscale = 1 if any(sub in upscale_model.lower() for sub in ("x1", "1x")) else (2 if any(sub in upscale_model.lower() for sub in ("x2", "2x")) else 4)
411
- model = None
412
- is_auto_split_upscale = True
413
- half = True if torch.cuda.is_available() else False
414
- if upscale_type:
415
- # The values of the following hyperparameters are based on the research findings of the Spandrel project.
416
- # https://github.com/chaiNNer-org/spandrel/tree/main/libs/spandrel/spandrel/architectures
417
- from basicsr.archs.rrdbnet_arch import RRDBNet
418
- loadnet = torch.load(os.path.join("weights", "upscale", upscale_model), map_location=torch.device('cpu'), weights_only=True)
419
- if 'params_ema' in loadnet or 'params' in loadnet:
420
- loadnet = loadnet['params_ema'] if 'params_ema' in loadnet else loadnet['params']
421
- # for key in loadnet_origin.keys():
422
- # print(f"{key}, {loadnet_origin[key].shape}")
423
- if upscale_type == "SRVGG":
424
- from basicsr.archs.srvgg_arch import SRVGGNetCompact
425
- body_max_num = self.find_max_numbers(loadnet, "body")
426
- num_feat = loadnet["body.0.weight"].shape[0]
427
- num_in_ch = loadnet["body.0.weight"].shape[1]
428
- num_conv = body_max_num // 2 - 1
429
- model = SRVGGNetCompact(num_in_ch=num_in_ch, num_out_ch=3, num_feat=num_feat, num_conv=num_conv, upscale=self.netscale, act_type='prelu')
430
- elif upscale_type == "RRDB" or upscale_type == "ESRGAN":
431
- if upscale_type == "RRDB":
432
- num_block = self.find_max_numbers(loadnet, "body") + 1
433
- num_feat = loadnet["conv_first.weight"].shape[0]
434
- else:
435
- num_block = self.find_max_numbers(loadnet, "model.1.sub")
436
- num_feat = loadnet["model.0.weight"].shape[0]
437
- model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=num_feat, num_block=num_block, num_grow_ch=32, scale=self.netscale, is_real_esrgan=upscale_type == "RRDB")
438
- elif upscale_type == "DAT":
439
- from basicsr.archs.dat_arch import DAT
440
- half = False
441
-
442
- in_chans = loadnet["conv_first.weight"].shape[1]
443
- embed_dim = loadnet["conv_first.weight"].shape[0]
444
- num_layers = self.find_max_numbers(loadnet, "layers") + 1
445
- depth = [6] * num_layers
446
- num_heads = [6] * num_layers
447
- for i in range(num_layers):
448
- depth[i] = self.find_max_numbers(loadnet, f"layers.{i}.blocks") + 1
449
- num_heads[i] = loadnet[f"layers.{i}.blocks.1.attn.temperature"].shape[0] if depth[i] >= 2 else \
450
- loadnet[f"layers.{i}.blocks.0.attn.attns.0.pos.pos3.2.weight"].shape[0] * 2
451
-
452
- upsampler = "pixelshuffle" if "conv_last.weight" in loadnet else "pixelshuffledirect"
453
- resi_connection = "1conv" if "conv_after_body.weight" in loadnet else "3conv"
454
- qkv_bias = "layers.0.blocks.0.attn.qkv.bias" in loadnet
455
- expansion_factor = float(loadnet["layers.0.blocks.0.ffn.fc1.weight"].shape[0] / embed_dim)
456
-
457
- img_size = 64
458
- if "layers.0.blocks.2.attn.attn_mask_0" in loadnet:
459
- attn_mask_0_x, attn_mask_0_y, _attn_mask_0_z = loadnet["layers.0.blocks.2.attn.attn_mask_0"].shape
460
- img_size = int(math.sqrt(attn_mask_0_x * attn_mask_0_y))
461
-
462
- split_size = [2, 4]
463
- if "layers.0.blocks.0.attn.attns.0.rpe_biases" in loadnet:
464
- split_sizes = loadnet["layers.0.blocks.0.attn.attns.0.rpe_biases"][-1] + 1
465
- split_size = [int(x) for x in split_sizes]
466
-
467
- model = DAT(img_size=img_size, in_chans=in_chans, embed_dim=embed_dim, split_size=split_size, depth=depth, num_heads=num_heads, expansion_factor=expansion_factor,
468
- qkv_bias=qkv_bias, resi_connection=resi_connection, upsampler=upsampler, upscale=self.netscale)
469
- elif upscale_type == "HAT":
470
- half = False
471
- from basicsr.archs.hat_arch import HAT
472
- in_chans = loadnet["conv_first.weight"].shape[1]
473
- embed_dim = loadnet["conv_first.weight"].shape[0]
474
- window_size = int(math.sqrt(loadnet["relative_position_index_SA"].shape[0]))
475
- num_layers = self.find_max_numbers(loadnet, "layers") + 1
476
- depths = [6] * num_layers
477
- num_heads = [6] * num_layers
478
- for i in range(num_layers):
479
- depths[i] = self.find_max_numbers(loadnet, f"layers.{i}.residual_group.blocks") + 1
480
- num_heads[i] = loadnet[f"layers.{i}.residual_group.overlap_attn.relative_position_bias_table"].shape[1]
481
- resi_connection = "1conv" if "conv_after_body.weight" in loadnet else "identity"
482
-
483
- qkv_bias = "layers.0.residual_group.blocks.0.attn.qkv.bias" in loadnet
484
- patch_norm = "patch_embed.norm.weight" in loadnet
485
- ape = "absolute_pos_embed" in loadnet
486
-
487
- mlp_hidden_dim = int(loadnet["layers.0.residual_group.blocks.0.mlp.fc1.weight"].shape[0])
488
- mlp_ratio = mlp_hidden_dim / embed_dim
489
- upsampler = "pixelshuffle"
490
-
491
- if "hat-l" in upscale_model.lower():
492
- compress_ratio = 3
493
- squeeze_factor = 30
494
- elif "hat-s" in upscale_model.lower():
495
- compress_ratio = 24
496
- squeeze_factor = 24
497
- model = HAT(img_size=64, patch_size=1, in_chans=in_chans, embed_dim=embed_dim, depths=depths, num_heads=num_heads, window_size=window_size, compress_ratio=compress_ratio,
498
- squeeze_factor=squeeze_factor, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, ape=ape, patch_norm=patch_norm,
499
- upsampler=upsampler, resi_connection=resi_connection, upscale=self.netscale,)
500
- elif "RealPLKSR" in upscale_type:
501
- from basicsr.archs.realplksr_arch import realplksr
502
- half = False if "RealPLSKR" in upscale_model else half
503
- use_ea = "feats.1.attn.f.0.weight" in loadnet
504
- dim = loadnet["feats.0.weight"].shape[0]
505
- num_feats = self.find_max_numbers(loadnet, "feats") + 1
506
- n_blocks = num_feats - 3
507
- kernel_size = loadnet["feats.1.lk.conv.weight"].shape[2]
508
- split_ratio = loadnet["feats.1.lk.conv.weight"].shape[0] / dim
509
- use_dysample = "to_img.init_pos" in loadnet
510
-
511
- model = realplksr(upscaling_factor=self.netscale, dim=dim, n_blocks=n_blocks, kernel_size=kernel_size, split_ratio=split_ratio, use_ea=use_ea, dysample=use_dysample)
512
- elif upscale_type == "DRCT":
513
- half = False
514
- from basicsr.archs.DRCT_arch import DRCT
515
-
516
- in_chans = loadnet["conv_first.weight"].shape[1]
517
- embed_dim = loadnet["conv_first.weight"].shape[0]
518
- num_layers = self.find_max_numbers(loadnet, "layers") + 1
519
- depths = (6,) * num_layers
520
- num_heads = []
521
- for i in range(num_layers):
522
- num_heads.append(loadnet[f"layers.{i}.swin1.attn.relative_position_bias_table"].shape[1])
523
-
524
- mlp_ratio = loadnet["layers.0.swin1.mlp.fc1.weight"].shape[0] / embed_dim
525
- window_square = loadnet["layers.0.swin1.attn.relative_position_bias_table"].shape[0]
526
- window_size = (math.isqrt(window_square) + 1) // 2
527
- upsampler = "pixelshuffle" if "conv_last.weight" in loadnet else ""
528
- resi_connection = "1conv" if "conv_after_body.weight" in loadnet else ""
529
- qkv_bias = "layers.0.swin1.attn.qkv.bias" in loadnet
530
- gc_adjust1 = loadnet["layers.0.adjust1.weight"].shape[0]
531
- patch_norm = "patch_embed.norm.weight" in loadnet
532
- ape = "absolute_pos_embed" in loadnet
533
-
534
- model = DRCT(in_chans=in_chans, img_size= 64, window_size=window_size, compress_ratio=3,squeeze_factor=30,
535
- conv_scale= 0.01, overlap_ratio= 0.5, img_range= 1., depths=depths, embed_dim=embed_dim, num_heads=num_heads,
536
- mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, ape=ape, patch_norm=patch_norm, use_checkpoint=False,
537
- upscale=self.netscale, upsampler=upsampler, resi_connection=resi_connection, gc =gc_adjust1,)
538
- elif upscale_type == "ATD":
539
- half = False
540
- from basicsr.archs.atd_arch import ATD
541
- in_chans = loadnet["conv_first.weight"].shape[1]
542
- embed_dim = loadnet["conv_first.weight"].shape[0]
543
- window_size = math.isqrt(loadnet["relative_position_index_SA"].shape[0])
544
- num_layers = self.find_max_numbers(loadnet, "layers") + 1
545
- depths = [6] * num_layers
546
- num_heads = [6] * num_layers
547
- for i in range(num_layers):
548
- depths[i] = self.find_max_numbers(loadnet, f"layers.{i}.residual_group.layers") + 1
549
- num_heads[i] = loadnet[f"layers.{i}.residual_group.layers.0.attn_win.relative_position_bias_table"].shape[1]
550
- num_tokens = loadnet["layers.0.residual_group.layers.0.attn_atd.scale"].shape[0]
551
- reducted_dim = loadnet["layers.0.residual_group.layers.0.attn_atd.wq.weight"].shape[0]
552
- convffn_kernel_size = loadnet["layers.0.residual_group.layers.0.convffn.dwconv.depthwise_conv.0.weight"].shape[2]
553
- mlp_ratio = (loadnet["layers.0.residual_group.layers.0.convffn.fc1.weight"].shape[0] / embed_dim)
554
- qkv_bias = "layers.0.residual_group.layers.0.wqkv.bias" in loadnet
555
- ape = "absolute_pos_embed" in loadnet
556
- patch_norm = "patch_embed.norm.weight" in loadnet
557
- resi_connection = "1conv" if "layers.0.conv.weight" in loadnet else "3conv"
558
-
559
- if "conv_up1.weight" in loadnet:
560
- upsampler = "nearest+conv"
561
- elif "conv_before_upsample.0.weight" in loadnet:
562
- upsampler = "pixelshuffle"
563
- elif "conv_last.weight" in loadnet:
564
- upsampler = ""
565
- else:
566
- upsampler = "pixelshuffledirect"
567
-
568
- is_light = upsampler == "pixelshuffledirect" and embed_dim == 48
569
- category_size = 128 if is_light else 256
570
-
571
- model = ATD(in_chans=in_chans, embed_dim=embed_dim, depths=depths, num_heads=num_heads, window_size=window_size, category_size=category_size,
572
- num_tokens=num_tokens, reducted_dim=reducted_dim, convffn_kernel_size=convffn_kernel_size, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, ape=ape,
573
- patch_norm=patch_norm, use_checkpoint=False, upscale=self.netscale, upsampler=upsampler, resi_connection='1conv',)
574
- elif upscale_type == "MoSR":
575
- from basicsr.archs.mosr_arch import mosr
576
- n_block = self.find_max_numbers(loadnet, "gblocks") - 5
577
- in_ch = loadnet["gblocks.0.weight"].shape[1]
578
- out_ch = loadnet["upsampler.end_conv.weight"].shape[0] if "upsampler.init_pos" in loadnet else in_ch
579
- dim = loadnet["gblocks.0.weight"].shape[0]
580
- expansion_ratio = (loadnet["gblocks.1.fc1.weight"].shape[0] / loadnet["gblocks.1.fc1.weight"].shape[1]) / 2
581
- conv_ratio = loadnet["gblocks.1.conv.weight"].shape[0] / dim
582
- kernel_size = loadnet["gblocks.1.conv.weight"].shape[2]
583
- upsampler = "dys" if "upsampler.init_pos" in loadnet else ("gps" if "upsampler.in_to_k.weight" in loadnet else "ps")
584
-
585
- model = mosr(in_ch = in_ch, out_ch = out_ch, upscale = self.netscale, n_block = n_block, dim = dim,
586
- upsampler = upsampler, kernel_size = kernel_size, expansion_ratio = expansion_ratio, conv_ratio = conv_ratio,)
587
- elif upscale_type == "SRFormer":
588
- half = False
589
- from basicsr.archs.srformer_arch import SRFormer
590
- in_chans = loadnet["conv_first.weight"].shape[1]
591
- embed_dim = loadnet["conv_first.weight"].shape[0]
592
- ape = "absolute_pos_embed" in loadnet
593
- patch_norm = "patch_embed.norm.weight" in loadnet
594
- qkv_bias = "layers.0.residual_group.blocks.0.attn.q.bias" in loadnet
595
- mlp_ratio = float(loadnet["layers.0.residual_group.blocks.0.mlp.fc1.weight"].shape[0] / embed_dim)
596
-
597
- num_layers = self.find_max_numbers(loadnet, "layers") + 1
598
- depths = [6] * num_layers
599
- num_heads = [6] * num_layers
600
- for i in range(num_layers):
601
- depths[i] = self.find_max_numbers(loadnet, f"layers.{i}.residual_group.blocks") + 1
602
- num_heads[i] = loadnet[f"layers.{i}.residual_group.blocks.0.attn.relative_position_bias_table"].shape[1]
603
-
604
- if "conv_hr.weight" in loadnet:
605
- upsampler = "nearest+conv"
606
- elif "conv_before_upsample.0.weight" in loadnet:
607
  upsampler = "pixelshuffle"
608
- elif "upsample.0.weight" in loadnet:
609
- upsampler = "pixelshuffledirect"
610
- resi_connection = "1conv" if "conv_after_body.weight" in loadnet else "3conv"
611
-
612
- window_size = int(math.sqrt(loadnet["layers.0.residual_group.blocks.0.attn.relative_position_bias_table"].shape[0])) + 1
613
-
614
- model = SRFormer(img_size=64, in_chans=in_chans, embed_dim=embed_dim, depths=depths, num_heads=num_heads, window_size=window_size, mlp_ratio=mlp_ratio,
615
- qkv_bias=qkv_bias, qk_scale=None, ape=ape, patch_norm=patch_norm, upscale=self.netscale, upsampler=upsampler, resi_connection=resi_connection,)
616
-
617
- self.upsampler = None
618
- if model:
619
- self.upsampler = RealESRGANer(scale=self.netscale, model_path=os.path.join("weights", "upscale", upscale_model), model=model, tile=0, tile_pad=10, pre_pad=0, half=half)
620
- elif upscale_model:
621
- self.upsampler = None
622
- import PIL
623
- from image_gen_aux import UpscaleWithModel
624
- class UpscaleWithModel_Gfpgan(UpscaleWithModel):
625
- def cv2pil(self, image):
626
- ''' OpenCV type -> PIL type
627
- https://qiita.com/derodero24/items/f22c22b22451609908ee
628
- '''
629
- new_image = image.copy()
630
- if new_image.ndim == 2: # Grayscale
631
- pass
632
- elif new_image.shape[2] == 3: # Color
633
- new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB)
634
- elif new_image.shape[2] == 4: # Transparency
635
- new_image = cv2.cvtColor(new_image, cv2.COLOR_BGRA2RGBA)
636
- new_image = PIL.Image.fromarray(new_image)
637
- return new_image
638
-
639
- def pil2cv(self, image):
640
- ''' PIL type -> OpenCV type
641
- https://qiita.com/derodero24/items/f22c22b22451609908ee
642
- '''
643
- new_image = np.array(image, dtype=np.uint8)
644
- if new_image.ndim == 2: # Grayscale
645
- pass
646
- elif new_image.shape[2] == 3: # Color
647
- new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR)
648
- elif new_image.shape[2] == 4: # Transparency
649
- new_image = cv2.cvtColor(new_image, cv2.COLOR_RGBA2BGRA)
650
- return new_image
651
-
652
- def enhance(self_, img, outscale=None):
653
- # img: numpy
654
- h_input, w_input = img.shape[0:2]
655
- pil_img = self.cv2pil(img)
656
- pil_img = self_.__call__(pil_img)
657
- cv_image = self.pil2cv(pil_img)
658
- if outscale is not None and outscale != float(self.netscale):
659
- interpolation = cv2.INTER_AREA if outscale < float(self.netscale) else cv2.INTER_LANCZOS4
660
- cv_image = cv2.resize(
661
- cv_image, (
662
- int(w_input * outscale),
663
- int(h_input * outscale),
664
- ), interpolation=interpolation)
665
- return cv_image, None
666
-
667
- device = "cuda" if torch.cuda.is_available() else "cpu"
668
- upscaler = UpscaleWithModel.from_pretrained(os.path.join("weights", "upscale", upscale_model)).to(device)
669
- upscaler.__class__ = UpscaleWithModel_Gfpgan
670
- self.upsampler = upscaler
671
  self.face_enhancer = None
672
 
673
- resolution = 512
674
  if face_restoration:
675
  modelInUse = f"_{os.path.splitext(face_restoration)[0]}" + modelInUse
676
  from gfpgan.utils import GFPGANer
677
  model_rootpath = os.path.join("weights", "face")
@@ -696,6 +791,7 @@ class Upscale:
696
  resolution = 2048
697
 
698
  self.face_enhancer = GFPGANer(model_path=model_path, upscale=self.scale, arch=arch, channel_multiplier=channel_multiplier, model_rootpath=model_rootpath, det_model=face_detection, resolution=resolution)
 
699
 
700
  files = []
701
  if not outputWithModelName:
@@ -703,9 +799,10 @@ class Upscale:
703
 
704
  try:
705
  bg_upsample_img = None
706
- if self.upsampler and hasattr(self.upsampler, "enhance"):
707
  from utils.dataops import auto_split_upscale
708
- bg_upsample_img, _ = auto_split_upscale(img, self.upsampler.enhance, self.scale) if is_auto_split_upscale else self.upsampler.enhance(img, outscale=self.scale)
 
709
 
710
  if self.face_enhancer:
711
  cropped_faces, restored_aligned, bg_upsample_img = self.face_enhancer.enhance(img, has_aligned=False, only_center_face=face_detection_only_center, paste_back=True, bg_upsample_img=bg_upsample_img, eye_dist_threshold=face_detection_threshold)
@@ -726,6 +823,7 @@ class Upscale:
726
  files.append(save_crop_path)
727
  files.append(save_restore_path)
728
  files.append(save_cmp_path)
 
729
 
730
  restored_img = bg_upsample_img
731
  except RuntimeError as error:
@@ -746,10 +844,11 @@ class Upscale:
746
 
747
  restored_img = cv2.cvtColor(restored_img, cv2.COLOR_BGR2RGB)
748
  files.append(save_path)
 
749
  return files, files
750
  except Exception as error:
751
  print(traceback.format_exc())
752
- print("global exception", error)
753
  return None, None
754
 
755
  def find_max_numbers(self, state_dict, findkeys):
@@ -766,12 +865,70 @@ class Upscale:
766
 
767
  return tuple(max_values[findkey] for findkey in findkeys) if len(findkeys) > 1 else max_values[findkeys[0]]
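The diff shows only the signature and the final return of `find_max_numbers`; as a hypothetical sketch (not the commit's actual body) of what such a helper typically does — scanning state-dict keys for the largest numeric index under each prefix, which is how the architecture hyperparameters above are inferred:

```python
import re
from collections import defaultdict

def find_max_numbers(state_dict, findkeys):
    """Assumed behavior: for each prefix in findkeys, return the largest integer N
    appearing as '<prefix>.<N>.' among the state-dict keys."""
    if isinstance(findkeys, str):
        findkeys = [findkeys]
    max_values = defaultdict(lambda: None)
    for key in state_dict:
        for findkey in findkeys:
            match = re.match(rf"{re.escape(findkey)}\.(\d+)\.", key)
            if match:
                idx = int(match.group(1))
                if max_values[findkey] is None or idx > max_values[findkey]:
                    max_values[findkey] = idx
    return tuple(max_values[findkey] for findkey in findkeys) if len(findkeys) > 1 else max_values[findkeys[0]]
```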
768
769
  def imwriteUTF8(self, save_path, image): # `cv2.imwrite` does not support writing files to UTF-8 file paths.
770
  img_name = os.path.basename(save_path)
771
  _, extension = os.path.splitext(img_name)
772
  is_success, im_buf_arr = cv2.imencode(extension, image)
773
  if (is_success): im_buf_arr.tofile(save_path)
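For reference, a minimal standalone sketch of the same encode-then-`tofile` workaround used above (assumes OpenCV and NumPy are installed; the non-ASCII file name is only an example):

```python
import cv2
import numpy as np

image = np.zeros((64, 64, 3), dtype=np.uint8)  # dummy BGR image
ok, buffer = cv2.imencode(".png", image)       # encode in memory instead of cv2.imwrite
if ok:
    buffer.tofile("結果_upscaled.png")          # ndarray.tofile accepts UTF-8 paths
```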
774
775
 
776
  def main():
777
  if torch.cuda.is_available():
@@ -793,6 +950,13 @@ def main():
793
  Practically, the aforementioned algorithm is used to restore your **old photos** or improve **AI-generated faces**.<br>
794
To use it, simply upload the image in question.<br>
795
  """
796
 
797
  upscale = Upscale()
798
 
@@ -814,13 +978,13 @@ def main():
814
  upscale_model_header = f"| Upscale Model Name | Info, Type: {tmptype}, Model execution speed: {speed} | Download URL |\n|------------|------|--------------|"
815
  upscale_model_tables.append(upscale_model_header + "\n" + "\n".join(rows))
816
 
817
- with gr.Blocks(title = title) as demo:
818
  gr.Markdown(value=f"<h1 style=\"text-align:center;\">{title}</h1><br>{description}")
819
  with gr.Row():
820
  with gr.Column(variant ="panel"):
821
  input_image = gr.Image(type="filepath", label="Input", format="png")
822
- face_model = gr.Dropdown(list(face_models.keys())+[None], type="value", value='GFPGANv1.4.pth', label='Face Restoration version', info="Face Restoration and RealESR can be freely combined in different ways, or one can be set to \"None\" to use only the other model. Face Restoration is primarily used for face restoration in real-life images, while RealESR serves as a background restoration model.")
823
- upscale_model = gr.Dropdown(list(typed_upscale_models.keys())+[None], type="value", value='SRVGG, realesr-general-x4v3.pth', label='UpScale version')
824
  upscale_scale = gr.Number(label="Rescaling factor", value=4)
825
  face_detection = gr.Dropdown(["retinaface_resnet50", "YOLOv5l", "YOLOv5n"], type="value", value="retinaface_resnet50", label="Face Detection type")
826
  face_detection_threshold = gr.Number(label="Face eye dist threshold", value=10, info="A threshold to filter out faces with too small an eye distance (e.g., side faces).")
 
7
  import torch
8
  import traceback
9
  import math
10
+ import time
11
  from collections import defaultdict
12
  from facexlib.utils.misc import download_from_url
13
  from basicsr.utils.realesrganer import RealESRGANer
 
59
  "https://github.com/wzhouxiff/RestoreFormerPlusPlus", "The same as RestoreFormer++ but legacy model"],
60
  }
61
  upscale_models = {
62
+ # SRVGGNet(Compact)
63
  "realesr-general-x4v3.pth": ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth",
64
  "https://github.com/xinntao/Real-ESRGAN/releases/tag/v0.3.0",
65
+ """Compression Removal, General Upscaler, JPEG, Realistic, Research, Restoration
66
+ xinntao: add realesr-general-x4v3 and realesr-general-wdn-x4v3. They are very tiny models for general scenes, and they may be more robust. But as they are tiny models, their performance may be limited."""],
67
 
68
  "realesr-animevideov3.pth": ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth",
69
  "https://github.com/xinntao/Real-ESRGAN/releases/tag/v0.2.5.0",
70
+ """Anime, Cartoon, Compression Removal, General Upscaler, JPEG, Realistic, Research, Restoration
71
+ xinntao: update the RealESRGAN AnimeVideo-v3 model, which can achieve better results with a faster inference speed."""],
72
 
73
  "4xLSDIRCompact.pth": ["https://github.com/Phhofm/models/releases/download/4xLSDIRCompact/4xLSDIRCompact.pth",
74
  "https://github.com/Phhofm/models/releases/tag/4xLSDIRCompact",
75
+ """Realistic
76
+ Phhofm: Upscale small good quality photos to 4x their size. This is my first ever released self-trained sisr upscaling model."""],
77
 
78
  "4xLSDIRCompactC.pth": ["https://github.com/Phhofm/models/releases/download/4xLSDIRCompactC/4xLSDIRCompactC.pth",
79
  "https://github.com/Phhofm/models/releases/tag/4xLSDIRCompactC",
80
+ """Compression Removal, JPEG, Realistic, Restoration
81
+ Phhofm: 4x photo upscaler that handles jpg compression. Trying to extend my previous model to be able to handle compression (JPG 100-30) by manually altering the training dataset, since 4xLSDIRCompact can't handle compression. Use this instead of 4xLSDIRCompact if your photo has compression (like an image from the web)."""],
82
 
83
  "4xLSDIRCompactR.pth": ["https://github.com/Phhofm/models/releases/download/4xLSDIRCompactC/4xLSDIRCompactR.pth",
84
  "https://github.com/Phhofm/models/releases/tag/4xLSDIRCompactC",
85
+ """Compression Removal, Realistic, Restoration
86
+ Phhofm: 4x photo upscaler that handles jpg compression, noise and slight blur. Extending my last 4xLSDIRCompact model to Real-ESRGAN, meaning trained on synthetic data instead to handle more kinds of degradations, it should be able to handle compression, noise, and slight blur."""],
87
 
88
  "4xLSDIRCompactN.pth": ["https://github.com/Phhofm/models/releases/download/4xLSDIRCompact3/4xLSDIRCompactC3.pth",
89
  "https://github.com/Phhofm/models/releases/tag/4xLSDIRCompact3",
90
+ """Realistic
91
+ Phhofm: Upscale good quality input photos to x4 their size. The original 4xLSDIRCompact trained a bit more; it cannot handle degradation.
92
  I am releasing the Series 3 from my 4xLSDIRCompact models. In general my suggestion is, if you have good quality input images use 4xLSDIRCompactN3, otherwise try 4xLSDIRCompactC3 which will be able to handle jpg compression and a bit of blur, or then 4xLSDIRCompactCR3, which is an interpolation between C3 and R3 to be able to handle a bit of noise additionally."""],
93
 
94
  "4xLSDIRCompactC3.pth": ["https://github.com/Phhofm/models/releases/download/4xLSDIRCompact3/4xLSDIRCompactC3.pth",
95
  "https://github.com/Phhofm/models/releases/tag/4xLSDIRCompact3",
96
+ """Compression Removal,
97
+ JPEG, Realistic, Restoration
98
+ Phhofm: Upscale compressed photos to x4 their size. Able to handle JPG compression (30-100).
99
  I am releasing the Series 3 from my 4xLSDIRCompact models. In general my suggestion is, if you have good quality input images use 4xLSDIRCompactN3, otherwise try 4xLSDIRCompactC3 which will be able to handle jpg compression and a bit of blur, or then 4xLSDIRCompactCR3, which is an interpolation between C3 and R3 to be able to handle a bit of noise additionally."""],
100
 
101
  "4xLSDIRCompactR3.pth": ["https://github.com/Phhofm/models/releases/download/4xLSDIRCompact3/4xLSDIRCompactR3.pth",
102
  "https://github.com/Phhofm/models/releases/tag/4xLSDIRCompact3",
103
+ """Realistic, Restoration
104
+ Phhofm: Upscale (degraded) photos to x4 their size. Trained on synthetic data, meant to handle more degradations.
105
  I am releasing the Series 3 from my 4xLSDIRCompact models. In general my suggestion is, if you have good quality input images use 4xLSDIRCompactN3, otherwise try 4xLSDIRCompactC3 which will be able to handle jpg compression and a bit of blur, or then 4xLSDIRCompactCR3, which is an interpolation between C3 and R3 to be able to handle a bit of noise additionally."""],
106
 
107
  "4xLSDIRCompactCR3.pth": ["https://github.com/Phhofm/models/releases/download/4xLSDIRCompact3/4xLSDIRCompactCR3.pth",
 
110
 
111
  "2xParimgCompact.pth": ["https://github.com/Phhofm/models/releases/download/2xParimgCompact/2xParimgCompact.pth",
112
  "https://github.com/Phhofm/models/releases/tag/2xParimgCompact",
113
+ """Realistic
114
+ Phhofm: A 2x photo upscaling compact model based on Microsoft's ImagePairs. This was one of the earliest models I started training and have now finished for release. As can be seen in the examples, this model will affect colors."""],
115
 
116
  "1xExposureCorrection_compact.pth": ["https://github.com/Phhofm/models/releases/download/1xExposureCorrection_compact/1xExposureCorrection_compact.pth",
117
+ "https://github.com/Phhofm/models/releases/tag/1xExposureCorrection_compact",
118
+ """Restoration
119
+ Phhofm: This model is meant as an experiment to see if compact can be used to train on photos to exposure correct those using the pixel, perceptual, color, color and ldl losses. There is no brightness loss. Still it seems to kinda work."""],
120
 
121
  "1xUnderExposureCorrection_compact.pth": ["https://github.com/Phhofm/models/releases/download/1xExposureCorrection_compact/1xUnderExposureCorrection_compact.pth",
122
+ "https://github.com/Phhofm/models/releases/tag/1xExposureCorrection_compact",
123
+ """Restoration
124
+ Phhofm: This model is meant as an experiment to see if compact can be used to train on underexposed images to exposure correct those using the pixel, perceptual, color, color and ldl losses. There is no brightness loss. Still it seems to kinda work."""],
125
 
126
  "1xOverExposureCorrection_compact.pth": ["https://github.com/Phhofm/models/releases/download/1xExposureCorrection_compact/1xOverExposureCorrection_compact.pth",
127
+ "https://github.com/Phhofm/models/releases/tag/1xExposureCorrection_compact",
128
+ """Restoration
129
+ Phhofm: This model is meant as an experiment to see if compact can be used to train on overexposed images to exposure correct those using the pixel, perceptual, color, color and ldl losses. There is no brightness loss. Still it seems to kinda work."""],
130
+
131
+ "2x-sudo-UltraCompact.pth": ["https://objectstorage.us-phoenix-1.oraclecloud.com/n/ax6ygfvpvzka/b/open-modeldb-files/o/2x-sudo-UltraCompact.pth",
132
+ "https://openmodeldb.info/models/2x-sudo-UltraCompact",
133
+ """Anime, Cartoon, Restoration
134
+ sudo: Realtime animation restoration, doing things like deblurring and compression artefact removal.
135
+ My first attempt to make a REALTIME 2x upscaling model while also applying teacher student learning.
136
+ (Teacher: RealESRGANv2-animevideo-xsx2.pth)"""],
137
+
138
+ "2x_AnimeJaNai_HD_V3_SuperUltraCompact.pth": ["https://github.com/the-database/mpv-upscale-2x_animejanai/releases/download/3.0.0/2x_AnimeJaNai_HD_V3_ModelsOnly.zip",
139
+ "https://openmodeldb.info/models/2x-AnimeJaNai-HD-V3-SuperUltraCompact",
140
+ """Anime, Compression Removal, Restoration
141
+ the-database: Real-time 2x Real-ESRGAN Compact/UltraCompact/SuperUltraCompact models designed for upscaling 1080p anime to 4K.
142
+ The aim of these models is to address scaling, blur, oversharpening, and compression artifacts while upscaling to deliver a result that appears as if the anime was originally mastered in 4K resolution."""],
143
+
144
+ "2x_AnimeJaNai_HD_V3_UltraCompact.pth": ["https://github.com/the-database/mpv-upscale-2x_animejanai/releases/download/3.0.0/2x_AnimeJaNai_HD_V3_ModelsOnly.zip",
145
+ "https://openmodeldb.info/models/2x-AnimeJaNai-HD-V3-UltraCompact",
146
+ """Anime, Compression Removal, Restoration
147
+ the-database: Real-time 2x Real-ESRGAN Compact/UltraCompact/SuperUltraCompact models designed for upscaling 1080p anime to 4K.
148
+ The aim of these models is to address scaling, blur, oversharpening, and compression artifacts while upscaling to deliver a result that appears as if the anime was originally mastered in 4K resolution."""],
149
+
150
+ "2x_AnimeJaNai_HD_V3_Compact.pth": ["https://github.com/the-database/mpv-upscale-2x_animejanai/releases/download/3.0.0/2x_AnimeJaNai_HD_V3_ModelsOnly.zip",
151
+ "https://openmodeldb.info/models/2x-AnimeJaNai-HD-V3-Compact",
152
+ """Anime, Compression Removal, Restoration
153
+ the-database: Real-time 2x Real-ESRGAN Compact/UltraCompact/SuperUltraCompact models designed for upscaling 1080p anime to 4K.
154
+ The aim of these models is to address scaling, blur, oversharpening, and compression artifacts while upscaling to deliver a result that appears as if the anime was originally mastered in 4K resolution."""],
155
 
156
  # RRDBNet
157
  "RealESRGAN_x4plus_anime_6B.pth": ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth",
158
  "https://github.com/xinntao/Real-ESRGAN/releases/tag/v0.2.2.4",
159
+ """Anime, Cartoon, Compression Removal, General Upscaler, JPEG, Realistic, Research, Restoration
160
+ xinntao: We add RealESRGAN_x4plus_anime_6B.pth, which is optimized for anime images with much smaller model size. More details and comparisons with waifu2x are in anime_model.md"""],
161
 
162
  "RealESRGAN_x2plus.pth" : ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth",
163
  "https://github.com/xinntao/Real-ESRGAN/releases/tag/v0.2.1",
164
+ """Compression Removal, General Upscaler, JPEG, Realistic, Research, Restoration
165
+ xinntao: Add RealESRGAN_x2plus.pth model"""],
166
 
167
  "RealESRNet_x4plus.pth" : ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth",
168
  "https://github.com/xinntao/Real-ESRGAN/releases/tag/v0.1.1",
169
+ """Compression Removal, General Upscaler, JPEG, Realistic, Research, Restoration
170
+ xinntao: This release is mainly for storing pre-trained models and executable files."""],
171
 
172
  "RealESRGAN_x4plus.pth" : ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
173
  "https://github.com/xinntao/Real-ESRGAN/releases/tag/v0.1.0",
174
+ """Compression Removal, General Upscaler, JPEG, Realistic, Research, Restoration
175
+ xinntao: This release is mainly for storing pre-trained models and executable files."""],
176
 
177
  # ESRGAN(oldRRDB)
178
  "4x-AnimeSharp.pth": ["https://huggingface.co/utnah/esrgan/resolve/main/4x-AnimeSharp.pth?download=true",
179
  "https://openmodeldb.info/models/4x-AnimeSharp",
180
+ """Anime, Cartoon, Text
181
+ Kim2091: Interpolation between 4x-UltraSharp and 4x-TextSharp-v0.5. Works amazingly on anime. It also upscales text, but it's far better with anime content."""],
182
 
183
  "4x_IllustrationJaNai_V1_ESRGAN_135k.pth": ["https://drive.google.com/uc?export=download&confirm=1&id=1qpioSqBkB_IkSBhEAewSSNFt6qgkBimP",
184
  "https://openmodeldb.info/models/4x-IllustrationJaNai-V1-DAT2",
185
+ """Anime, Cartoon, Compression Removal, Dehalftone, General Upscaler, JPEG, Manga, Restoration
186
+ the-database: Model for color images including manga covers and color illustrations, digital art, visual novel art, artbooks, and more.
187
  DAT2 version is the highest quality version but also the slowest. See the ESRGAN version for faster performance."""],
188
 
189
  "2x-sudo-RealESRGAN.pth": ["https://objectstorage.us-phoenix-1.oraclecloud.com/n/ax6ygfvpvzka/b/open-modeldb-files/o/2x-sudo-RealESRGAN.pth",
190
  "https://openmodeldb.info/models/2x-sudo-RealESRGAN",
191
+ """Anime, Cartoon
192
+ sudo: Tried to make the best 2x model there is for drawings. I think I achieved that.
193
  And yes, it is nearly 3.8 million iterations (probably a record nobody will beat here), took me nearly half a year to train.
194
  It can happen that in one edge is a noisy pattern in edges. You can use padding/crop for that.
195
+ I aimed for perceptual quality without zooming in like 400%. Since RealESRGAN is 4x, I downscaled these images with bicubic.
196
+ Pretrained: Pretrained_Model_G: RealESRGAN_x4plus_anime_6B.pth / RealESRGAN_x4plus_anime_6B.pth (sudo_RealESRGAN2x_3.332.758_G.pth)"""],
197
 
198
  "2x-sudo-RealESRGAN-Dropout.pth": ["https://objectstorage.us-phoenix-1.oraclecloud.com/n/ax6ygfvpvzka/b/open-modeldb-files/o/2x-sudo-RealESRGAN-Dropout.pth",
199
  "https://openmodeldb.info/models/2x-sudo-RealESRGAN-Dropout",
200
+ """Anime, Cartoon
201
+ sudo: Tried to make the best 2x model there is for drawings. I think I achieved that.
202
  And yes, it is nearly 3.8 million iterations (probably a record nobody will beat here), took me nearly half a year to train.
203
  It can happen that in one edge is a noisy pattern in edges. You can use padding/crop for that.
204
+ I aimed for perceptual quality without zooming in like 400%. Since RealESRGAN is 4x, I downscaled these images with bicubic.
205
+ Pretrained: Pretrained_Model_G: RealESRGAN_x4plus_anime_6B.pth / RealESRGAN_x4plus_anime_6B.pth (sudo_RealESRGAN2x_3.332.758_G.pth)"""],
206
 
207
  "4xNomos2_otf_esrgan.pth": ["https://github.com/Phhofm/models/releases/download/4xNomos2_otf_esrgan/4xNomos2_otf_esrgan.pth",
208
  "https://github.com/Phhofm/models/releases/tag/4xNomos2_otf_esrgan",
209
+ """Compression Removal, JPEG, Realistic, Restoration
210
+ Phhofm: Restoration, 4x ESRGAN model for photography, trained using the Real-ESRGAN otf degradation pipeline."""],
211
 
212
  "4xNomosWebPhoto_esrgan.pth": ["https://github.com/Phhofm/models/releases/download/4xNomosWebPhoto_esrgan/4xNomosWebPhoto_esrgan.pth",
213
  "https://github.com/Phhofm/models/releases/tag/4xNomosWebPhoto_esrgan",
214
+ """Realistic, Restoration
215
+ Phhofm: Restoration, 4x ESRGAN model for photography, trained with realistic noise, lens blur, jpg and webp re-compression.
216
  ESRGAN version of 4xNomosWebPhoto_RealPLKSR, trained on the same dataset and in the same way."""],
217
 
218
  # DATNet
219
  "4xNomos8kDAT.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos8kDAT/4xNomos8kDAT.pth",
220
  "https://openmodeldb.info/models/4x-Nomos8kDAT",
221
+ """Anime, Compression Removal, General Upscaler, JPEG, Realistic, Restoration
222
+ Phhofm: A 4x photo upscaler with otf jpg compression, blur and resize, trained on musl's Nomos8k_sfw dataset for realistic sr, this time based on the DAT arch, as a finetune on the official 4x DAT model.
223
 
224
  "4x-DWTP-DS-dat2-v3.pth" : ["https://objectstorage.us-phoenix-1.oraclecloud.com/n/ax6ygfvpvzka/b/open-modeldb-files/o/4x-DWTP-DS-dat2-v3.pth",
225
  "https://openmodeldb.info/models/4x-DWTP-DS-dat2-v3",
226
+ """Dehalftone, Restoration
227
+ umzi.x.dead: DAT descreenton model, designed to reduce discrepancies on tiles due to too much loss of the first version, while getting rid of the removal of paper texture"""],
228
 
229
  "4xBHI_dat2_real.pth" : ["https://github.com/Phhofm/models/releases/download/4xBHI_dat2_real/4xBHI_dat2_real.pth",
230
  "https://github.com/Phhofm/models/releases/tag/4xBHI_dat2_real",
231
+ """Compression Removal, JPEG, Realistic
232
+ Phhofm: 4x dat2 upscaling model for web and realistic images. It handles realistic noise, some realistic blur, and webp and jpg (re)compression. Trained on my BHI dataset (390'035 training tiles) with degraded LR subset."""],
233
 
234
  "4xBHI_dat2_otf.pth" : ["https://github.com/Phhofm/models/releases/download/4xBHI_dat2_otf/4xBHI_dat2_otf.pth",
235
  "https://github.com/Phhofm/models/releases/tag/4xBHI_dat2_otf",
236
+ """Compression Removal, JPEG
237
+ Phhofm: 4x dat2 upscaling model, trained with the real-esrgan otf pipeline on my bhi dataset. Handles noise and compression."""],
238
 
239
  "4xBHI_dat2_multiblur.pth" : ["https://github.com/Phhofm/models/releases/download/4xBHI_dat2_multiblurjpg/4xBHI_dat2_multiblur.pth",
240
  "https://github.com/Phhofm/models/releases/tag/4xBHI_dat2_multiblurjpg",
241
+ """Phhofm: the 4xBHI_dat2_multiblur checkpoint (trained to 250000 iters), which cannot handle compression but might give just slightly better output on non-degraded input."""],
242
 
243
  "4xBHI_dat2_multiblurjpg.pth" : ["https://github.com/Phhofm/models/releases/download/4xBHI_dat2_multiblurjpg/4xBHI_dat2_multiblurjpg.pth",
244
  "https://github.com/Phhofm/models/releases/tag/4xBHI_dat2_multiblurjpg",
245
+ """Compression Removal, JPEG
246
+ Phhofm: 4x dat2 upscaling model, trained with down_up, linear, cubic_mitchell, lanczos, gauss and box scaling algos, some average, gaussian and anisotropic blurs and jpg compression. Trained on my BHI sisr dataset.
247
 
248
  "4x_IllustrationJaNai_V1_DAT2_190k.pth": ["https://drive.google.com/uc?export=download&confirm=1&id=1qpioSqBkB_IkSBhEAewSSNFt6qgkBimP",
249
  "https://openmodeldb.info/models/4x-IllustrationJaNai-V1-DAT2",
250
+ """Anime, Cartoon, Compression Removal, Dehalftone, General Upscaler, JPEG, Manga, Restoration
251
+ the-database: Model for color images including manga covers and color illustrations, digital art, visual novel art, artbooks, and more.
252
  DAT2 version is the highest quality version but also the slowest. See the ESRGAN version for faster performance."""],
253
 
254
  "4x-PBRify_UpscalerDAT2_V1.pth": ["https://github.com/Kim2091/Kim2091-Models/releases/download/4x-PBRify_UpscalerDAT2_V1/4x-PBRify_UpscalerDAT2_V1.pth",
255
  "https://github.com/Kim2091/Kim2091-Models/releases/tag/4x-PBRify_UpscalerDAT2_V1",
256
+ """Compression Removal, DDS, Game Textures, Restoration
257
+ Kim2091: Yet another model in the PBRify_Remix series. This is a new upscaler to replace the previous 4x-PBRify_UpscalerSIR-M_V2 model.
258
  This model far exceeds the quality of the previous, with far more natural detail generation and better reconstruction of lines and edges."""],
259
 
260
  "4xBHI_dat2_otf_nn.pth": ["https://github.com/Phhofm/models/releases/download/4xBHI_dat2_otf_nn/4xBHI_dat2_otf_nn.pth",
261
  "https://github.com/Phhofm/models/releases/tag/4xBHI_dat2_otf_nn",
262
+ """Compression Removal, JPEG
263
+ Phhofm: 4x dat2 upscaling model, trained with the real-esrgan otf pipeline but without noise, on my bhi dataset. Handles resizes, and jpg compression."""],
264
 
265
  # HAT
266
  "4xNomos8kSCHAT-L.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos8kSCHAT/4xNomos8kSCHAT-L.pth",
267
  "https://openmodeldb.info/models/4x-Nomos8kSCHAT-L",
268
+ """Anime, Compression Removal, General Upscaler, JPEG, Realistic, Restoration
269
+ Phhofm: 4x photo upscaler with otf jpg compression and blur, trained on musl's Nomos8k_sfw dataset for realistic sr. Since this is a big model, upscaling might take a while.
270
 
271
  "4xNomos8kSCHAT-S.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos8kSCHAT/4xNomos8kSCHAT-S.pth",
272
  "https://openmodeldb.info/models/4x-Nomos8kSCHAT-S",
273
+ """Anime, Compression Removal, General Upscaler, JPEG, Realistic, Restoration
274
+ Phhofm: 4x photo upscaler with otf jpg compression and blur, trained on musl's Nomos8k_sfw dataset for realistic sr. HAT-S version/model.
275
 
276
  "4xNomos8kHAT-L_otf.pth": ["https://github.com/Phhofm/models/releases/download/4xNomos8kHAT-L_otf/4xNomos8kHAT-L_otf.pth",
277
  "https://openmodeldb.info/models/4x-Nomos8kHAT-L-otf",
278
+ """Faces, General Upscaler, Realistic, Restoration
279
+ Phhofm: 4x photo upscaler trained with otf, handles some jpg compression, some blur and some noise."""],
280
 
281
  "4xBHI_small_hat-l.pth": ["https://github.com/Phhofm/models/releases/download/4xBHI_small_hat-l/4xBHI_small_hat-l.pth",
282
  "https://github.com/Phhofm/models/releases/tag/4xBHI_small_hat-l",
 
288
  # RealPLKSR_dysample
289
  "4xHFA2k_ludvae_realplksr_dysample.pth": ["https://github.com/Phhofm/models/releases/download/4xHFA2k_ludvae_realplksr_dysample/4xHFA2k_ludvae_realplksr_dysample.pth",
290
  "https://openmodeldb.info/models/4x-HFA2k-ludvae-realplksr-dysample",
291
+ """Anime, Compression Removal, Restoration
292
+ Phhofm: A Dysample RealPLKSR 4x upscaling model for anime single-image super-resolution.
293
 
294
  "4xArtFaces_realplksr_dysample.pth" : ["https://github.com/Phhofm/models/releases/download/4xArtFaces_realplksr_dysample/4xArtFaces_realplksr_dysample.pth",
295
  "https://openmodeldb.info/models/4x-ArtFaces-realplksr-dysample",
296
+ """ArtFaces
297
+ Phhofm: A Dysample RealPLKSR 4x upscaling model for art / painted faces."""],
298
 
299
  "4x-PBRify_RPLKSRd_V3.pth" : ["https://github.com/Kim2091/Kim2091-Models/releases/download/4x-PBRify_RPLKSRd_V3/4x-PBRify_RPLKSRd_V3.pth",
300
  "https://github.com/Kim2091/Kim2091-Models/releases/tag/4x-PBRify_RPLKSRd_V3",
301
+ """Compression Removal, DDS, Debanding, Dedither, Dehalo, Game Textures, Restoration
302
+ Kim2091: This update brings a new upscaling model, 4x-PBRify_RPLKSRd_V3. This model is roughly 8x faster than the current DAT2 model, while being higher quality.
303
  It produces far more natural detail, resolves lines and edges more smoothly, and cleans up compression artifacts better.
304
  As a result of those improvements, PBR is also much improved. It tends to be clearer with less defined artifacts."""],
305
 
306
  "4xNomos2_realplksr_dysample.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos2_realplksr_dysample/4xNomos2_realplksr_dysample.pth",
307
  "https://openmodeldb.info/models/4x-Nomos2-realplksr-dysample",
308
+ """Compression Removal, JPEG, Realistic, Restoration
309
+ Phhofm: A Dysample RealPLKSR 4x upscaling model that was trained with / handles jpg compression down to 70 on the Nomosv2 dataset, preserves DoF.
310
  This model affects / saturates colors, which can be counteracted a bit by using wavelet color fix, as used in these examples.
311
 
312
  # RealPLKSR
313
  "2x-AnimeSharpV2_RPLKSR_Sharp.pth": ["https://github.com/Kim2091/Kim2091-Models/releases/download/2x-AnimeSharpV2_Set/2x-AnimeSharpV2_RPLKSR_Sharp.pth",
314
  "https://github.com/Kim2091/Kim2091-Models/releases/tag/2x-AnimeSharpV2_Set",
315
+ """Anime, Compression Removal, Restoration
316
+ Kim2091: This is my first anime model in years. Hopefully you guys can find a good use-case for it.
317
  RealPLKSR (Higher quality, slower) Sharp: For heavily degraded sources. Sharp models have issues with depth of field but are best at removing artifacts
318
  """],
319
 
320
  "2x-AnimeSharpV2_RPLKSR_Soft.pth" : ["https://github.com/Kim2091/Kim2091-Models/releases/download/2x-AnimeSharpV2_Set/2x-AnimeSharpV2_RPLKSR_Soft.pth",
321
  "https://github.com/Kim2091/Kim2091-Models/releases/tag/2x-AnimeSharpV2_Set",
322
+ """Anime, Compression Removal, Restoration
323
+ Kim2091: This is my first anime model in years. Hopefully you guys can find a good use-case for it.
324
  RealPLKSR (Higher quality, slower) Soft: For cleaner sources. Soft models preserve depth of field but may not remove other artifacts as well"""],
325
 
326
  "4xPurePhoto-RealPLSKR.pth" : ["https://github.com/starinspace/StarinspaceUpscale/releases/download/Models/4xPurePhoto-RealPLSKR.pth",
327
  "https://openmodeldb.info/models/4x-PurePhoto-RealPLSKR",
328
+ """AI Generated, Compression Removal, JPEG, Realistic, Restoration
329
+ asterixcool: Skilled in working with cats, hair, parties, and creating clear images.
330
  Also proficient in resizing photos and enlarging large, sharp images.
331
  Can effectively improve images from small sizes as well (300px at smallest on one side, depending on the subject).
332
+ Experienced in experimenting with techniques like upscaling with this model twice and
333
  then reducing it by 50% to enhance details, especially in features like hair or animals."""],
334
 
335
  "2x_Text2HD_v.1-RealPLKSR.pth" : ["https://github.com/starinspace/StarinspaceUpscale/releases/download/Models/2x_Text2HD_v.1-RealPLKSR.pth",
336
  "https://openmodeldb.info/models/2x-Text2HD-v-1",
337
+ """Compression Removal, Denoise, General Upscaler, JPEG, Restoration, Text
338
+ asterixcool: The upscale model is specifically designed to enhance lower-quality text images,
339
  improving their clarity and readability by upscaling them by 2x.
340
  It excels at processing moderately sized text, effectively transforming it into high-quality, legible scans.
341
+ However, the model may encounter challenges when dealing with very small text,
342
+ as its performance is optimized for text of a certain minimum size. For best results,
343
  input images should contain text that is not excessively small."""],
344
 
345
  "2xVHS2HD-RealPLKSR.pth" : ["https://github.com/starinspace/StarinspaceUpscale/releases/download/Models/2xVHS2HD-RealPLKSR.pth",
346
  "https://openmodeldb.info/models/2x-VHS2HD",
347
+ """Compression Removal, Dehalo, Realistic, Restoration, Video Frame
348
+ asterixcool: An advanced VHS recording model designed to enhance video quality by reducing artifacts such as haloing, ghosting, and noise patterns.
349
  Optimized primarily for PAL resolution (NTSC might work good as well)."""],
350
 
351
  "4xNomosWebPhoto_RealPLKSR.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomosWebPhoto_RealPLKSR/4xNomosWebPhoto_RealPLKSR.pth",
352
  "https://openmodeldb.info/models/4x-NomosWebPhoto-RealPLKSR",
353
+ """Realistic, Restoration
354
+ Phhofm: 4x RealPLKSR model for photography, trained with realistic noise, lens blur, jpg and webp re-compression."""],
355
 
356
  # DRCT
357
  "4xNomos2_hq_drct-l.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos2_hq_drct-l/4xNomos2_hq_drct-l.pth",
358
  "https://github.com/Phhofm/models/releases/tag/4xNomos2_hq_drct-l",
359
+ """General Upscaler, Realistic
360
+ Phhofm: A drct-l 4x upscaling model, similar to the 4xNomos2_hq_atd, 4xNomos2_hq_dat2 and 4xNomos2_hq_mosr models, trained on and intended for non-degraded input to give good quality output.
361
  """],
362
 
363
  # ATD
364
  "4xNomos2_hq_atd.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos2_hq_atd/4xNomos2_hq_atd.pth",
365
  "https://github.com/Phhofm/models/releases/tag/4xNomos2_hq_atd",
366
+ """General Upscaler, Realistic
367
+ Phhofm: An atd 4x upscaling model, similar to the 4xNomos2_hq_dat2 or 4xNomos2_hq_mosr models, trained on and intended for non-degraded input to give good quality output.
368
  """],
369
 
370
  # MoSR
371
  "4xNomos2_hq_mosr.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos2_hq_mosr/4xNomos2_hq_mosr.pth",
372
  "https://github.com/Phhofm/models/releases/tag/4xNomos2_hq_mosr",
373
+ """General Upscaler, Realistic
374
+ Phhofm: A 4x MoSR upscaling model, meant for non-degraded input, since this model was trained on non-degraded input to give good quality output.
375
  """],
376
 
377
  "2x-AnimeSharpV2_MoSR_Sharp.pth" : ["https://github.com/Kim2091/Kim2091-Models/releases/download/2x-AnimeSharpV2_Set/2x-AnimeSharpV2_MoSR_Sharp.pth",
378
  "https://github.com/Kim2091/Kim2091-Models/releases/tag/2x-AnimeSharpV2_Set",
379
+ """Anime, Compression Removal, Restoration
380
+ Kim2091: This is my first anime model in years. Hopefully you guys can find a good use-case for it.
381
  MoSR (Lower quality, faster), Sharp: For heavily degraded sources. Sharp models have issues with depth of field but are best at removing artifacts
382
  """],
383
 
384
  "2x-AnimeSharpV2_MoSR_Soft.pth" : ["https://github.com/Kim2091/Kim2091-Models/releases/download/2x-AnimeSharpV2_Set/2x-AnimeSharpV2_MoSR_Soft.pth",
385
  "https://github.com/Kim2091/Kim2091-Models/releases/tag/2x-AnimeSharpV2_Set",
386
+ """Anime, Compression Removal, Restoration
387
+ Kim2091: This is my first anime model in years. Hopefully you guys can find a good use-case for it.
388
  MoSR (Lower quality, faster), Soft: For cleaner sources. Soft models preserve depth of field but may not remove other artifacts as well
389
  """],
390
 
391
  # SRFormer
392
  "4xNomos8kSCSRFormer.pth" : ["https://github.com/Phhofm/models/releases/download/4xNomos8kSCSRFormer/4xNomos8kSCSRFormer.pth",
393
  "https://github.com/Phhofm/models/releases/tag/4xNomos8kSCSRFormer",
394
+ """Anime, Compression Removal, General Upscaler, JPEG, Realistic, Restoration
395
+ Phhofm: 4x photo upscaler with otf jpg compression and blur, trained on musl's Nomos8k_sfw dataset for realistic sr.
396
  """],
397
 
398
+ "4xFrankendataFullDegradation_SRFormer460K_g.pth" : ["https://drive.google.com/uc?export=download&confirm=1&id=1PZrj-8ofxhORv_OgTVSoRt3dYi-BtiDj",
399
+ "https://openmodeldb.info/models/4x-Frankendata-FullDegradation-SRFormer",
400
+ """Compression Removal, Denoise, Realistic, Restoration
401
+ Crustaceous D: 4x realistic upscaler that may also work for general purpose usage.
402
+ It was trained with OTF random degradation with a very low to very high range of degradations, including blur, noise, and compression.
403
+ Trained with the same Frankendata dataset that I used for the pretrain model.
404
+ """],
405
+
406
+ "4xFrankendataPretrainer_SRFormer400K_g.pth" : ["https://drive.google.com/uc?export=download&confirm=1&id=1SaKvpYYIm2Vj2m9GifUMlNCbmkE6JZmr",
407
+ "https://openmodeldb.info/models/4x-FrankendataPretainer-SRFormer",
408
+ """Realistic, Restoration
409
+ Crustaceous D: 4x realistic upscaler that may also work for general purpose usage.
410
+ It was trained with OTF random degradation with a very low to very high range of degradations, including blur, noise, and compression.
411
+ Trained with the same Frankendata dataset that I used for the pretrain model.
412
+ """],
413
+
414
+ "1xFrankenfixer_SRFormerLight_g.pth" : ["https://drive.google.com/uc?export=download&confirm=1&id=1UJ0iyFn4IGNhPIgNgrQrBxYsdDloFc9I",
415
+ "https://openmodeldb.info/models/1x-Frankenfixer-SRFormerLight",
416
+ """Realistic, Restoration
417
+ Crustaceous D: A 1x model designed to reduce artifacts and restore detail to images upscaled by 4xFrankendata_FullDegradation_SRFormer. It could possibly work with other upscaling models too.
418
+ """],
419
  }
420
 
421
  example_list = ["images/a01.jpg", "images/a02.jpg", "images/a03.jpg", "images/a04.jpg", "images/bus.jpg", "images/zidane.jpg",
 
437
  model_type = "RealPLKSR"
438
  elif any(value in model_name.lower() for value in ("realesrgan", "realesrnet")):
439
  model_type = "RRDB"
440
+ elif any(value in model_name.lower() for value in ("realesr", "compact")):
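+ # "compact" here covers the Compact / UltraCompact / SuperUltraCompact models,
+ # which are listed under the SRVGGNet section of upscale_models above.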
441
  model_type = "SRVGG"
442
  elif "esrgan" in model_name.lower():
443
  model_type = "ESRGAN"
 
461
  print(img)
462
  print(face_restoration, upscale_model, scale)
463
  try:
464
+ if not img or (not face_restoration and not upscale_model):
465
+ raise ValueError("Invalid parameter setting")
466
+
467
+ timer = Timer() # Create a timer
468
  self.scale = scale
469
  self.img_name = os.path.basename(str(img))
470
  self.basename, self.extension = os.path.splitext(self.img_name)
 
476
  img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
477
 
478
  self.h_input, self.w_input = img.shape[0:2]
479
+ self.realesrganer = None
480
 
 
 
 
481
  modelInUse = ""
482
  upscale_type = None
483
+ is_auto_split_upscale = True
484
  if upscale_model:
485
  upscale_type, upscale_model = upscale_model.split(", ", 1)
486
  download_from_url(upscale_models[upscale_model][0], upscale_model, os.path.join("weights", "upscale"))
487
  modelInUse = f"_{os.path.splitext(upscale_model)[0]}"
488
 
489
+ self.netscale = 1 if any(sub in upscale_model.lower() for sub in ("x1", "1x")) else (2 if any(sub in upscale_model.lower() for sub in ("x2", "2x")) else 4)
490
+ model = None
491
+ half = True if torch.cuda.is_available() else False
492
+ if upscale_type:
493
+ # The values of the following hyperparameters are based on the research findings of the Spandrel project.
494
+ # https://github.com/chaiNNer-org/spandrel/tree/main/libs/spandrel/spandrel/architectures
495
+ from basicsr.archs.rrdbnet_arch import RRDBNet
496
+ loadnet = torch.load(os.path.join("weights", "upscale", upscale_model), map_location=torch.device('cpu'), weights_only=True)
497
+ if 'params_ema' in loadnet or 'params' in loadnet:
498
+ loadnet = loadnet['params_ema'] if 'params_ema' in loadnet else loadnet['params']
499
+
500
+ if upscale_type == "SRVGG":
501
+ from basicsr.archs.srvgg_arch import SRVGGNetCompact
502
+ body_max_num = self.find_max_numbers(loadnet, "body")
503
+ num_feat = loadnet["body.0.weight"].shape[0]
504
+ num_in_ch = loadnet["body.0.weight"].shape[1]
505
+ num_conv = body_max_num // 2 - 1
506
+ model = SRVGGNetCompact(num_in_ch=num_in_ch, num_out_ch=3, num_feat=num_feat, num_conv=num_conv, upscale=self.netscale, act_type='prelu')
507
+ elif upscale_type == "RRDB" or upscale_type == "ESRGAN":
508
+ if upscale_type == "RRDB":
509
+ num_block = self.find_max_numbers(loadnet, "body") + 1
510
+ num_feat = loadnet["conv_first.weight"].shape[0]
511
+ else:
512
+ num_block = self.find_max_numbers(loadnet, "model.1.sub")
513
+ num_feat = loadnet["model.0.weight"].shape[0]
514
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=num_feat, num_block=num_block, num_grow_ch=32, scale=self.netscale, is_real_esrgan=upscale_type == "RRDB")
515
+ elif upscale_type == "DAT":
516
+ from basicsr.archs.dat_arch import DAT
517
+ half = False
518
+
519
+ in_chans = loadnet["conv_first.weight"].shape[1]
520
+ embed_dim = loadnet["conv_first.weight"].shape[0]
521
+ num_layers = self.find_max_numbers(loadnet, "layers") + 1
522
+ depth = [6] * num_layers
523
+ num_heads = [6] * num_layers
524
+ for i in range(num_layers):
525
+ depth[i] = self.find_max_numbers(loadnet, f"layers.{i}.blocks") + 1
526
+ num_heads[i] = loadnet[f"layers.{i}.blocks.1.attn.temperature"].shape[0] if depth[i] >= 2 else \
527
+ loadnet[f"layers.{i}.blocks.0.attn.attns.0.pos.pos3.2.weight"].shape[0] * 2
528
+
529
+ upsampler = "pixelshuffle" if "conv_last.weight" in loadnet else "pixelshuffledirect"
530
+ resi_connection = "1conv" if "conv_after_body.weight" in loadnet else "3conv"
531
+ qkv_bias = "layers.0.blocks.0.attn.qkv.bias" in loadnet
532
+ expansion_factor = float(loadnet["layers.0.blocks.0.ffn.fc1.weight"].shape[0] / embed_dim)
533
+
534
+ img_size = 64
535
+ if "layers.0.blocks.2.attn.attn_mask_0" in loadnet:
536
+ attn_mask_0_x, attn_mask_0_y, _attn_mask_0_z = loadnet["layers.0.blocks.2.attn.attn_mask_0"].shape
537
+ img_size = int(math.sqrt(attn_mask_0_x * attn_mask_0_y))
538
+
539
+ split_size = [2, 4]
540
+ if "layers.0.blocks.0.attn.attns.0.rpe_biases" in loadnet:
541
+ split_sizes = loadnet["layers.0.blocks.0.attn.attns.0.rpe_biases"][-1] + 1
542
+ split_size = [int(x) for x in split_sizes]
543
+
544
+ model = DAT(img_size=img_size, in_chans=in_chans, embed_dim=embed_dim, split_size=split_size, depth=depth, num_heads=num_heads, expansion_factor=expansion_factor,
545
+ qkv_bias=qkv_bias, resi_connection=resi_connection, upsampler=upsampler, upscale=self.netscale)
546
+ elif upscale_type == "HAT":
547
+ half = False
548
+ from basicsr.archs.hat_arch import HAT
549
+ in_chans = loadnet["conv_first.weight"].shape[1]
550
+ embed_dim = loadnet["conv_first.weight"].shape[0]
551
+ window_size = int(math.sqrt(loadnet["relative_position_index_SA"].shape[0]))
552
+ num_layers = self.find_max_numbers(loadnet, "layers") + 1
553
+ depths = [6] * num_layers
554
+ num_heads = [6] * num_layers
555
+ for i in range(num_layers):
556
+ depths[i] = self.find_max_numbers(loadnet, f"layers.{i}.residual_group.blocks") + 1
557
+ num_heads[i] = loadnet[f"layers.{i}.residual_group.overlap_attn.relative_position_bias_table"].shape[1]
558
+ resi_connection = "1conv" if "conv_after_body.weight" in loadnet else "identity"
559
+
560
+ compress_ratio = self.find_divisor_for_quotient(embed_dim, loadnet["layers.0.residual_group.blocks.0.conv_block.cab.0.weight"].shape[0],)
561
+ squeeze_factor = self.find_divisor_for_quotient(embed_dim, loadnet["layers.0.residual_group.blocks.0.conv_block.cab.3.attention.1.weight"].shape[0],)
562
+
563
+ qkv_bias = "layers.0.residual_group.blocks.0.attn.qkv.bias" in loadnet
564
+ patch_norm = "patch_embed.norm.weight" in loadnet
565
+ ape = "absolute_pos_embed" in loadnet
566
+
567
+ mlp_hidden_dim = int(loadnet["layers.0.residual_group.blocks.0.mlp.fc1.weight"].shape[0])
568
+ mlp_ratio = mlp_hidden_dim / embed_dim
 
569
  upsampler = "pixelshuffle"
570
+
571
+ model = HAT(img_size=64, patch_size=1, in_chans=in_chans, embed_dim=embed_dim, depths=depths, num_heads=num_heads, window_size=window_size, compress_ratio=compress_ratio,
572
+ squeeze_factor=squeeze_factor, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, ape=ape, patch_norm=patch_norm,
573
+ upsampler=upsampler, resi_connection=resi_connection, upscale=self.netscale,)
574
+ elif "RealPLKSR" in upscale_type:
575
+ from basicsr.archs.realplksr_arch import realplksr
576
+ half = False if "RealPLSKR" in upscale_model else half
577
+ use_ea = "feats.1.attn.f.0.weight" in loadnet
578
+ dim = loadnet["feats.0.weight"].shape[0]
579
+ num_feats = self.find_max_numbers(loadnet, "feats") + 1
580
+ n_blocks = num_feats - 3
581
+ kernel_size = loadnet["feats.1.lk.conv.weight"].shape[2]
582
+ split_ratio = loadnet["feats.1.lk.conv.weight"].shape[0] / dim
583
+ use_dysample = "to_img.init_pos" in loadnet
584
+
585
+ model = realplksr(upscaling_factor=self.netscale, dim=dim, n_blocks=n_blocks, kernel_size=kernel_size, split_ratio=split_ratio, use_ea=use_ea, dysample=use_dysample)
586
+ elif upscale_type == "DRCT":
587
+ half = False
588
+ from basicsr.archs.DRCT_arch import DRCT
589
+
590
+ in_chans = loadnet["conv_first.weight"].shape[1]
591
+ embed_dim = loadnet["conv_first.weight"].shape[0]
592
+ num_layers = self.find_max_numbers(loadnet, "layers") + 1
593
+ depths = (6,) * num_layers
594
+ num_heads = []
595
+ for i in range(num_layers):
596
+ num_heads.append(loadnet[f"layers.{i}.swin1.attn.relative_position_bias_table"].shape[1])
597
+
598
+ mlp_ratio = loadnet["layers.0.swin1.mlp.fc1.weight"].shape[0] / embed_dim
599
+ window_square = loadnet["layers.0.swin1.attn.relative_position_bias_table"].shape[0]
600
+ window_size = (math.isqrt(window_square) + 1) // 2
601
+ upsampler = "pixelshuffle" if "conv_last.weight" in loadnet else ""
602
+ resi_connection = "1conv" if "conv_after_body.weight" in loadnet else ""
603
+ qkv_bias = "layers.0.swin1.attn.qkv.bias" in loadnet
604
+ gc_adjust1 = loadnet["layers.0.adjust1.weight"].shape[0]
605
+ patch_norm = "patch_embed.norm.weight" in loadnet
606
+ ape = "absolute_pos_embed" in loadnet
607
+
608
+ model = DRCT(in_chans=in_chans, img_size= 64, window_size=window_size, compress_ratio=3,squeeze_factor=30,
609
+ conv_scale= 0.01, overlap_ratio= 0.5, img_range= 1., depths=depths, embed_dim=embed_dim, num_heads=num_heads,
610
+ mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, ape=ape, patch_norm=patch_norm, use_checkpoint=False,
611
+ upscale=self.netscale, upsampler=upsampler, resi_connection=resi_connection, gc =gc_adjust1,)
612
+ elif upscale_type == "ATD":
613
+ half = False
614
+ from basicsr.archs.atd_arch import ATD
615
+ in_chans = loadnet["conv_first.weight"].shape[1]
616
+ embed_dim = loadnet["conv_first.weight"].shape[0]
617
+ window_size = math.isqrt(loadnet["relative_position_index_SA"].shape[0])
618
+ num_layers = self.find_max_numbers(loadnet, "layers") + 1
619
+ depths = [6] * num_layers
620
+ num_heads = [6] * num_layers
621
+ for i in range(num_layers):
622
+ depths[i] = self.find_max_numbers(loadnet, f"layers.{i}.residual_group.layers") + 1
623
+ num_heads[i] = loadnet[f"layers.{i}.residual_group.layers.0.attn_win.relative_position_bias_table"].shape[1]
624
+ num_tokens = loadnet["layers.0.residual_group.layers.0.attn_atd.scale"].shape[0]
625
+ reducted_dim = loadnet["layers.0.residual_group.layers.0.attn_atd.wq.weight"].shape[0]
626
+ convffn_kernel_size = loadnet["layers.0.residual_group.layers.0.convffn.dwconv.depthwise_conv.0.weight"].shape[2]
627
+ mlp_ratio = (loadnet["layers.0.residual_group.layers.0.convffn.fc1.weight"].shape[0] / embed_dim)
628
+ qkv_bias = "layers.0.residual_group.layers.0.wqkv.bias" in loadnet
629
+ ape = "absolute_pos_embed" in loadnet
630
+ patch_norm = "patch_embed.norm.weight" in loadnet
631
+ resi_connection = "1conv" if "layers.0.conv.weight" in loadnet else "3conv"
632
+
633
+ if "conv_up1.weight" in loadnet:
634
+ upsampler = "nearest+conv"
635
+ elif "conv_before_upsample.0.weight" in loadnet:
636
+ upsampler = "pixelshuffle"
637
+ elif "conv_last.weight" in loadnet:
638
+ upsampler = ""
639
+ else:
640
+ upsampler = "pixelshuffledirect"
641
+
642
+ is_light = upsampler == "pixelshuffledirect" and embed_dim == 48
643
+ category_size = 128 if is_light else 256
644
+
645
+ model = ATD(in_chans=in_chans, embed_dim=embed_dim, depths=depths, num_heads=num_heads, window_size=window_size, category_size=category_size,
646
+ num_tokens=num_tokens, reducted_dim=reducted_dim, convffn_kernel_size=convffn_kernel_size, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, ape=ape,
647
+ patch_norm=patch_norm, use_checkpoint=False, upscale=self.netscale, upsampler=upsampler, resi_connection='1conv',)
648
+ elif upscale_type == "MoSR":
649
+ from basicsr.archs.mosr_arch import mosr
650
+ n_block = self.find_max_numbers(loadnet, "gblocks") - 5
651
+ in_ch = loadnet["gblocks.0.weight"].shape[1]
652
+ out_ch = loadnet["upsampler.end_conv.weight"].shape[0] if "upsampler.init_pos" in loadnet else in_ch
653
+ dim = loadnet["gblocks.0.weight"].shape[0]
654
+ expansion_ratio = (loadnet["gblocks.1.fc1.weight"].shape[0] / loadnet["gblocks.1.fc1.weight"].shape[1]) / 2
655
+ conv_ratio = loadnet["gblocks.1.conv.weight"].shape[0] / dim
656
+ kernel_size = loadnet["gblocks.1.conv.weight"].shape[2]
657
+ upsampler = "dys" if "upsampler.init_pos" in loadnet else ("gps" if "upsampler.in_to_k.weight" in loadnet else "ps")
658
+
659
+ model = mosr(in_ch = in_ch, out_ch = out_ch, upscale = self.netscale, n_block = n_block, dim = dim,
660
+ upsampler = upsampler, kernel_size = kernel_size, expansion_ratio = expansion_ratio, conv_ratio = conv_ratio,)
661
+ elif upscale_type == "SRFormer":
662
+ half = False
663
+ from basicsr.archs.srformer_arch import SRFormer
664
+ in_chans = loadnet["conv_first.weight"].shape[1]
665
+ embed_dim = loadnet["conv_first.weight"].shape[0]
666
+ ape = "absolute_pos_embed" in loadnet
667
+ patch_norm = "patch_embed.norm.weight" in loadnet
668
+ qkv_bias = "layers.0.residual_group.blocks.0.attn.q.bias" in loadnet
669
+ mlp_ratio = float(loadnet["layers.0.residual_group.blocks.0.mlp.fc1.weight"].shape[0] / embed_dim)
670
+
671
+ num_layers = self.find_max_numbers(loadnet, "layers") + 1
672
+ depths = [6] * num_layers
673
+ num_heads = [6] * num_layers
674
+ for i in range(num_layers):
675
+ depths[i] = self.find_max_numbers(loadnet, f"layers.{i}.residual_group.blocks") + 1
676
+ num_heads[i] = loadnet[f"layers.{i}.residual_group.blocks.0.attn.relative_position_bias_table"].shape[1]
677
+
678
+ if "conv_hr.weight" in loadnet:
679
+ upsampler = "nearest+conv"
680
+ elif "conv_before_upsample.0.weight" in loadnet:
681
+ upsampler = "pixelshuffle"
682
+ elif "upsample.0.weight" in loadnet:
683
+ upsampler = "pixelshuffledirect"
684
+ resi_connection = "1conv" if "conv_after_body.weight" in loadnet else "3conv"
685
+
686
+ window_size = int(math.sqrt(loadnet["layers.0.residual_group.blocks.0.attn.relative_position_bias_table"].shape[0])) + 1
687
+
688
+ if "layers.0.residual_group.blocks.1.attn_mask" in loadnet:
689
+ attn_mask_0 = loadnet["layers.0.residual_group.blocks.1.attn_mask"].shape[0]
690
+ patches_resolution = int(math.sqrt(attn_mask_0) * window_size)
691
+ else:
692
+ patches_resolution = window_size
693
+ if ape:
694
+ pos_embed_value = loadnet.get("absolute_pos_embed", [None, None])[1]
695
+ if pos_embed_value:
696
+ patches_resolution = int(math.sqrt(pos_embed_value))
697
+
698
+ img_size = patches_resolution
699
+ if img_size % window_size != 0:
700
+ for nice_number in [512, 256, 128, 96, 64, 48, 32, 24, 16]:
701
+ if nice_number % window_size != 0:
702
+ nice_number += window_size - (nice_number % window_size)
703
+ if nice_number == patches_resolution:
704
+ img_size = nice_number
705
+ break
706
+
707
+ model = SRFormer(img_size=img_size, in_chans=in_chans, embed_dim=embed_dim, depths=depths, num_heads=num_heads, window_size=window_size, mlp_ratio=mlp_ratio,
708
+ qkv_bias=qkv_bias, qk_scale=None, ape=ape, patch_norm=patch_norm, upscale=self.netscale, upsampler=upsampler, resi_connection=resi_connection,)
709
+
710
+ if model:
711
+ self.realesrganer = RealESRGANer(scale=self.netscale, model_path=os.path.join("weights", "upscale", upscale_model), model=model, tile=0, tile_pad=10, pre_pad=0, half=half)
712
+ elif upscale_model:
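+ # Fallback for architectures not handled above: load the weights with
+ # image_gen_aux's UpscaleWithModel and wrap it so it exposes the same
+ # enhance() interface as RealESRGANer (a convenience shim, defined below).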
713
+ import PIL
714
+ from image_gen_aux import UpscaleWithModel
715
+ class UpscaleWithModel_Gfpgan(UpscaleWithModel):
716
+ def cv2pil(self, image):
717
+ ''' OpenCV type -> PIL type
718
+ https://qiita.com/derodero24/items/f22c22b22451609908ee
719
+ '''
720
+ new_image = image.copy()
721
+ if new_image.ndim == 2: # Grayscale
722
+ pass
723
+ elif new_image.shape[2] == 3: # Color
724
+ new_image = cv2.cvtColor(new_image, cv2.COLOR_BGR2RGB)
725
+ elif new_image.shape[2] == 4: # Transparency
726
+ new_image = cv2.cvtColor(new_image, cv2.COLOR_BGRA2RGBA)
727
+ new_image = PIL.Image.fromarray(new_image)
728
+ return new_image
729
+
730
+ def pil2cv(self, image):
731
+ ''' PIL type -> OpenCV type
732
+ https://qiita.com/derodero24/items/f22c22b22451609908ee
733
+ '''
734
+ new_image = np.array(image, dtype=np.uint8)
735
+ if new_image.ndim == 2: # Grayscale
736
+ pass
737
+ elif new_image.shape[2] == 3: # Color
738
+ new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR)
739
+ elif new_image.shape[2] == 4: # Transparency
740
+ new_image = cv2.cvtColor(new_image, cv2.COLOR_RGBA2BGRA)
741
+ return new_image
742
+
743
+ def enhance(self_, img, outscale=None):
744
+ # img: numpy
745
+ h_input, w_input = img.shape[0:2]
746
+ pil_img = self_.cv2pil(img)
747
+ pil_img = self_.__call__(pil_img)
748
+ cv_image = self_.pil2cv(pil_img)
749
+ if outscale is not None and outscale != float(self.netscale):
750
+ interpolation = cv2.INTER_AREA if outscale < float(self.netscale) else cv2.INTER_LANCZOS4
751
+ cv_image = cv2.resize(
752
+ cv_image, (
753
+ int(w_input * outscale),
754
+ int(h_input * outscale),
755
+ ), interpolation=interpolation)
756
+ return cv_image, None
757
+
758
+ device = "cuda" if torch.cuda.is_available() else "cpu"
759
+ upscaler = UpscaleWithModel.from_pretrained(os.path.join("weights", "upscale", upscale_model)).to(device)
760
+ upscaler.__class__ = UpscaleWithModel_Gfpgan
761
+ self.realesrganer = upscaler
762
+ timer.checkpoint("Initialize BG upscale model")
763
+
764
  self.face_enhancer = None
765
 
 
766
  if face_restoration:
767
+ download_from_url(face_models[face_restoration][0], face_restoration, os.path.join("weights", "face"))
768
+
769
+ resolution = 512
770
  modelInUse = f"_{os.path.splitext(face_restoration)[0]}" + modelInUse
771
  from gfpgan.utils import GFPGANer
772
  model_rootpath = os.path.join("weights", "face")
 
791
  resolution = 2048
792
 
793
  self.face_enhancer = GFPGANer(model_path=model_path, upscale=self.scale, arch=arch, channel_multiplier=channel_multiplier, model_rootpath=model_rootpath, det_model=face_detection, resolution=resolution)
794
+ timer.checkpoint("Initialize face enhancer model")
795
 
796
  files = []
797
  if not outputWithModelName:
 
799
 
800
  try:
801
  bg_upsample_img = None
802
+ if self.realesrganer and hasattr(self.realesrganer, "enhance"):
803
  from utils.dataops import auto_split_upscale
804
+ bg_upsample_img, _ = auto_split_upscale(img, self.realesrganer.enhance, self.scale) if is_auto_split_upscale else self.realesrganer.enhance(img, outscale=self.scale)
805
+ timer.checkpoint("Background upscale Section")
806
 
807
  if self.face_enhancer:
808
  cropped_faces, restored_aligned, bg_upsample_img = self.face_enhancer.enhance(img, has_aligned=False, only_center_face=face_detection_only_center, paste_back=True, bg_upsample_img=bg_upsample_img, eye_dist_threshold=face_detection_threshold)
 
823
  files.append(save_crop_path)
824
  files.append(save_restore_path)
825
  files.append(save_cmp_path)
826
+ timer.checkpoint("Face enhancer Section")
827
 
828
  restored_img = bg_upsample_img
829
  except RuntimeError as error:
 
844
 
845
  restored_img = cv2.cvtColor(restored_img, cv2.COLOR_BGR2RGB)
846
  files.append(save_path)
847
+ timer.report() # Print all recorded times
848
  return files, files
849
  except Exception as error:
850
  print(traceback.format_exc())
851
+ print("global exception: ", error)
852
  return None, None
853
 
854
  def find_max_numbers(self, state_dict, findkeys):
 
865
 
866
  return tuple(max_values[findkey] for findkey in findkeys) if len(findkeys) > 1 else max_values[findkeys[0]]
867
 
868
+ def find_divisor_for_quotient(self, a: int, c: int):
869
+ """
870
+ Returns a number `b` such that `a // b == c`.
871
+ If `b` is an integer, return it as an `int`, otherwise return a `float`.
872
+ """
873
+ if c == 0:
874
+ raise ValueError("c cannot be zero to avoid division by zero.")
875
+
876
+ b_float = a / c
877
+
878
+ # Check if b is an integer
879
+ if b_float.is_integer():
880
+ return int(b_float)
881
+
882
+ # Try using ceil and floor
883
+ ceil_b = math.ceil(b_float)
884
+ floor_b = math.floor(b_float)
885
+
886
+ if a // ceil_b == c:
887
+ return ceil_b if ceil_b == b_float else float(ceil_b)
888
+ if a // floor_b == c:
889
+ return floor_b if floor_b == b_float else float(floor_b)
890
+
891
+ # account for rounding errors
892
+ if c == a // b_float:
893
+ return b_float
894
+ if c == a // (b_float - 0.01):
895
+ return b_float - 0.01
896
+ if c == a // (b_float + 0.01):
897
+ return b_float + 0.01
898
+
899
+ raise ValueError(f"Could not find a number b such that a // b == c. a={a}, c={c}")
900
+
901
  def imwriteUTF8(self, save_path, image): # `cv2.imwrite` does not support writing files to UTF-8 file paths.
902
  img_name = os.path.basename(save_path)
903
  _, extension = os.path.splitext(img_name)
904
  is_success, im_buf_arr = cv2.imencode(extension, image)
905
  if (is_success): im_buf_arr.tofile(save_path)
906
 
907
+ class Timer:
908
+ def __init__(self):
909
+ self.start_time = time.perf_counter() # Record the start time
910
+ self.checkpoints = [("Start", self.start_time)] # Store checkpoints
911
+
912
+ def checkpoint(self, label="Checkpoint"):
913
+ """Record a checkpoint with a given label."""
914
+ now = time.perf_counter()
915
+ self.checkpoints.append((label, now))
916
+
917
+ def report(self):
918
+ """Print all recorded checkpoints and total execution time with aligned formatting."""
919
+ print("\n> Execution Time Report:")
920
+
921
+ # Determine the max label width for alignment
922
+ max_label_length = max(len(label) for label, _ in self.checkpoints)
923
+
924
+ prev_time = self.start_time
925
+ for label, curr_time in self.checkpoints[1:]:
926
+ elapsed = curr_time - prev_time
927
+ print(f"{label.ljust(max_label_length)}: {elapsed:.3f} seconds")
928
+ prev_time = curr_time
929
+
930
+ total_time = self.checkpoints[-1][1] - self.start_time
931
+ print(f"{'Total Execution Time'.ljust(max_label_length)}: {total_time:.3f} seconds\n")
932
 
933
  def main():
934
  if torch.cuda.is_available():
 
950
  Practically, the aforementioned algorithm is used to restore your **old photos** or improve **AI-generated faces**.<br>
951
  To use it, simply upload the image you want to restore.<br>
952
  """
953
+ # Custom CSS to set the height of the gr.Dropdown menu
954
+ css = """
955
+ ul.options {
956
+ max-height: 500px !important; /* Set the maximum height of the dropdown menu */
957
+ overflow-y: auto !important; /* Enable vertical scrolling if the content exceeds the height */
958
+ }
959
+ """
960
 
961
  upscale = Upscale()
962
 
 
978
  upscale_model_header = f"| Upscale Model Name | Info, Type: {tmptype}, Model execution speed: {speed} | Download URL |\n|------------|------|--------------|"
979
  upscale_model_tables.append(upscale_model_header + "\n" + "\n".join(rows))
980
 
981
+ with gr.Blocks(title = title, css = css) as demo:
982
  gr.Markdown(value=f"<h1 style=\"text-align:center;\">{title}</h1><br>{description}")
983
  with gr.Row():
984
  with gr.Column(variant ="panel"):
985
  input_image = gr.Image(type="filepath", label="Input", format="png")
986
+ face_model = gr.Dropdown([None]+list(face_models.keys()), type="value", value='GFPGANv1.4.pth', label='Face Restoration version', info="Face Restoration and RealESR can be freely combined in different ways, or one can be set to \"None\" to use only the other model. Face Restoration is primarily used for face restoration in real-life images, while RealESR serves as a background restoration model.")
987
+ upscale_model = gr.Dropdown([None]+list(typed_upscale_models.keys()), type="value", value='SRVGG, realesr-general-x4v3.pth', label='UpScale version')
988
  upscale_scale = gr.Number(label="Rescaling factor", value=4)
989
  face_detection = gr.Dropdown(["retinaface_resnet50", "YOLOv5l", "YOLOv5n"], type="value", value="retinaface_resnet50", label="Face Detection type")
990
  face_detection_threshold = gr.Number(label="Face eye dist threshold", value=10, info="A threshold to filter out faces with too small an eye distance (e.g., side faces).")
utils/dataops.py CHANGED
@@ -39,16 +39,20 @@ def auto_split_upscale(
39
  overlap: int = 32,
40
  max_depth: int = None,
41
  current_depth: int = 1,
 
 
42
  ):
43
  # Attempt to upscale if unknown depth or if reached known max depth
44
  if max_depth is None or max_depth == current_depth:
45
  try:
46
- print(f"auto_split_upscale, current depth: {current_depth}")
47
  result, _ = upscale_function(lr_img, scale)
 
48
  return result, current_depth
49
  except RuntimeError as e:
50
  # Check to see if its actually the CUDA out of memory error
51
  if "CUDA" in str(e):
 
52
  # Collect garbage (clear VRAM)
53
  torch.cuda.empty_cache()
54
  gc.collect()
@@ -60,68 +64,47 @@ def auto_split_upscale(
60
  torch.cuda.empty_cache()
61
  gc.collect()
62
 
63
- h, w, c = lr_img.shape
64
-
65
- # Split image into 4ths
66
- top_left = lr_img[: h // 2 + overlap, : w // 2 + overlap, :]
67
- top_right = lr_img[: h // 2 + overlap, w // 2 - overlap :, :]
68
- bottom_left = lr_img[h // 2 - overlap :, : w // 2 + overlap, :]
69
- bottom_right = lr_img[h // 2 - overlap :, w // 2 - overlap :, :]
70
-
71
- # Recursively upscale the quadrants
 
 
 
72
  # After we go through the top left quadrant, we know the maximum depth and no longer need to test for out-of-memory
73
  top_left_rlt, depth = auto_split_upscale(
74
- top_left,
75
- upscale_function,
76
- scale=scale,
77
- overlap=overlap,
78
- max_depth=max_depth,
79
- current_depth=current_depth + 1,
80
  )
81
  top_right_rlt, _ = auto_split_upscale(
82
- top_right,
83
- upscale_function,
84
- scale=scale,
85
- overlap=overlap,
86
- max_depth=depth,
87
- current_depth=current_depth + 1,
88
  )
89
  bottom_left_rlt, _ = auto_split_upscale(
90
- bottom_left,
91
- upscale_function,
92
- scale=scale,
93
- overlap=overlap,
94
- max_depth=depth,
95
- current_depth=current_depth + 1,
96
  )
97
  bottom_right_rlt, _ = auto_split_upscale(
98
- bottom_right,
99
- upscale_function,
100
- scale=scale,
101
- overlap=overlap,
102
- max_depth=depth,
103
- current_depth=current_depth + 1,
104
  )
105
-
106
- # Define output shape
107
- out_h = h * scale
108
- out_w = w * scale
109
-
110
- # Create blank output image
111
- output_img = np.zeros((out_h, out_w, c), np.uint8)
112
-
113
- # Fill output image with tiles, cropping out the overlaps
114
- output_img[: out_h // 2, : out_w // 2, :] = top_left_rlt[
115
- : out_h // 2, : out_w // 2, :
116
- ]
117
- output_img[: out_h // 2, -out_w // 2 :, :] = top_right_rlt[
118
- : out_h // 2, -out_w // 2 :, :
119
- ]
120
- output_img[-out_h // 2 :, : out_w // 2, :] = bottom_left_rlt[
121
- -out_h // 2 :, : out_w // 2, :
122
- ]
123
- output_img[-out_h // 2 :, -out_w // 2 :, :] = bottom_right_rlt[
124
- -out_h // 2 :, -out_w // 2 :, :
125
- ]
126
 
127
  return output_img, depth
 
39
  overlap: int = 32,
40
  max_depth: int = None,
41
  current_depth: int = 1,
42
+ current_tile: int = 1, # Tracks the current tile being processed
43
+ total_tiles: int = 1, # Total number of tiles at this depth level
44
  ):
45
  # Attempt to upscale if unknown depth or if reached known max depth
46
  if max_depth is None or max_depth == current_depth:
47
  try:
48
+ print(f"auto_split_upscale depth: {current_depth}", end=" ", flush=True)
49
  result, _ = upscale_function(lr_img, scale)
50
+ print(f"progress: {current_tile}/{total_tiles}")
51
  return result, current_depth
52
  except RuntimeError as e:
53
  # Check to see if its actually the CUDA out of memory error
54
  if "CUDA" in str(e):
55
+ print("RuntimeError: CUDA out of memory...")
56
  # Collect garbage (clear VRAM)
57
  torch.cuda.empty_cache()
58
  gc.collect()
 
64
  torch.cuda.empty_cache()
65
  gc.collect()
66
 
67
+ input_h, input_w, input_c = lr_img.shape
68
+
69
+ # Split the image into 4 quadrants with some overlap
70
+ top_left = lr_img[: input_h // 2 + overlap, : input_w // 2 + overlap, :]
71
+ top_right = lr_img[: input_h // 2 + overlap, input_w // 2 - overlap :, :]
72
+ bottom_left = lr_img[input_h // 2 - overlap :, : input_w // 2 + overlap, :]
73
+ bottom_right = lr_img[input_h // 2 - overlap :, input_w // 2 - overlap :, :]
74
+ current_depth = current_depth + 1
75
+ current_tile = (current_tile - 1) * 4
76
+ total_tiles = total_tiles * 4
77
+
78
+ # Recursively upscale each quadrant and track the current tile number
79
  # After we go through the top left quadrant, we know the maximum depth and no longer need to test for out-of-memory
80
  top_left_rlt, depth = auto_split_upscale(
81
+ top_left, upscale_function, scale=scale, overlap=overlap, max_depth=max_depth,
82
+ current_depth=current_depth, current_tile=current_tile + 1, total_tiles=total_tiles,
 
 
 
 
83
  )
84
  top_right_rlt, _ = auto_split_upscale(
85
+ top_right, upscale_function, scale=scale, overlap=overlap, max_depth=depth,
86
+ current_depth=current_depth, current_tile=current_tile + 2, total_tiles=total_tiles,
 
 
 
 
87
  )
88
  bottom_left_rlt, _ = auto_split_upscale(
89
+ bottom_left, upscale_function, scale=scale, overlap=overlap, max_depth=depth,
90
+ current_depth=current_depth, current_tile=current_tile + 3, total_tiles=total_tiles,
 
 
 
 
91
  )
92
  bottom_right_rlt, _ = auto_split_upscale(
93
+ bottom_right, upscale_function, scale=scale, overlap=overlap, max_depth=depth,
94
+ current_depth=current_depth, current_tile=current_tile + 4, total_tiles=total_tiles,
 
 
 
 
95
  )
96
+
97
+ # Define the output image size
98
+ out_h = input_h * scale
99
+ out_w = input_w * scale
100
+
101
+ # Create an empty output image
102
+ output_img = np.zeros((out_h, out_w, input_c), np.uint8)
103
+
104
+ # Fill the output image with the upscaled quadrants, removing overlap regions
105
+ output_img[: out_h // 2, : out_w // 2, :] = top_left_rlt[: out_h // 2, : out_w // 2, :]
106
+ output_img[: out_h // 2, -out_w // 2 :, :] = top_right_rlt[: out_h // 2, -out_w // 2 :, :]
107
+ output_img[-out_h // 2 :, : out_w // 2, :] = bottom_left_rlt[-out_h // 2 :, : out_w // 2, :]
108
+ output_img[-out_h // 2 :, -out_w // 2 :, :] = bottom_right_rlt[-out_h // 2 :, -out_w // 2 :, :]
 
 
 
 
 
 
 
 
109
 
110
  return output_img, depth
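
For orientation, a minimal sketch of how auto_split_upscale can be driven; fake_upscale and the 512x512 dummy image are made up for illustration, while the real caller in app.py passes RealESRGANer.enhance as the upscale function:

import cv2
import numpy as np

def fake_upscale(img, scale):
    # Stand-in for an enhance()-style callable: must return (upscaled_image, extra).
    h, w = img.shape[:2]
    return cv2.resize(img, (w * scale, h * scale)), None

lr = np.zeros((512, 512, 3), np.uint8)              # dummy BGR input image
out, depth = auto_split_upscale(lr, fake_upscale, scale=2)
# depth == 1 when the whole image fits in memory; each extra level splits every
# tile into 4 overlapping quadrants, so total_tiles grows as 4 ** (depth - 1).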