Upload 4 files
- CaffeLoader.py +254 -0
- README.md +308 -10
- app.py +37 -0
- neural_style.py +514 -0
CaffeLoader.py
ADDED
@@ -0,0 +1,254 @@
import torch
import torch.nn as nn


class VGG(nn.Module):
    def __init__(self, features, num_classes=1000):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )


class VGG_SOD(nn.Module):
    def __init__(self, features, num_classes=100):
        super(VGG_SOD, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 100),
        )


class VGG_FCN32S(nn.Module):
    def __init__(self, features, num_classes=1000):
        super(VGG_FCN32S, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Conv2d(512, 4096, (7, 7)),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Conv2d(4096, 4096, (1, 1)),
            nn.ReLU(True),
            nn.Dropout(0.5),
        )


class VGG_PRUNED(nn.Module):
    def __init__(self, features, num_classes=1000):
        super(VGG_PRUNED, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(0.5),
        )


class NIN(nn.Module):
    def __init__(self, pooling):
        super(NIN, self).__init__()
        if pooling == 'max':
            pool2d = nn.MaxPool2d((3, 3), (2, 2), (0, 0), ceil_mode=True)
        elif pooling == 'avg':
            pool2d = nn.AvgPool2d((3, 3), (2, 2), (0, 0), ceil_mode=True)

        self.features = nn.Sequential(
            nn.Conv2d(3, 96, (11, 11), (4, 4)),
            nn.ReLU(inplace=True),
            nn.Conv2d(96, 96, (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(96, 96, (1, 1)),
            nn.ReLU(inplace=True),
            pool2d,
            nn.Conv2d(96, 256, (5, 5), (1, 1), (2, 2)),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, (1, 1)),
            nn.ReLU(inplace=True),
            pool2d,
            nn.Conv2d(256, 384, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, (1, 1)),
            nn.ReLU(inplace=True),
            pool2d,
            nn.Dropout(0.5),
            nn.Conv2d(384, 1024, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(1024, 1024, (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(1024, 1000, (1, 1)),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((6, 6), (1, 1), (0, 0), ceil_mode=True),
            nn.Softmax(),
        )


class ModelParallel(nn.Module):
    def __init__(self, net, device_ids, device_splits):
        super(ModelParallel, self).__init__()
        self.device_list = self.name_devices(device_ids.split(','))
        self.chunks = self.chunks_to_devices(self.split_net(net, device_splits.split(',')))

    def name_devices(self, input_list):
        device_list = []
        for i, device in enumerate(input_list):
            if str(device).lower() != 'c':
                device_list.append("cuda:" + str(device))
            else:
                device_list.append("cpu")
        return device_list

    def split_net(self, net, device_splits):
        chunks, cur_chunk = [], nn.Sequential()
        for i, l in enumerate(net):
            cur_chunk.add_module(str(i), net[i])
            if str(i) in device_splits and device_splits != '':
                del device_splits[0]
                chunks.append(cur_chunk)
                cur_chunk = nn.Sequential()
        chunks.append(cur_chunk)
        return chunks

    def chunks_to_devices(self, chunks):
        for i, chunk in enumerate(chunks):
            chunk.to(self.device_list[i])
        return chunks

    def c(self, input, i):
        if input.type() == 'torch.FloatTensor' and 'cuda' in self.device_list[i]:
            input = input.type('torch.cuda.FloatTensor')
        elif input.type() == 'torch.cuda.FloatTensor' and 'cpu' in self.device_list[i]:
            input = input.type('torch.FloatTensor')
        return input

    def forward(self, input):
        for i, chunk in enumerate(self.chunks):
            if i < len(self.chunks) - 1:
                input = self.c(chunk(self.c(input, i).to(self.device_list[i])), i + 1).to(self.device_list[i + 1])
            else:
                input = chunk(input)
        return input


def buildSequential(channel_list, pooling):
    layers = []
    in_channels = 3
    if pooling == 'max':
        pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
    elif pooling == 'avg':
        pool2d = nn.AvgPool2d(kernel_size=2, stride=2)
    else:
        raise ValueError("Unrecognized pooling parameter")
    for c in channel_list:
        if c == 'P':
            layers += [pool2d]
        else:
            conv2d = nn.Conv2d(in_channels, c, kernel_size=3, padding=1)
            layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = c
    return nn.Sequential(*layers)


channel_list = {
    'VGG-16p': [24, 22, 'P', 41, 51, 'P', 108, 89, 111, 'P', 184, 276, 228, 'P', 512, 512, 512, 'P'],
    'VGG-16': [64, 64, 'P', 128, 128, 'P', 256, 256, 256, 'P', 512, 512, 512, 'P', 512, 512, 512, 'P'],
    'VGG-19': [64, 64, 'P', 128, 128, 'P', 256, 256, 256, 256, 'P', 512, 512, 512, 512, 'P', 512, 512, 512, 512, 'P'],
}

nin_dict = {
    'C': ['conv1', 'cccp1', 'cccp2', 'conv2', 'cccp3', 'cccp4', 'conv3', 'cccp5', 'cccp6', 'conv4-1024', 'cccp7-1024', 'cccp8-1024'],
    'R': ['relu0', 'relu1', 'relu2', 'relu3', 'relu5', 'relu6', 'relu7', 'relu8', 'relu9', 'relu10', 'relu11', 'relu12'],
    'P': ['pool1', 'pool2', 'pool3', 'pool4'],
    'D': ['drop'],
}
vgg16_dict = {
    'C': ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2', 'conv3_3', 'conv4_1', 'conv4_2', 'conv4_3', 'conv5_1', 'conv5_2', 'conv5_3'],
    'R': ['relu1_1', 'relu1_2', 'relu2_1', 'relu2_2', 'relu3_1', 'relu3_2', 'relu3_3', 'relu4_1', 'relu4_2', 'relu4_3', 'relu5_1', 'relu5_2', 'relu5_3'],
    'P': ['pool1', 'pool2', 'pool3', 'pool4', 'pool5'],
}
vgg19_dict = {
    'C': ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2', 'conv3_3', 'conv3_4', 'conv4_1', 'conv4_2', 'conv4_3', 'conv4_4', 'conv5_1', 'conv5_2', 'conv5_3', 'conv5_4'],
    'R': ['relu1_1', 'relu1_2', 'relu2_1', 'relu2_2', 'relu3_1', 'relu3_2', 'relu3_3', 'relu3_4', 'relu4_1', 'relu4_2', 'relu4_3', 'relu4_4', 'relu5_1', 'relu5_2', 'relu5_3', 'relu5_4'],
    'P': ['pool1', 'pool2', 'pool3', 'pool4', 'pool5'],
}


def modelSelector(model_file, pooling):
    vgg_list = ["fcn32s", "pruning", "sod", "vgg"]
    if any(name in model_file for name in vgg_list):
        if "pruning" in model_file:
            print("VGG-16 Architecture Detected")
            print("Using The Channel Pruning Model")
            cnn, layerList = VGG_PRUNED(buildSequential(channel_list['VGG-16p'], pooling)), vgg16_dict
        elif "fcn32s" in model_file:
            print("VGG-16 Architecture Detected")
            print("Using the fcn32s-heavy-pascal Model")
            cnn, layerList = VGG_FCN32S(buildSequential(channel_list['VGG-16'], pooling)), vgg16_dict
        elif "sod" in model_file:
            print("VGG-16 Architecture Detected")
            print("Using The SOD Finetune Model")
            cnn, layerList = VGG_SOD(buildSequential(channel_list['VGG-16'], pooling)), vgg16_dict
        elif "19" in model_file:
            print("VGG-19 Architecture Detected")
            cnn, layerList = VGG(buildSequential(channel_list['VGG-19'], pooling)), vgg19_dict
        elif "16" in model_file:
            print("VGG-16 Architecture Detected")
            cnn, layerList = VGG(buildSequential(channel_list['VGG-16'], pooling)), vgg16_dict
        else:
            raise ValueError("VGG architecture not recognized.")
    elif "nin" in model_file:
        print("NIN Architecture Detected")
        cnn, layerList = NIN(pooling), nin_dict
    else:
        raise ValueError("Model architecture not recognized.")
    return cnn, layerList


# Print like Torch7/loadcaffe
def print_loadcaffe(cnn, layerList):
    c = 0
    for l in list(cnn):
        if "Conv2d" in str(l):
            in_c, out_c, ks = str(l.in_channels), str(l.out_channels), str(l.kernel_size)
            print(layerList['C'][c] + ": " + (out_c + " " + in_c + " " + ks).replace(")", '').replace("(", '').replace(",", ''))
            c += 1
        if c == len(layerList['C']):
            break


# Load the model, and configure pooling layer type
def loadCaffemodel(model_file, pooling, use_gpu, disable_check):
    cnn, layerList = modelSelector(str(model_file).lower(), pooling)

    cnn.load_state_dict(torch.load(model_file), strict=(not disable_check))
    print("Successfully loaded " + str(model_file))

    # Maybe convert the model to cuda now, to avoid later issues
    if "c" not in str(use_gpu).lower() or "c" not in str(use_gpu[0]).lower():
        cnn = cnn.cuda()
    cnn = cnn.features

    print_loadcaffe(cnn, layerList)

    return cnn, layerList
README.md
CHANGED
@@ -1,10 +1,308 @@
# neural-style-pt

[DOI](https://zenodo.org/badge/latestdoi/142345353)

This is a PyTorch implementation of the paper [A Neural Algorithm of Artistic Style](http://arxiv.org/abs/1508.06576)
by Leon A. Gatys, Alexander S. Ecker, and Matthias Bethge. The code is based on Justin Johnson's [Neural-Style](https://github.com/jcjohnson/neural-style).

The paper presents an algorithm for combining the content of one image with the style of another image using
convolutional neural networks. Here's an example that maps the artistic style of
[The Starry Night](https://en.wikipedia.org/wiki/The_Starry_Night)
onto a night-time photograph of the Stanford campus:

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/starry_night_google.jpg" height="223px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/hoovertowernight.jpg" height="223px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/starry_stanford_bigger.png" width="710px">
</div>

Applying the style of different images to the same content image gives interesting results.
Here we reproduce Figure 2 from the paper, which renders a photograph of Tübingen in Germany in a
variety of styles:

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/tubingen.jpg" height="250px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_shipwreck.png" height="250px">

<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_starry.png" height="250px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_scream.png" height="250px">

<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_seated_nude.png" height="250px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_composition_vii.png" height="250px">
</div>

Here are the results of applying the style of various pieces of artwork to this photograph of the
Golden Gate Bridge:


<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/golden_gate.jpg" height="200px">

<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/frida_kahlo.jpg" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_kahlo.png" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/escher_sphere.jpg" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_escher.png" height="160px">
</div>

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/woman-with-hat-matisse.jpg" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_matisse.png" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/the_scream.jpg" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_scream.png" height="160px">
</div>

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/starry_night_crop.png" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_starry.png" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/seated-nude.jpg" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_seated.png" height="160px">
</div>

### Content / Style Tradeoff

The algorithm allows the user to trade off the relative weight of the style and content reconstruction terms,
as shown in this example where we port the style of [Picasso's 1907 self-portrait](http://www.wikiart.org/en/pablo-picasso/self-portrait-1907) onto Brad Pitt:

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/picasso_selfport1907.jpg" height="220px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/brad_pitt.jpg" height="220px">
</div>

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/pitt_picasso_content_5_style_100.png" height="220px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/pitt_picasso_content_1_style_100.png" height="220px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/pitt_picasso_content_01_style_100.png" height="220px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/pitt_picasso_content_0025_style_100.png" height="220px">
</div>

### Style Scale

By resizing the style image before extracting style features, we can control the types of artistic
features that are transferred from the style image; you can control this behavior with the `-style_scale` flag.
Below we see three examples of rendering the Golden Gate Bridge in the style of The Starry Night.
From left to right, `-style_scale` is 2.0, 1.0, and 0.5.

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_starry_scale2.png" height=175px>
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_starry_scale1.png" height=175px>
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_starry_scale05.png" height=175px>
</div>

### Multiple Style Images
You can use more than one style image to blend multiple artistic styles.

Clockwise from upper left: "The Starry Night" + "The Scream", "The Scream" + "Composition VII",
"Seated Nude" + "Composition VII", and "Seated Nude" + "The Starry Night"

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_starry_scream.png" height="250px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_scream_composition_vii.png" height="250px">

<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_starry_seated.png" height="250px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_seated_nude_composition_vii.png" height="250px">
</div>


### Style Interpolation
When using multiple style images, you can control the degree to which they are blended:

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_starry_scream_3_7.png" height="175px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_starry_scream_5_5.png" height="175px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_starry_scream_7_3.png" height="175px">
</div>

### Transfer style but not color
If you add the flag `-original_colors 1` then the output image will retain the colors of the original image.

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_starry.png" height="185px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_scream.png" height="185px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_composition_vii.png" height="185px">

<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/original_color/tubingen_starry.png" height="185px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/original_color/tubingen_scream.png" height="185px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/original_color/tubingen_composition_vii.png" height="185px">
</div>

## Setup:

Dependencies:
* [PyTorch](http://pytorch.org/)


Optional dependencies:
* For CUDA backend:
  * CUDA 7.5 or above
* For cuDNN backend:
  * cuDNN v6 or above
* For ROCm backend:
  * ROCm 2.1 or above
* For MKL backend:
  * MKL 2019 or above
* For OpenMP backend:
  * OpenMP 5.0 or above

After installing the dependencies, you'll need to run the following script to download the VGG model:
```
python models/download_models.py
```
This will download the original [VGG-19 model](https://gist.github.com/ksimonyan/3785162f95cd2d5fee77#file-readme-md).
The original [VGG-16 model](https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md) will also be downloaded. By default the original VGG-19 model is used.

If your GPU has limited memory, the NIN ImageNet model is a good alternative: it is much smaller and gives slightly worse but comparable results. You can find details on the model at the [BVLC Caffe ModelZoo](https://github.com/BVLC/caffe/wiki/Model-Zoo). The NIN model is also downloaded when you run the `download_models.py` script.

You can find detailed installation instructions for Ubuntu and Windows in the [installation guide](INSTALL.md).

## Usage
Basic usage:
```
python neural_style.py -style_image <image.jpg> -content_image <image.jpg>
```

cuDNN usage with NIN Model:
```
python neural_style.py -style_image examples/inputs/picasso_selfport1907.jpg -content_image examples/inputs/brad_pitt.jpg -output_image profile.png -model_file models/nin_imagenet.pth -gpu 0 -backend cudnn -num_iterations 1000 -seed 123 -content_layers relu0,relu3,relu7,relu12 -style_layers relu0,relu3,relu7,relu12 -content_weight 10 -style_weight 500 -image_size 512 -optimizer adam
```


To use multiple style images, pass a comma-separated list like this:

`-style_image starry_night.jpg,the_scream.jpg`.
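
For example, a hypothetical two-style run that also sets the per-style blending weights described under **Options** below might look like this (file names are taken from the line above; adjust the paths to your own images):

```
python neural_style.py -content_image examples/inputs/tubingen.jpg -style_image starry_night.jpg,the_scream.jpg -style_blend_weights 3,7
```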

Note that paths to images should not contain the `~` character to represent your home directory; you should instead use a relative
path or a full absolute path.

**Options**:
* `-image_size`: Maximum side length (in pixels) of the generated image. Default is 512.
* `-style_blend_weights`: The weight for blending the style of multiple style images, as a
  comma-separated list, such as `-style_blend_weights 3,7`. By default all style images
  are equally weighted.
* `-gpu`: Zero-indexed ID of the GPU to use; for CPU mode set `-gpu` to `c`.

**Optimization options**:
* `-content_weight`: How much to weight the content reconstruction term. Default is 5e0.
* `-style_weight`: How much to weight the style reconstruction term. Default is 1e2.
* `-tv_weight`: Weight of total-variation (TV) regularization; this helps to smooth the image.
  Default is 1e-3. Set to 0 to disable TV regularization.
* `-num_iterations`: Default is 1000.
* `-init`: Method for generating the generated image; one of `random` or `image`.
  Default is `random` which uses a noise initialization as in the paper; `image`
  initializes with the content image.
* `-init_image`: Replaces the initialization image with a user-specified image.
* `-optimizer`: The optimization algorithm to use; either `lbfgs` or `adam`; default is `lbfgs`.
  L-BFGS tends to give better results, but uses more memory. Switching to ADAM will reduce memory usage;
  when using ADAM you will probably need to play with other parameters to get good results, especially
  the style weight, content weight, and learning rate.
* `-learning_rate`: Learning rate to use with the ADAM optimizer. Default is 1e1.
* `-normalize_gradients`: If this flag is present, style and content gradients from each layer will be L1 normalized.

**Output options**:
* `-output_image`: Name of the output image. Default is `out.png`.
* `-print_iter`: Print progress every `print_iter` iterations. Set to 0 to disable printing.
* `-save_iter`: Save the image every `save_iter` iterations. Set to 0 to disable saving intermediate results.

**Layer options**:
* `-content_layers`: Comma-separated list of layer names to use for content reconstruction.
  Default is `relu4_2`.
* `-style_layers`: Comma-separated list of layer names to use for style reconstruction.
  Default is `relu1_1,relu2_1,relu3_1,relu4_1,relu5_1`.

**Other options**:
* `-style_scale`: Scale at which to extract features from the style image. Default is 1.0.
* `-original_colors`: If you set this to 1, then the output image will keep the colors of the content image.
* `-model_file`: Path to the `.pth` file for the VGG Caffe model. Default is the original VGG-19 model; you can also try the original VGG-16 model.
* `-pooling`: The type of pooling layers to use; one of `max` or `avg`. Default is `max`.
  The VGG-19 model uses max pooling layers, but the paper mentions that replacing these layers with average
  pooling layers can improve the results. I haven't been able to get good results using average pooling, but
  the option is here.
* `-seed`: An integer value that you can specify for repeatable results. By default this value is random for each run.
* `-multidevice_strategy`: A comma-separated list of layer indices at which to split the network when using multiple devices. See [Multi-GPU scaling](https://github.com/ProGamerGov/neural-style-pt#multi-gpu-scaling) for more details.
* `-backend`: `nn`, `cudnn`, `openmp`, or `mkl`. Default is `nn`. `mkl` requires Intel's MKL backend.
* `-cudnn_autotune`: When using the cuDNN backend, pass this flag to use the built-in cuDNN autotuner to select
  the best convolution algorithms for your architecture. This will make the first iteration a bit slower and can
  take a bit more memory, but may significantly speed up the cuDNN backend.

## Frequently Asked Questions

**Problem:** The program runs out of memory and dies

**Solution:** Try reducing the image size: `-image_size 256` (or lower). Note that different image sizes will likely
require non-default values for `-style_weight` and `-content_weight` for optimal results.
If you are running on a GPU, you can also try running with `-backend cudnn` to reduce memory usage.

**Problem:** `-backend cudnn` is slower than the default NN backend

**Solution:** Add the flag `-cudnn_autotune`; this will use the built-in cuDNN autotuner to select the best convolution algorithms.

**Problem:** You get the following error message:

`Missing key(s) in state_dict: "classifier.0.bias", "classifier.0.weight", "classifier.3.bias", "classifier.3.weight".
Unexpected key(s) in state_dict: "classifier.1.weight", "classifier.1.bias", "classifier.4.weight", "classifier.4.bias".`

**Solution:** Due to a mix-up with layer locations, older models require a fix to be compatible with newer versions of PyTorch. The included [`download_models.py`](https://github.com/ProGamerGov/neural-style-pt/blob/master/models/download_models.py) script will automatically perform these fixes after downloading the models.



## Memory Usage
By default, `neural-style-pt` uses the `nn` backend for convolutions and L-BFGS for optimization. These give good results, but can both use a lot of memory. You can reduce memory usage with the following:

* **Use cuDNN**: Add the flag `-backend cudnn` to use the cuDNN backend. This will only work in GPU mode.
* **Use ADAM**: Add the flag `-optimizer adam` to use ADAM instead of L-BFGS. This should significantly
  reduce memory usage, but may require tuning of other parameters for good results; in particular you should
  play with the learning rate, content weight, and style weight.
  This should work in both CPU and GPU modes.
* **Reduce image size**: If the above tricks are not enough, you can reduce the size of the generated image;
  pass the flag `-image_size 256` to generate an image at half the default size. A combined low-memory invocation is sketched below.
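
For example (file names taken from the repository's example images, and the flag values are only illustrative):

```
python neural_style.py -content_image examples/inputs/tubingen.jpg -style_image examples/inputs/seated-nude.jpg -backend cudnn -optimizer adam -image_size 256
```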

With the default settings, neural-style-pt uses about 3.7 GB of GPU memory on my system; switching to ADAM and cuDNN reduces the GPU memory footprint to about 1 GB.

## Speed
Speed can vary a lot depending on the backend and the optimizer.
Here are some times for running 500 iterations with `-image_size=512` on a Tesla K80 with different settings:
* `-backend nn -optimizer lbfgs`: 117 seconds
* `-backend nn -optimizer adam`: 100 seconds
* `-backend cudnn -optimizer lbfgs`: 124 seconds
* `-backend cudnn -optimizer adam`: 107 seconds
* `-backend cudnn -cudnn_autotune -optimizer lbfgs`: 109 seconds
* `-backend cudnn -cudnn_autotune -optimizer adam`: 91 seconds

Here are the same benchmarks on a GTX 1080:
* `-backend nn -optimizer lbfgs`: 56 seconds
* `-backend nn -optimizer adam`: 38 seconds
* `-backend cudnn -optimizer lbfgs`: 40 seconds
* `-backend cudnn -optimizer adam`: 40 seconds
* `-backend cudnn -cudnn_autotune -optimizer lbfgs`: 23 seconds
* `-backend cudnn -cudnn_autotune -optimizer adam`: 24 seconds

## Multi-GPU scaling
You can use multiple CPU and GPU devices to process images at higher resolutions; different layers of the network will be
computed on different devices. You can control which GPU and CPU devices are used with the `-gpu` flag, and you can control
how to split layers across devices using the `-multidevice_strategy` flag.

For example in a server with four GPUs, you can give the flag `-gpu 0,1,2,3` to process on GPUs 0, 1, 2, and 3 in that order; by also giving the flag `-multidevice_strategy 3,6,12` you indicate that the first two layers should be computed on GPU 0, layers 3 to 5 should be computed on GPU 1, layers 6 to 11 should be computed on GPU 2, and the remaining layers should be computed on GPU 3. You will need to tune `-multidevice_strategy` for your setup in order to achieve maximal resolution. An example invocation is sketched below.
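
Put together, the hypothetical four-GPU run described above would look something like this (the image size is illustrative, and the split indices are the untuned ones from the paragraph above):

```
python neural_style.py -content_image examples/inputs/tubingen.jpg -style_image examples/inputs/starry_night_google.jpg -gpu 0,1,2,3 -multidevice_strategy 3,6,12 -image_size 2000
```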

We can achieve very high quality results at high resolution by combining multi-GPU processing with multiscale
generation as described in the paper
<a href="https://arxiv.org/abs/1611.07865">**Controlling Perceptual Factors in Neural Style Transfer**</a> by Leon A. Gatys,
Alexander S. Ecker, Matthias Bethge, Aaron Hertzmann and Eli Shechtman.


Here is a 4016 x 2213 image generated on a server with eight Tesla K80 GPUs:

<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/starry_stanford_bigger.png" height="400px">

The script used to generate this image <a href='examples/scripts/starry_stanford_bigger.sh'>can be found here</a>.

## Implementation details
Images are initialized with white noise and optimized using L-BFGS.

We perform style reconstructions using the `conv1_1`, `conv2_1`, `conv3_1`, `conv4_1`, and `conv5_1` layers
and content reconstructions using the `conv4_2` layer. As in the paper, the five style reconstruction losses have
equal weights.
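
Concretely, the style representation at each of these layers is a Gram matrix of the layer's activations. The snippet below is a stripped-down sketch of the computation implemented by the `GramMatrix` module in `neural_style.py`:

```python
import torch

def gram_matrix(features):
    # features: activations from one VGG layer with shape (1, C, H, W)
    _, C, H, W = features.size()
    x_flat = features.view(C, H * W)
    # (C, C) matrix of channel-wise feature correlations
    return torch.mm(x_flat, x_flat.t())
```

The style loss is then the mean-squared error between the Gram matrices of the generated image and the style image, normalized by the number of elements in the activation tensor.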

## Citation

If you find this code useful for your research, please cite it using the provided citation.
app.py
ADDED
@@ -0,0 +1,37 @@
from flask import Flask, render_template, request, redirect, url_for
import os
from werkzeug.utils import secure_filename
from neural_style import main

app = Flask(__name__)


# Home page: upload the two images and display the result
@app.route('/', methods=['GET', 'POST'])
def upload_and_process():
    if request.method == 'POST':
        # Collect the two uploaded images
        images = [request.files['image1'], request.files['image2']]
        filenames = []

        for image in images:
            if image:
                # Use secure_filename to get a safe file name
                filename = secure_filename(image.filename)
                print(filename)
                # Save the uploaded image locally
                image.save(os.path.join('static', filename))
                filenames.append(filename)

        # Run the style-transfer model on the uploaded images
        main(filenames[0], filenames[1])

        # Return the results page and show the processed image
        return render_template('index.html', image_path="out.png",
                               filename1=filenames[0], filename2=filenames[1])

    return render_template('index.html')


if __name__ == '__main__':
    app.run(debug=True)
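
A note on how this front end fits together: it assumes a `templates/index.html` page and a `static/` directory next to `app.py` (neither is part of this upload). The form posts two files named `image1` and `image2`, `neural_style.main` writes its result as `out.png` under `static/`, and the page is re-rendered with that path. A local development run is then just:

```
python app.py
```

which starts Flask's development server on http://127.0.0.1:5000 by default.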
neural_style.py
ADDED
@@ -0,0 +1,514 @@
import os
import copy
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms

from PIL import Image
from CaffeLoader import loadCaffemodel, ModelParallel

import argparse

parser = argparse.ArgumentParser()
# Basic options
parser.add_argument("-style_image", help="Style target image", default='examples/inputs/seated-nude.jpg')
parser.add_argument("-style_blend_weights", default=None)
parser.add_argument("-content_image", help="Content target image", default='examples/inputs/tubingen.jpg')
parser.add_argument("-image_size", help="Maximum height / width of generated image", type=int, default=512)
parser.add_argument("-gpu", help="Zero-indexed ID of the GPU to use; for CPU mode set -gpu = c", default='c')

# Optimization options
parser.add_argument("-content_weight", type=float, default=5e0)
parser.add_argument("-style_weight", type=float, default=1e2)
parser.add_argument("-normalize_weights", action='store_true')
parser.add_argument("-normalize_gradients", action='store_true')
parser.add_argument("-tv_weight", type=float, default=1e-3)
parser.add_argument("-num_iterations", type=int, default=200)
parser.add_argument("-init", choices=['random', 'image'], default='random')
parser.add_argument("-init_image", default=None)
parser.add_argument("-optimizer", choices=['lbfgs', 'adam'], default='lbfgs')
parser.add_argument("-learning_rate", type=float, default=1e0)
parser.add_argument("-lbfgs_num_correction", type=int, default=100)

# Output options
parser.add_argument("-print_iter", type=int, default=50)
parser.add_argument("-save_iter", type=int, default=100)
parser.add_argument("-output_image", default='out.png')

# Other options
parser.add_argument("-style_scale", type=float, default=1.0)
parser.add_argument("-original_colors", type=int, choices=[0, 1], default=0)
parser.add_argument("-pooling", choices=['avg', 'max'], default='max')
parser.add_argument("-model_file", type=str, default='models/vgg19-d01eb7cb.pth')
parser.add_argument("-disable_check", action='store_true')
parser.add_argument("-backend", choices=['nn', 'cudnn', 'mkl', 'mkldnn', 'openmp', 'mkl,cudnn', 'cudnn,mkl'],
                    default='nn')
parser.add_argument("-cudnn_autotune", action='store_true')
parser.add_argument("-seed", type=int, default=-1)

parser.add_argument("-content_layers", help="layers for content", default='relu4_2')
parser.add_argument("-style_layers", help="layers for style", default='relu1_1,relu2_1,relu3_1,relu4_1,relu5_1')

parser.add_argument("-multidevice_strategy", default='4,7,29')
params = parser.parse_args()

Image.MAX_IMAGE_PIXELS = 1000000000  # Support gigapixel images


def main(filename1, filename2):
    params.content_image = "C:/Users/86136/Desktop/web/ai2/neural-style-pt-master/static/" + filename1
    params.style_image = "C:/Users/86136/Desktop/web/ai2/neural-style-pt-master/static/" + filename2
    dtype, multidevice, backward_device = setup_gpu()

    cnn, layerList = loadCaffemodel(params.model_file, params.pooling, params.gpu, params.disable_check)

    content_image = preprocess(params.content_image, params.image_size).type(dtype)
    style_image_input = params.style_image.split(',')
    style_image_list, ext = [], [".jpg", ".jpeg", ".png", ".tiff"]
    for image in style_image_input:
        if os.path.isdir(image):
            images = (image + "/" + file for file in os.listdir(image)
                      if os.path.splitext(file)[1].lower() in ext)
            style_image_list.extend(images)
        else:
            style_image_list.append(image)
    style_images_caffe = []
    for image in style_image_list:
        style_size = int(params.image_size * params.style_scale)
        img_caffe = preprocess(image, style_size).type(dtype)
        style_images_caffe.append(img_caffe)

    if params.init_image != None:
        image_size = (content_image.size(2), content_image.size(3))
        init_image = preprocess(params.init_image, image_size).type(dtype)

    # Handle style blending weights for multiple style inputs
    style_blend_weights = []
    if params.style_blend_weights == None:
        # Style blending not specified, so use equal weighting
        for i in style_image_list:
            style_blend_weights.append(1.0)
        for i, blend_weights in enumerate(style_blend_weights):
            style_blend_weights[i] = int(style_blend_weights[i])
    else:
        style_blend_weights = params.style_blend_weights.split(',')
        assert len(style_blend_weights) == len(style_image_list), \
            "-style_blend_weights and -style_images must have the same number of elements!"

    # Normalize the style blending weights so they sum to 1
    style_blend_sum = 0
    for i, blend_weights in enumerate(style_blend_weights):
        style_blend_weights[i] = float(style_blend_weights[i])
        style_blend_sum = float(style_blend_sum) + style_blend_weights[i]
    for i, blend_weights in enumerate(style_blend_weights):
        style_blend_weights[i] = float(style_blend_weights[i]) / float(style_blend_sum)

    content_layers = params.content_layers.split(',')
    style_layers = params.style_layers.split(',')

    # Set up the network, inserting style and content loss modules
    cnn = copy.deepcopy(cnn)
    content_losses, style_losses, tv_losses = [], [], []
    next_content_idx, next_style_idx = 1, 1
    net = nn.Sequential()
    c, r = 0, 0
    if params.tv_weight > 0:
        tv_mod = TVLoss(params.tv_weight).type(dtype)
        net.add_module(str(len(net)), tv_mod)
        tv_losses.append(tv_mod)

    for i, layer in enumerate(list(cnn), 1):
        if next_content_idx <= len(content_layers) or next_style_idx <= len(style_layers):
            if isinstance(layer, nn.Conv2d):
                net.add_module(str(len(net)), layer)

                if layerList['C'][c] in content_layers:
                    print("Setting up content layer " + str(i) + ": " + str(layerList['C'][c]))
                    loss_module = ContentLoss(params.content_weight, params.normalize_gradients)
                    net.add_module(str(len(net)), loss_module)
                    content_losses.append(loss_module)

                if layerList['C'][c] in style_layers:
                    print("Setting up style layer " + str(i) + ": " + str(layerList['C'][c]))
                    loss_module = StyleLoss(params.style_weight, params.normalize_gradients)
                    net.add_module(str(len(net)), loss_module)
                    style_losses.append(loss_module)
                c += 1

            if isinstance(layer, nn.ReLU):
                net.add_module(str(len(net)), layer)

                if layerList['R'][r] in content_layers:
                    print("Setting up content layer " + str(i) + ": " + str(layerList['R'][r]))
                    loss_module = ContentLoss(params.content_weight, params.normalize_gradients)
                    net.add_module(str(len(net)), loss_module)
                    content_losses.append(loss_module)
                    next_content_idx += 1

                if layerList['R'][r] in style_layers:
                    print("Setting up style layer " + str(i) + ": " + str(layerList['R'][r]))
                    loss_module = StyleLoss(params.style_weight, params.normalize_gradients)
                    net.add_module(str(len(net)), loss_module)
                    style_losses.append(loss_module)
                    next_style_idx += 1
                r += 1

            if isinstance(layer, nn.MaxPool2d) or isinstance(layer, nn.AvgPool2d):
                net.add_module(str(len(net)), layer)

    if multidevice:
        net = setup_multi_device(net)

    # Capture content targets
    for i in content_losses:
        i.mode = 'capture'
    print("Capturing content targets")
    print_torch(net, multidevice)
    net(content_image)

    # Capture style targets
    for i in content_losses:
        i.mode = 'None'

    for i, image in enumerate(style_images_caffe):
        print("Capturing style target " + str(i + 1))
        for j in style_losses:
            j.mode = 'capture'
            j.blend_weight = style_blend_weights[i]
        net(style_images_caffe[i])

    # Set all loss modules to loss mode
    for i in content_losses:
        i.mode = 'loss'
    for i in style_losses:
        i.mode = 'loss'

    # Maybe normalize content and style weights
    if params.normalize_weights:
        normalize_weights(content_losses, style_losses)

    # Freeze the network in order to prevent
    # unnecessary gradient calculations
    for param in net.parameters():
        param.requires_grad = False

    # Initialize the image
    if params.seed >= 0:
        torch.manual_seed(params.seed)
        torch.cuda.manual_seed_all(params.seed)
        torch.backends.cudnn.deterministic = True
    if params.init == 'random':
        B, C, H, W = content_image.size()
        img = torch.randn(C, H, W).mul(0.001).unsqueeze(0).type(dtype)
    elif params.init == 'image':
        if params.init_image != None:
            img = init_image.clone()
        else:
            img = content_image.clone()
    img = nn.Parameter(img)

    def maybe_print(t, loss):
        if params.print_iter > 0 and t % params.print_iter == 0:
            print("Iteration " + str(t) + " / " + str(params.num_iterations))
            for i, loss_module in enumerate(content_losses):
                print("  Content " + str(i + 1) + " loss: " + str(loss_module.loss.item()))
            for i, loss_module in enumerate(style_losses):
                print("  Style " + str(i + 1) + " loss: " + str(loss_module.loss.item()))
            print("  Total loss: " + str(loss.item()))

    '''
    def maybe_save(t):
        should_save = params.save_iter > 0 and t % params.save_iter == 0
        should_save = should_save or t == params.num_iterations
        if should_save:
            output_filename, file_extension = os.path.splitext(params.output_image)
            if t == params.num_iterations:
                filename = output_filename + str(file_extension)
            else:
                filename = str(output_filename) + "_" + str(t) + str(file_extension)
            disp = deprocess(img.clone())

            # Maybe perform postprocessing for color-independent style transfer
            if params.original_colors == 1:
                disp = original_colors(deprocess(content_image.clone()), disp)

            disp.save(str(filename))
    '''

    def maybe_save(t):
        should_save = params.save_iter > 0 and t % params.save_iter == 0
        should_save = should_save or t == params.num_iterations
        if should_save:
            output_filename, file_extension = os.path.splitext(params.output_image)
            if t == params.num_iterations:
                filename = os.path.join('static', output_filename + file_extension)
            else:
                filename = os.path.join('static', output_filename + "_" + str(t) + file_extension)
            disp = deprocess(img.clone())

            # Maybe perform postprocessing for color-independent style transfer
            if params.original_colors == 1:
                disp = original_colors(deprocess(content_image.clone()), disp)

            disp.save(filename)

    # Function to evaluate loss and gradient. We run the net forward and
    # backward to get the gradient, and sum up losses from the loss modules.
    # optim.lbfgs internally handles iteration and calls this function many
    # times, so we manually count the number of iterations to handle printing
    # and saving intermediate results.
    num_calls = [0]

    def feval():
        num_calls[0] += 1
        optimizer.zero_grad()
        net(img)
        loss = 0

        for mod in content_losses:
            loss += mod.loss.to(backward_device)
        for mod in style_losses:
            loss += mod.loss.to(backward_device)
        if params.tv_weight > 0:
            for mod in tv_losses:
                loss += mod.loss.to(backward_device)

        loss.backward()

        maybe_save(num_calls[0])
        maybe_print(num_calls[0], loss)

        return loss

    optimizer, loopVal = setup_optimizer(img)
    while num_calls[0] <= loopVal:
        optimizer.step(feval)


# Configure the optimizer
def setup_optimizer(img):
    if params.optimizer == 'lbfgs':
        print("Running optimization with L-BFGS")
        optim_state = {
            'max_iter': params.num_iterations,
            'tolerance_change': -1,
            'tolerance_grad': -1,
        }
        if params.lbfgs_num_correction != 100:
            optim_state['history_size'] = params.lbfgs_num_correction
        optimizer = optim.LBFGS([img], **optim_state)
        loopVal = 1
    elif params.optimizer == 'adam':
        print("Running optimization with ADAM")
        optimizer = optim.Adam([img], lr=params.learning_rate)
        loopVal = params.num_iterations - 1
    return optimizer, loopVal


def setup_gpu():
    def setup_cuda():
        if 'cudnn' in params.backend:
            torch.backends.cudnn.enabled = True
            if params.cudnn_autotune:
                torch.backends.cudnn.benchmark = True
        else:
            torch.backends.cudnn.enabled = False

    def setup_cpu():
        if 'mkl' in params.backend and 'mkldnn' not in params.backend:
            torch.backends.mkl.enabled = True
        elif 'mkldnn' in params.backend:
            raise ValueError("MKL-DNN is not supported yet.")
        elif 'openmp' in params.backend:
            torch.backends.openmp.enabled = True

    multidevice = False
    if "," in str(params.gpu):
        devices = params.gpu.split(',')
        multidevice = True

        if 'c' in str(devices[0]).lower():
            backward_device = "cpu"
            setup_cuda(), setup_cpu()
        else:
            backward_device = "cuda:" + devices[0]
            setup_cuda()
        dtype = torch.FloatTensor

    elif "c" not in str(params.gpu).lower():
        setup_cuda()
        dtype, backward_device = torch.cuda.FloatTensor, "cuda:" + str(params.gpu)
    else:
        setup_cpu()
        dtype, backward_device = torch.FloatTensor, "cpu"
    return dtype, multidevice, backward_device


def setup_multi_device(net):
    assert len(params.gpu.split(',')) - 1 == len(params.multidevice_strategy.split(',')), \
        "The number of -multidevice_strategy layer indices minus 1, must be equal to the number of -gpu devices."

    new_net = ModelParallel(net, params.gpu, params.multidevice_strategy)
    return new_net


# Preprocess an image before passing it to a model.
# We need to rescale from [0, 1] to [0, 255], convert from RGB to BGR,
# and subtract the mean pixel.
def preprocess(image_name, image_size):
    image = Image.open(image_name).convert('RGB')
    if type(image_size) is not tuple:
        image_size = tuple([int((float(image_size) / max(image.size)) * x) for x in (image.height, image.width)])
    Loader = transforms.Compose([transforms.Resize(image_size), transforms.ToTensor()])
    rgb2bgr = transforms.Compose([transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])])])
    Normalize = transforms.Compose([transforms.Normalize(mean=[103.939, 116.779, 123.68], std=[1, 1, 1])])
    tensor = Normalize(rgb2bgr(Loader(image) * 255)).unsqueeze(0)
    return tensor


# Undo the above preprocessing.
def deprocess(output_tensor):
    Normalize = transforms.Compose([transforms.Normalize(mean=[-103.939, -116.779, -123.68], std=[1, 1, 1])])
    bgr2rgb = transforms.Compose([transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])])])
    output_tensor = bgr2rgb(Normalize(output_tensor.squeeze(0).cpu())) / 255
    output_tensor.clamp_(0, 1)
    Image2PIL = transforms.ToPILImage()
    image = Image2PIL(output_tensor.cpu())
    return image


# Combine the Y channel of the generated image and the UV/CbCr channels of the
# content image to perform color-independent style transfer.
def original_colors(content, generated):
    content_channels = list(content.convert('YCbCr').split())
    generated_channels = list(generated.convert('YCbCr').split())
    content_channels[0] = generated_channels[0]
    return Image.merge('YCbCr', content_channels).convert('RGB')


# Print like Lua/Torch7
def print_torch(net, multidevice):
    if multidevice:
        return
    simplelist = ""
    for i, layer in enumerate(net, 1):
        simplelist = simplelist + "(" + str(i) + ") -> "
    print("nn.Sequential ( \n  [input -> " + simplelist + "output]")

    def strip(x):
        return str(x).replace(", ", ',').replace("(", '').replace(")", '') + ", "

    def n():
        return "  (" + str(i) + "): " + "nn." + str(l).split("(", 1)[0]

    for i, l in enumerate(net, 1):
        if "2d" in str(l):
            ks, st, pd = strip(l.kernel_size), strip(l.stride), strip(l.padding)
            if "Conv2d" in str(l):
                ch = str(l.in_channels) + " -> " + str(l.out_channels)
                print(n() + "(" + ch + ", " + (ks).replace(",", 'x', 1) + st + pd.replace(", ", ')'))
            elif "Pool2d" in str(l):
                st = st.replace("  ", ' ') + st.replace(", ", ')')
                print(n() + "(" + ((ks).replace(",", 'x' + ks, 1) + st).replace(", ", ','))
        else:
            print(n())
    print(")")


# Divide weights by channel size
def normalize_weights(content_losses, style_losses):
    for n, i in enumerate(content_losses):
        i.strength = i.strength / max(i.target.size())
    for n, i in enumerate(style_losses):
        i.strength = i.strength / max(i.target.size())


# Scale gradients in the backward pass
class ScaleGradients(torch.autograd.Function):
    @staticmethod
    def forward(self, input_tensor, strength):
        self.strength = strength
        return input_tensor

    @staticmethod
    def backward(self, grad_output):
        grad_input = grad_output.clone()
        grad_input = grad_input / (torch.norm(grad_input, keepdim=True) + 1e-8)
        return grad_input * self.strength * self.strength, None


# Define an nn Module to compute content loss
class ContentLoss(nn.Module):

    def __init__(self, strength, normalize):
        super(ContentLoss, self).__init__()
        self.strength = strength
        self.crit = nn.MSELoss()
        self.mode = 'None'
        self.normalize = normalize

    def forward(self, input):
        if self.mode == 'loss':
            loss = self.crit(input, self.target)
            if self.normalize:
                loss = ScaleGradients.apply(loss, self.strength)
            self.loss = loss * self.strength
        elif self.mode == 'capture':
            self.target = input.detach()
        return input


class GramMatrix(nn.Module):

    def forward(self, input):
        B, C, H, W = input.size()
        x_flat = input.view(C, H * W)
        return torch.mm(x_flat, x_flat.t())


# Define an nn Module to compute style loss
class StyleLoss(nn.Module):

    def __init__(self, strength, normalize):
        super(StyleLoss, self).__init__()
        self.target = torch.Tensor()
        self.strength = strength
        self.gram = GramMatrix()
        self.crit = nn.MSELoss()
        self.mode = 'None'
        self.blend_weight = None
        self.normalize = normalize

    def forward(self, input):
        self.G = self.gram(input)
        self.G = self.G.div(input.nelement())
        if self.mode == 'capture':
            if self.blend_weight == None:
                self.target = self.G.detach()
            elif self.target.nelement() == 0:
                self.target = self.G.detach().mul(self.blend_weight)
            else:
                self.target = self.target.add(self.blend_weight, self.G.detach())
        elif self.mode == 'loss':
            loss = self.crit(self.G, self.target)
            if self.normalize:
                loss = ScaleGradients.apply(loss, self.strength)
            self.loss = self.strength * loss
        return input


class TVLoss(nn.Module):

    def __init__(self, strength):
        super(TVLoss, self).__init__()
        self.strength = strength

    def forward(self, input):
        self.x_diff = input[:, :, 1:, :] - input[:, :, :-1, :]
        self.y_diff = input[:, :, :, 1:] - input[:, :, :, :-1]
        self.loss = self.strength * (torch.sum(torch.abs(self.x_diff)) + torch.sum(torch.abs(self.y_diff)))
        return input

# if __name__ == "__main__":
#     main()
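
Because the command-line entry point is commented out above and `main()` now takes two file names, the module is intended to be imported rather than run directly (this is what `app.py` does). Below is a minimal sketch of driving it from Python, assuming the two images already exist in the hard-coded `static/` path that `main()` prepends to its arguments; the file names are placeholders.

```python
# Hypothetical direct call into the style-transfer pipeline.
from neural_style import main

# Writes static/out.png at the end, plus intermediates every -save_iter calls.
main("content.jpg", "style.jpg")
```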