DarkMrdan committed on
Commit
39f74d8
·
1 Parent(s): 1f85ff0

Upload 4 files

Files changed (4)
  1. CaffeLoader.py +254 -0
  2. README.md +308 -10
  3. app.py +37 -0
  4. neural_style.py +514 -0
CaffeLoader.py ADDED
@@ -0,0 +1,254 @@
import torch
import torch.nn as nn


class VGG(nn.Module):
    def __init__(self, features, num_classes=1000):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )


class VGG_SOD(nn.Module):
    def __init__(self, features, num_classes=100):
        super(VGG_SOD, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 100),
        )


class VGG_FCN32S(nn.Module):
    def __init__(self, features, num_classes=1000):
        super(VGG_FCN32S, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Conv2d(512, 4096, (7, 7)),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Conv2d(4096, 4096, (1, 1)),
            nn.ReLU(True),
            nn.Dropout(0.5),
        )


class VGG_PRUNED(nn.Module):
    def __init__(self, features, num_classes=1000):
        super(VGG_PRUNED, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(0.5),
        )


class NIN(nn.Module):
    def __init__(self, pooling):
        super(NIN, self).__init__()
        if pooling == 'max':
            pool2d = nn.MaxPool2d((3, 3), (2, 2), (0, 0), ceil_mode=True)
        elif pooling == 'avg':
            pool2d = nn.AvgPool2d((3, 3), (2, 2), (0, 0), ceil_mode=True)

        self.features = nn.Sequential(
            nn.Conv2d(3, 96, (11, 11), (4, 4)),
            nn.ReLU(inplace=True),
            nn.Conv2d(96, 96, (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(96, 96, (1, 1)),
            nn.ReLU(inplace=True),
            pool2d,
            nn.Conv2d(96, 256, (5, 5), (1, 1), (2, 2)),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, (1, 1)),
            nn.ReLU(inplace=True),
            pool2d,
            nn.Conv2d(256, 384, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, (1, 1)),
            nn.ReLU(inplace=True),
            pool2d,
            nn.Dropout(0.5),
            nn.Conv2d(384, 1024, (3, 3), (1, 1), (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(1024, 1024, (1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(1024, 1000, (1, 1)),
            nn.ReLU(inplace=True),
            nn.AvgPool2d((6, 6), (1, 1), (0, 0), ceil_mode=True),
            nn.Softmax(),
        )


# Splits a Sequential network into chunks and places each chunk on its own device,
# so a single forward pass can span multiple GPUs (and/or the CPU).
class ModelParallel(nn.Module):
    def __init__(self, net, device_ids, device_splits):
        super(ModelParallel, self).__init__()
        self.device_list = self.name_devices(device_ids.split(','))
        self.chunks = self.chunks_to_devices(self.split_net(net, device_splits.split(',')))

    def name_devices(self, input_list):
        device_list = []
        for i, device in enumerate(input_list):
            if str(device).lower() != 'c':
                device_list.append("cuda:" + str(device))
            else:
                device_list.append("cpu")
        return device_list

    def split_net(self, net, device_splits):
        chunks, cur_chunk = [], nn.Sequential()
        for i, l in enumerate(net):
            cur_chunk.add_module(str(i), net[i])
            if str(i) in device_splits and device_splits != '':
                del device_splits[0]
                chunks.append(cur_chunk)
                cur_chunk = nn.Sequential()
        chunks.append(cur_chunk)
        return chunks

    def chunks_to_devices(self, chunks):
        for i, chunk in enumerate(chunks):
            chunk.to(self.device_list[i])
        return chunks

    def c(self, input, i):
        if input.type() == 'torch.FloatTensor' and 'cuda' in self.device_list[i]:
            input = input.type('torch.cuda.FloatTensor')
        elif input.type() == 'torch.cuda.FloatTensor' and 'cpu' in self.device_list[i]:
            input = input.type('torch.FloatTensor')
        return input

    def forward(self, input):
        for i, chunk in enumerate(self.chunks):
            if i < len(self.chunks) - 1:
                input = self.c(chunk(self.c(input, i).to(self.device_list[i])), i + 1).to(self.device_list[i + 1])
            else:
                input = chunk(input)
        return input


# Build the VGG feature extractor from a channel specification ('P' marks a pooling layer).
def buildSequential(channel_list, pooling):
    layers = []
    in_channels = 3
    if pooling == 'max':
        pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
    elif pooling == 'avg':
        pool2d = nn.AvgPool2d(kernel_size=2, stride=2)
    else:
        raise ValueError("Unrecognized pooling parameter")
    for c in channel_list:
        if c == 'P':
            layers += [pool2d]
        else:
            conv2d = nn.Conv2d(in_channels, c, kernel_size=3, padding=1)
            layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = c
    return nn.Sequential(*layers)


channel_list = {
    'VGG-16p': [24, 22, 'P', 41, 51, 'P', 108, 89, 111, 'P', 184, 276, 228, 'P', 512, 512, 512, 'P'],
    'VGG-16': [64, 64, 'P', 128, 128, 'P', 256, 256, 256, 'P', 512, 512, 512, 'P', 512, 512, 512, 'P'],
    'VGG-19': [64, 64, 'P', 128, 128, 'P', 256, 256, 256, 256, 'P', 512, 512, 512, 512, 'P', 512, 512, 512, 512, 'P'],
}

nin_dict = {
    'C': ['conv1', 'cccp1', 'cccp2', 'conv2', 'cccp3', 'cccp4', 'conv3', 'cccp5', 'cccp6', 'conv4-1024', 'cccp7-1024', 'cccp8-1024'],
    'R': ['relu0', 'relu1', 'relu2', 'relu3', 'relu5', 'relu6', 'relu7', 'relu8', 'relu9', 'relu10', 'relu11', 'relu12'],
    'P': ['pool1', 'pool2', 'pool3', 'pool4'],
    'D': ['drop'],
}
vgg16_dict = {
    'C': ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2', 'conv3_3', 'conv4_1', 'conv4_2', 'conv4_3', 'conv5_1', 'conv5_2', 'conv5_3'],
    'R': ['relu1_1', 'relu1_2', 'relu2_1', 'relu2_2', 'relu3_1', 'relu3_2', 'relu3_3', 'relu4_1', 'relu4_2', 'relu4_3', 'relu5_1', 'relu5_2', 'relu5_3'],
    'P': ['pool1', 'pool2', 'pool3', 'pool4', 'pool5'],
}
vgg19_dict = {
    'C': ['conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2', 'conv3_3', 'conv3_4', 'conv4_1', 'conv4_2', 'conv4_3', 'conv4_4', 'conv5_1', 'conv5_2', 'conv5_3', 'conv5_4'],
    'R': ['relu1_1', 'relu1_2', 'relu2_1', 'relu2_2', 'relu3_1', 'relu3_2', 'relu3_3', 'relu3_4', 'relu4_1', 'relu4_2', 'relu4_3', 'relu4_4', 'relu5_1', 'relu5_2', 'relu5_3', 'relu5_4'],
    'P': ['pool1', 'pool2', 'pool3', 'pool4', 'pool5'],
}


# Select the architecture and layer-name table that match the checkpoint file name.
def modelSelector(model_file, pooling):
    vgg_list = ["fcn32s", "pruning", "sod", "vgg"]
    if any(name in model_file for name in vgg_list):
        if "pruning" in model_file:
            print("VGG-16 Architecture Detected")
            print("Using The Channel Pruning Model")
            cnn, layerList = VGG_PRUNED(buildSequential(channel_list['VGG-16p'], pooling)), vgg16_dict
        elif "fcn32s" in model_file:
            print("VGG-16 Architecture Detected")
            print("Using the fcn32s-heavy-pascal Model")
            cnn, layerList = VGG_FCN32S(buildSequential(channel_list['VGG-16'], pooling)), vgg16_dict
        elif "sod" in model_file:
            print("VGG-16 Architecture Detected")
            print("Using The SOD Finetune Model")
            cnn, layerList = VGG_SOD(buildSequential(channel_list['VGG-16'], pooling)), vgg16_dict
        elif "19" in model_file:
            print("VGG-19 Architecture Detected")
            cnn, layerList = VGG(buildSequential(channel_list['VGG-19'], pooling)), vgg19_dict
        elif "16" in model_file:
            print("VGG-16 Architecture Detected")
            cnn, layerList = VGG(buildSequential(channel_list['VGG-16'], pooling)), vgg16_dict
        else:
            raise ValueError("VGG architecture not recognized.")
    elif "nin" in model_file:
        print("NIN Architecture Detected")
        cnn, layerList = NIN(pooling), nin_dict
    else:
        raise ValueError("Model architecture not recognized.")
    return cnn, layerList


# Print like Torch7/loadcaffe
def print_loadcaffe(cnn, layerList):
    c = 0
    for l in list(cnn):
        if "Conv2d" in str(l):
            in_c, out_c, ks = str(l.in_channels), str(l.out_channels), str(l.kernel_size)
            print(layerList['C'][c] + ": " + (out_c + " " + in_c + " " + ks).replace(")", '').replace("(", '').replace(",", ''))
            c += 1
        if c == len(layerList['C']):
            break


# Load the model, and configure pooling layer type
def loadCaffemodel(model_file, pooling, use_gpu, disable_check):
    cnn, layerList = modelSelector(str(model_file).lower(), pooling)

    cnn.load_state_dict(torch.load(model_file), strict=(not disable_check))
    print("Successfully loaded " + str(model_file))

    # Maybe convert the model to cuda now, to avoid later issues
    if "c" not in str(use_gpu).lower() or "c" not in str(use_gpu[0]).lower():
        cnn = cnn.cuda()
    cnn = cnn.features

    print_loadcaffe(cnn, layerList)

    return cnn, layerList
README.md CHANGED
@@ -1,10 +1,308 @@
- ---
- title: Style Transfer
- emoji: 🏃
- colorFrom: gray
- colorTo: blue
- sdk: static
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
# neural-style-pt

[![DOI](https://zenodo.org/badge/142345353.svg)](https://zenodo.org/badge/latestdoi/142345353)

This is a PyTorch implementation of the paper [A Neural Algorithm of Artistic Style](http://arxiv.org/abs/1508.06576)
by Leon A. Gatys, Alexander S. Ecker, and Matthias Bethge. The code is based on Justin Johnson's [Neural-Style](https://github.com/jcjohnson/neural-style).

The paper presents an algorithm for combining the content of one image with the style of another image using
convolutional neural networks. Here's an example that maps the artistic style of
[The Starry Night](https://en.wikipedia.org/wiki/The_Starry_Night)
onto a night-time photograph of the Stanford campus:

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/starry_night_google.jpg" height="223px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/hoovertowernight.jpg" height="223px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/starry_stanford_bigger.png" width="710px">
</div>

Applying the style of different images to the same content image gives interesting results.
Here we reproduce Figure 2 from the paper, which renders a photograph of Tübingen, Germany in a
variety of styles:

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/tubingen.jpg" height="250px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_shipwreck.png" height="250px">

<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_starry.png" height="250px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_scream.png" height="250px">

<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_seated_nude.png" height="250px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_composition_vii.png" height="250px">
</div>

Here are the results of applying the style of various pieces of artwork to this photograph of the
Golden Gate Bridge:


<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/golden_gate.jpg" height="200px">

<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/frida_kahlo.jpg" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_kahlo.png" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/escher_sphere.jpg" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_escher.png" height="160px">
</div>

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/woman-with-hat-matisse.jpg" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_matisse.png" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/the_scream.jpg" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_scream.png" height="160px">
</div>

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/starry_night_crop.png" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_starry.png" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/seated-nude.jpg" height="160px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_seated.png" height="160px">
</div>

### Content / Style Tradeoff

The algorithm allows the user to trade off the relative weights of the style and content reconstruction terms,
as shown in this example where we port the style of [Picasso's 1907 self-portrait](http://www.wikiart.org/en/pablo-picasso/self-portrait-1907) onto Brad Pitt:

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/picasso_selfport1907.jpg" height="220px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/inputs/brad_pitt.jpg" height="220px">
</div>

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/pitt_picasso_content_5_style_100.png" height="220px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/pitt_picasso_content_1_style_100.png" height="220px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/pitt_picasso_content_01_style_100.png" height="220px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/pitt_picasso_content_0025_style_100.png" height="220px">
</div>
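For example, to favor style over content you can lower `-content_weight` while keeping `-style_weight` fixed; a command along these lines (the exact weights are illustrative) produces a strongly stylized result:

```
python neural_style.py -content_image examples/inputs/brad_pitt.jpg -style_image examples/inputs/picasso_selfport1907.jpg -content_weight 1 -style_weight 100 -output_image pitt_picasso.png
```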

### Style Scale

By resizing the style image before extracting style features, we can control the types of artistic
features that are transferred from the style image; you can control this behavior with the `-style_scale` flag.
Below we see three examples of rendering the Golden Gate Bridge in the style of The Starry Night.
From left to right, `-style_scale` is 2.0, 1.0, and 0.5.

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_starry_scale2.png" height="175px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_starry_scale1.png" height="175px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_starry_scale05.png" height="175px">
</div>
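Values above 1.0 enlarge the style image before feature extraction and values below 1.0 shrink it. For example (the value here is illustrative):

```
python neural_style.py -content_image examples/inputs/golden_gate.jpg -style_image examples/inputs/starry_night_crop.png -style_scale 0.5
```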

### Multiple Style Images
You can use more than one style image to blend multiple artistic styles.

Clockwise from upper left: "The Starry Night" + "The Scream", "The Scream" + "Composition VII",
"Seated Nude" + "Composition VII", and "Seated Nude" + "The Starry Night".

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_starry_scream.png" height="250px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_scream_composition_vii.png" height="250px">

<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_starry_seated.png" height="250px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_seated_nude_composition_vii.png" height="250px">
</div>


### Style Interpolation
When using multiple style images, you can control the degree to which they are blended:

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_starry_scream_3_7.png" height="175px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_starry_scream_5_5.png" height="175px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/golden_gate_starry_scream_7_3.png" height="175px">
</div>
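For example, to weight The Scream more heavily than The Starry Night (a 30/70 blend; the weights are normalized to sum to one):

```
python neural_style.py -content_image examples/inputs/golden_gate.jpg -style_image examples/inputs/starry_night_google.jpg,examples/inputs/the_scream.jpg -style_blend_weights 3,7
```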

### Transfer style but not color
If you add the flag `-original_colors 1` then the output image will retain the colors of the original image.

<div align="center">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_starry.png" height="185px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_scream.png" height="185px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/tubingen_composition_vii.png" height="185px">

<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/original_color/tubingen_starry.png" height="185px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/original_color/tubingen_scream.png" height="185px">
<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/original_color/tubingen_composition_vii.png" height="185px">
</div>
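For example:

```
python neural_style.py -content_image examples/inputs/tubingen.jpg -style_image examples/inputs/starry_night_google.jpg -original_colors 1
```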

## Setup:

Dependencies:
* [PyTorch](http://pytorch.org/)


Optional dependencies:
* For CUDA backend:
  * CUDA 7.5 or above
* For cuDNN backend:
  * cuDNN v6 or above
* For ROCm backend:
  * ROCm 2.1 or above
* For MKL backend:
  * MKL 2019 or above
* For OpenMP backend:
  * OpenMP 5.0 or above
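A quick way to check which of these backends your local PyTorch build can actually use is a short snippet like the following (assuming a standard PyTorch install):

```
import torch

print("PyTorch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())  # CUDA / ROCm device support
if torch.cuda.is_available():
    print("cuDNN version:", torch.backends.cudnn.version())
```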

After installing the dependencies, you'll need to run the following script to download the VGG model:
```
python models/download_models.py
```
This will download the original [VGG-19 model](https://gist.github.com/ksimonyan/3785162f95cd2d5fee77#file-readme-md).
The original [VGG-16 model](https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md) will also be downloaded. By default the original VGG-19 model is used.

If your GPU has less memory, the NIN ImageNet model is a better choice; it gives slightly worse but still comparable results. You can get the details on the model from the [BVLC Caffe Model Zoo](https://github.com/BVLC/caffe/wiki/Model-Zoo). The NIN model is downloaded when you run the `download_models.py` script.

You can find detailed installation instructions for Ubuntu and Windows in the [installation guide](INSTALL.md).

## Usage
Basic usage:
```
python neural_style.py -style_image <image.jpg> -content_image <image.jpg>
```

cuDNN usage with NIN Model:
```
python neural_style.py -style_image examples/inputs/picasso_selfport1907.jpg -content_image examples/inputs/brad_pitt.jpg -output_image profile.png -model_file models/nin_imagenet.pth -gpu 0 -backend cudnn -num_iterations 1000 -seed 123 -content_layers relu0,relu3,relu7,relu12 -style_layers relu0,relu3,relu7,relu12 -content_weight 10 -style_weight 500 -image_size 512 -optimizer adam
```

![cuDNN NIN Model Picasso Brad Pitt](https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/pitt_picasso_nin_cudnn.png)


To use multiple style images, pass a comma-separated list like this:

`-style_image starry_night.jpg,the_scream.jpg`.

Note that paths to images should not contain the `~` character to represent your home directory; you should instead use a relative
path or a full absolute path.

**Options**:
* `-image_size`: Maximum side length (in pixels) of the generated image. Default is 512.
* `-style_blend_weights`: The weight for blending the style of multiple style images, as a
  comma-separated list, such as `-style_blend_weights 3,7`. By default all style images
  are equally weighted.
* `-gpu`: Zero-indexed ID of the GPU to use; for CPU mode set `-gpu` to `c`.

**Optimization options**:
* `-content_weight`: How much to weight the content reconstruction term. Default is 5e0.
* `-style_weight`: How much to weight the style reconstruction term. Default is 1e2.
* `-tv_weight`: Weight of total-variation (TV) regularization; this helps to smooth the image.
  Default is 1e-3. Set to 0 to disable TV regularization.
* `-num_iterations`: Default is 1000.
* `-init`: Method for initializing the generated image; one of `random` or `image`.
  Default is `random`, which uses a noise initialization as in the paper; `image`
  initializes with the content image.
* `-init_image`: Replaces the initialization image with a user-specified image.
* `-optimizer`: The optimization algorithm to use; either `lbfgs` or `adam`; default is `lbfgs`.
  L-BFGS tends to give better results, but uses more memory. Switching to ADAM will reduce memory usage;
  when using ADAM you will probably need to play with other parameters to get good results, especially
  the style weight, content weight, and learning rate.
* `-learning_rate`: Learning rate to use with the ADAM optimizer. Default is 1e1.
* `-normalize_gradients`: If this flag is present, style and content gradients from each layer will be L1 normalized.

**Output options**:
* `-output_image`: Name of the output image. Default is `out.png`.
* `-print_iter`: Print progress every `print_iter` iterations. Set to 0 to disable printing.
* `-save_iter`: Save the image every `save_iter` iterations. Set to 0 to disable saving intermediate results.

**Layer options**:
* `-content_layers`: Comma-separated list of layer names to use for content reconstruction.
  Default is `relu4_2`.
* `-style_layers`: Comma-separated list of layer names to use for style reconstruction.
  Default is `relu1_1,relu2_1,relu3_1,relu4_1,relu5_1`.

**Other options**:
* `-style_scale`: Scale at which to extract features from the style image. Default is 1.0.
* `-original_colors`: If you set this to 1, then the output image will keep the colors of the content image.
* `-model_file`: Path to the `.pth` file for the VGG Caffe model. Default is the original VGG-19 model; you can also try the original VGG-16 model.
* `-pooling`: The type of pooling layers to use; one of `max` or `avg`. Default is `max`.
  The VGG-19 model uses max pooling layers, but the paper mentions that replacing these layers with average
  pooling layers can improve the results. I haven't been able to get good results using average pooling, but
  the option is here.
* `-seed`: An integer value that you can specify for repeatable results. By default this value is random for each run.
* `-multidevice_strategy`: A comma-separated list of layer indices at which to split the network when using multiple devices. See [Multi-GPU scaling](https://github.com/ProGamerGov/neural-style-pt#multi-gpu-scaling) for more details.
* `-backend`: `nn`, `cudnn`, `openmp`, or `mkl`. Default is `nn`. `mkl` requires Intel's MKL backend.
* `-cudnn_autotune`: When using the cuDNN backend, pass this flag to use the built-in cuDNN autotuner to select
  the best convolution algorithms for your architecture. This will make the first iteration a bit slower and can
  take a bit more memory, but may significantly speed up the cuDNN backend.
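Putting several of these options together, a typical invocation might look like this (every value here is illustrative and can be tuned for your images and hardware):

```
python neural_style.py -content_image examples/inputs/tubingen.jpg -style_image examples/inputs/starry_night_google.jpg -output_image tubingen_starry.png -image_size 512 -num_iterations 1000 -init image -seed 123 -gpu 0 -backend cudnn -cudnn_autotune
```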

## Frequently Asked Questions

**Problem:** The program runs out of memory and dies

**Solution:** Try reducing the image size: `-image_size 256` (or lower). Note that different image sizes will likely
require non-default values for `-style_weight` and `-content_weight` for optimal results.
If you are running on a GPU, you can also try running with `-backend cudnn` to reduce memory usage.

**Problem:** `-backend cudnn` is slower than the default NN backend

**Solution:** Add the flag `-cudnn_autotune`; this will use the built-in cuDNN autotuner to select the best convolution algorithms.

**Problem:** I get the following error message:

`Missing key(s) in state_dict: "classifier.0.bias", "classifier.0.weight", "classifier.3.bias", "classifier.3.weight".
Unexpected key(s) in state_dict: "classifier.1.weight", "classifier.1.bias", "classifier.4.weight", "classifier.4.bias".`

**Solution:** Due to a mix-up with layer locations, older models require a fix to be compatible with newer versions of PyTorch. The included [`download_models.py`](https://github.com/ProGamerGov/neural-style-pt/blob/master/models/download_models.py) script will automatically perform these fixes after downloading the models.



## Memory Usage
By default, `neural-style-pt` uses the `nn` backend for convolutions and L-BFGS for optimization. These give good results, but can both use a lot of memory. You can reduce memory usage with the following:

* **Use cuDNN**: Add the flag `-backend cudnn` to use the cuDNN backend. This will only work in GPU mode.
* **Use ADAM**: Add the flag `-optimizer adam` to use ADAM instead of L-BFGS. This should significantly
  reduce memory usage, but may require tuning of other parameters for good results; in particular you should
  play with the learning rate, content weight, and style weight.
  This should work in both CPU and GPU modes.
* **Reduce image size**: If the above tricks are not enough, you can reduce the size of the generated image;
  pass the flag `-image_size 256` to generate an image at half the default size.

With the default settings, neural-style-pt uses about 3.7 GB of GPU memory on my system; switching to ADAM and cuDNN reduces the GPU memory footprint to about 1 GB.
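For example, combining all three of these options gives a comparatively low-memory run (values are illustrative):

```
python neural_style.py -style_image <image.jpg> -content_image <image.jpg> -backend cudnn -optimizer adam -image_size 256
```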

## Speed
Speed can vary a lot depending on the backend and the optimizer.
Here are some times for running 500 iterations with `-image_size=512` on a Tesla K80 with different settings:
* `-backend nn -optimizer lbfgs`: 117 seconds
* `-backend nn -optimizer adam`: 100 seconds
* `-backend cudnn -optimizer lbfgs`: 124 seconds
* `-backend cudnn -optimizer adam`: 107 seconds
* `-backend cudnn -cudnn_autotune -optimizer lbfgs`: 109 seconds
* `-backend cudnn -cudnn_autotune -optimizer adam`: 91 seconds

Here are the same benchmarks on a GTX 1080:
* `-backend nn -optimizer lbfgs`: 56 seconds
* `-backend nn -optimizer adam`: 38 seconds
* `-backend cudnn -optimizer lbfgs`: 40 seconds
* `-backend cudnn -optimizer adam`: 40 seconds
* `-backend cudnn -cudnn_autotune -optimizer lbfgs`: 23 seconds
* `-backend cudnn -cudnn_autotune -optimizer adam`: 24 seconds

## Multi-GPU scaling
You can use multiple CPU and GPU devices to process images at higher resolutions; different layers of the network will be
computed on different devices. You can control which GPU and CPU devices are used with the `-gpu` flag, and you can control
how to split layers across devices using the `-multidevice_strategy` flag.

For example, in a server with four GPUs, you can give the flag `-gpu 0,1,2,3` to process on GPUs 0, 1, 2, and 3 in that order; by also giving the flag `-multidevice_strategy 3,6,12` you indicate that the first two layers should be computed on GPU 0, layers 3 to 5 should be computed on GPU 1, layers 6 to 11 should be computed on GPU 2, and the remaining layers should be computed on GPU 3. You will need to tune the `-multidevice_strategy` for your setup in order to achieve maximal resolution.
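As a concrete sketch of the example above (the device IDs and split points are illustrative and depend on your hardware):

```
python neural_style.py -content_image examples/inputs/hoovertowernight.jpg -style_image examples/inputs/starry_night_google.jpg -gpu 0,1,2,3 -multidevice_strategy 3,6,12 -backend cudnn -image_size 2048
```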

We can achieve very high quality results at high resolution by combining multi-GPU processing with multiscale
generation as described in the paper
<a href="https://arxiv.org/abs/1611.07865">**Controlling Perceptual Factors in Neural Style Transfer**</a> by Leon A. Gatys,
Alexander S. Ecker, Matthias Bethge, Aaron Hertzmann and Eli Shechtman.


Here is a 4016 x 2213 image generated on a server with eight Tesla K80 GPUs:

<img src="https://raw.githubusercontent.com/ProGamerGov/neural-style-pt/master/examples/outputs/starry_stanford_bigger.png" height="400px">

The script used to generate this image <a href='examples/scripts/starry_stanford_bigger.sh'>can be found here</a>.

## Implementation details
Images are initialized with white noise and optimized using L-BFGS.

We perform style reconstructions using the `conv1_1`, `conv2_1`, `conv3_1`, `conv4_1`, and `conv5_1` layers
and content reconstructions using the `conv4_2` layer. As in the paper, the five style reconstruction losses have
equal weights.
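Schematically, the generated image is obtained by minimizing a weighted combination of these terms (mirroring the loss modules in `neural_style.py`, where alpha, beta, and gamma correspond to `-content_weight`, `-style_weight`, and `-tv_weight`, and the per-layer style weights w_l are equal as noted above):

```
L_{total}(x) = \alpha \, L_{content}(x) + \beta \sum_{l} w_l \, L_{style}^{(l)}(x) + \gamma \, L_{TV}(x)
```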

## Citation

If you find this code useful for your research, please cite it using the provided citation.
app.py ADDED
@@ -0,0 +1,37 @@
from flask import Flask, render_template, request, redirect, url_for
import os
from werkzeug.utils import secure_filename
from neural_style import main

app = Flask(__name__)


# Home page: upload the content and style images and display the result
@app.route('/', methods=['GET', 'POST'])
def upload_and_process():
    if request.method == 'POST':
        # Collect the two uploaded images
        images = [request.files['image1'], request.files['image2']]
        filenames = []

        for image in images:
            if image:
                # Use secure_filename to get a safe file name
                filename = secure_filename(image.filename)
                print(filename)
                # Save the uploaded image locally
                image.save(os.path.join('static', filename))
                filenames.append(filename)

        # Run the style-transfer model on the uploaded images
        main(filenames[0], filenames[1])

        # Return the results page and display the processed image
        return render_template('index.html', image_path="out.png",
                               filename1=filenames[0], filename2=filenames[1])

    return render_template('index.html')


if __name__ == '__main__':
    app.run(debug=True)
neural_style.py ADDED
@@ -0,0 +1,514 @@
import os
import copy
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms

from PIL import Image
from CaffeLoader import loadCaffemodel, ModelParallel

import argparse

parser = argparse.ArgumentParser()
# Basic options
parser.add_argument("-style_image", help="Style target image", default='examples/inputs/seated-nude.jpg')
parser.add_argument("-style_blend_weights", default=None)
parser.add_argument("-content_image", help="Content target image", default='examples/inputs/tubingen.jpg')
parser.add_argument("-image_size", help="Maximum height / width of generated image", type=int, default=512)
parser.add_argument("-gpu", help="Zero-indexed ID of the GPU to use; for CPU mode set -gpu = c", default='c')

# Optimization options
parser.add_argument("-content_weight", type=float, default=5e0)
parser.add_argument("-style_weight", type=float, default=1e2)
parser.add_argument("-normalize_weights", action='store_true')
parser.add_argument("-normalize_gradients", action='store_true')
parser.add_argument("-tv_weight", type=float, default=1e-3)
parser.add_argument("-num_iterations", type=int, default=200)
parser.add_argument("-init", choices=['random', 'image'], default='random')
parser.add_argument("-init_image", default=None)
parser.add_argument("-optimizer", choices=['lbfgs', 'adam'], default='lbfgs')
parser.add_argument("-learning_rate", type=float, default=1e0)
parser.add_argument("-lbfgs_num_correction", type=int, default=100)

# Output options
parser.add_argument("-print_iter", type=int, default=50)
parser.add_argument("-save_iter", type=int, default=100)
parser.add_argument("-output_image", default='out.png')

# Other options
parser.add_argument("-style_scale", type=float, default=1.0)
parser.add_argument("-original_colors", type=int, choices=[0, 1], default=0)
parser.add_argument("-pooling", choices=['avg', 'max'], default='max')
parser.add_argument("-model_file", type=str, default='models/vgg19-d01eb7cb.pth')
parser.add_argument("-disable_check", action='store_true')
parser.add_argument("-backend", choices=['nn', 'cudnn', 'mkl', 'mkldnn', 'openmp', 'mkl,cudnn', 'cudnn,mkl'],
                    default='nn')
parser.add_argument("-cudnn_autotune", action='store_true')
parser.add_argument("-seed", type=int, default=-1)

parser.add_argument("-content_layers", help="layers for content", default='relu4_2')
parser.add_argument("-style_layers", help="layers for style", default='relu1_1,relu2_1,relu3_1,relu4_1,relu5_1')

parser.add_argument("-multidevice_strategy", default='4,7,29')
params = parser.parse_args()

Image.MAX_IMAGE_PIXELS = 1000000000  # Support gigapixel images


def main(filename1, filename2):
    params.content_image = "C:/Users/86136/Desktop/web/ai2/neural-style-pt-master/static/" + filename1
    params.style_image = "C:/Users/86136/Desktop/web/ai2/neural-style-pt-master/static/" + filename2
    dtype, multidevice, backward_device = setup_gpu()

    cnn, layerList = loadCaffemodel(params.model_file, params.pooling, params.gpu, params.disable_check)

    content_image = preprocess(params.content_image, params.image_size).type(dtype)
    style_image_input = params.style_image.split(',')
    style_image_list, ext = [], [".jpg", ".jpeg", ".png", ".tiff"]
    for image in style_image_input:
        if os.path.isdir(image):
            images = (image + "/" + file for file in os.listdir(image)
                      if os.path.splitext(file)[1].lower() in ext)
            style_image_list.extend(images)
        else:
            style_image_list.append(image)
    style_images_caffe = []
    for image in style_image_list:
        style_size = int(params.image_size * params.style_scale)
        img_caffe = preprocess(image, style_size).type(dtype)
        style_images_caffe.append(img_caffe)

    if params.init_image != None:
        image_size = (content_image.size(2), content_image.size(3))
        init_image = preprocess(params.init_image, image_size).type(dtype)

    # Handle style blending weights for multiple style inputs
    style_blend_weights = []
    if params.style_blend_weights == None:
        # Style blending not specified, so use equal weighting
        for i in style_image_list:
            style_blend_weights.append(1.0)
        for i, blend_weights in enumerate(style_blend_weights):
            style_blend_weights[i] = int(style_blend_weights[i])
    else:
        style_blend_weights = params.style_blend_weights.split(',')
        assert len(style_blend_weights) == len(style_image_list), \
            "-style_blend_weights and -style_images must have the same number of elements!"

    # Normalize the style blending weights so they sum to 1
    style_blend_sum = 0
    for i, blend_weights in enumerate(style_blend_weights):
        style_blend_weights[i] = float(style_blend_weights[i])
        style_blend_sum = float(style_blend_sum) + style_blend_weights[i]
    for i, blend_weights in enumerate(style_blend_weights):
        style_blend_weights[i] = float(style_blend_weights[i]) / float(style_blend_sum)

    content_layers = params.content_layers.split(',')
    style_layers = params.style_layers.split(',')

    # Set up the network, inserting style and content loss modules
    cnn = copy.deepcopy(cnn)
    content_losses, style_losses, tv_losses = [], [], []
    next_content_idx, next_style_idx = 1, 1
    net = nn.Sequential()
    c, r = 0, 0
    if params.tv_weight > 0:
        tv_mod = TVLoss(params.tv_weight).type(dtype)
        net.add_module(str(len(net)), tv_mod)
        tv_losses.append(tv_mod)

    for i, layer in enumerate(list(cnn), 1):
        if next_content_idx <= len(content_layers) or next_style_idx <= len(style_layers):
            if isinstance(layer, nn.Conv2d):
                net.add_module(str(len(net)), layer)

                if layerList['C'][c] in content_layers:
                    print("Setting up content layer " + str(i) + ": " + str(layerList['C'][c]))
                    loss_module = ContentLoss(params.content_weight, params.normalize_gradients)
                    net.add_module(str(len(net)), loss_module)
                    content_losses.append(loss_module)

                if layerList['C'][c] in style_layers:
                    print("Setting up style layer " + str(i) + ": " + str(layerList['C'][c]))
                    loss_module = StyleLoss(params.style_weight, params.normalize_gradients)
                    net.add_module(str(len(net)), loss_module)
                    style_losses.append(loss_module)
                c += 1

            if isinstance(layer, nn.ReLU):
                net.add_module(str(len(net)), layer)

                if layerList['R'][r] in content_layers:
                    print("Setting up content layer " + str(i) + ": " + str(layerList['R'][r]))
                    loss_module = ContentLoss(params.content_weight, params.normalize_gradients)
                    net.add_module(str(len(net)), loss_module)
                    content_losses.append(loss_module)
                    next_content_idx += 1

                if layerList['R'][r] in style_layers:
                    print("Setting up style layer " + str(i) + ": " + str(layerList['R'][r]))
                    loss_module = StyleLoss(params.style_weight, params.normalize_gradients)
                    net.add_module(str(len(net)), loss_module)
                    style_losses.append(loss_module)
                    next_style_idx += 1
                r += 1

            if isinstance(layer, nn.MaxPool2d) or isinstance(layer, nn.AvgPool2d):
                net.add_module(str(len(net)), layer)

    if multidevice:
        net = setup_multi_device(net)

    # Capture content targets
    for i in content_losses:
        i.mode = 'capture'
    print("Capturing content targets")
    print_torch(net, multidevice)
    net(content_image)

    # Capture style targets
    for i in content_losses:
        i.mode = 'None'

    for i, image in enumerate(style_images_caffe):
        print("Capturing style target " + str(i + 1))
        for j in style_losses:
            j.mode = 'capture'
            j.blend_weight = style_blend_weights[i]
        net(style_images_caffe[i])

    # Set all loss modules to loss mode
    for i in content_losses:
        i.mode = 'loss'
    for i in style_losses:
        i.mode = 'loss'

    # Maybe normalize content and style weights
    if params.normalize_weights:
        normalize_weights(content_losses, style_losses)

    # Freeze the network in order to prevent
    # unnecessary gradient calculations
    for param in net.parameters():
        param.requires_grad = False

    # Initialize the image
    if params.seed >= 0:
        torch.manual_seed(params.seed)
        torch.cuda.manual_seed_all(params.seed)
        torch.backends.cudnn.deterministic = True
    if params.init == 'random':
        B, C, H, W = content_image.size()
        img = torch.randn(C, H, W).mul(0.001).unsqueeze(0).type(dtype)
    elif params.init == 'image':
        if params.init_image != None:
            img = init_image.clone()
        else:
            img = content_image.clone()
    img = nn.Parameter(img)

    def maybe_print(t, loss):
        if params.print_iter > 0 and t % params.print_iter == 0:
            print("Iteration " + str(t) + " / " + str(params.num_iterations))
            for i, loss_module in enumerate(content_losses):
                print("  Content " + str(i + 1) + " loss: " + str(loss_module.loss.item()))
            for i, loss_module in enumerate(style_losses):
                print("  Style " + str(i + 1) + " loss: " + str(loss_module.loss.item()))
            print("  Total loss: " + str(loss.item()))

    '''
    def maybe_save(t):
        should_save = params.save_iter > 0 and t % params.save_iter == 0
        should_save = should_save or t == params.num_iterations
        if should_save:
            output_filename, file_extension = os.path.splitext(params.output_image)
            if t == params.num_iterations:
                filename = output_filename + str(file_extension)
            else:
                filename = str(output_filename) + "_" + str(t) + str(file_extension)
            disp = deprocess(img.clone())

            # Maybe perform postprocessing for color-independent style transfer
            if params.original_colors == 1:
                disp = original_colors(deprocess(content_image.clone()), disp)

            disp.save(str(filename))
    '''

    def maybe_save(t):
        should_save = params.save_iter > 0 and t % params.save_iter == 0
        should_save = should_save or t == params.num_iterations
        if should_save:
            output_filename, file_extension = os.path.splitext(params.output_image)
            if t == params.num_iterations:
                filename = os.path.join('static', output_filename + file_extension)
            else:
                filename = os.path.join('static', output_filename + "_" + str(t) + file_extension)
            disp = deprocess(img.clone())

            # Maybe perform postprocessing for color-independent style transfer
            if params.original_colors == 1:
                disp = original_colors(deprocess(content_image.clone()), disp)

            disp.save(filename)

    # Function to evaluate loss and gradient. We run the net forward and
    # backward to get the gradient, and sum up losses from the loss modules.
    # optim.lbfgs internally handles iteration and calls this function many
    # times, so we manually count the number of iterations to handle printing
    # and saving intermediate results.
    num_calls = [0]

    def feval():
        num_calls[0] += 1
        optimizer.zero_grad()
        net(img)
        loss = 0

        for mod in content_losses:
            loss += mod.loss.to(backward_device)
        for mod in style_losses:
            loss += mod.loss.to(backward_device)
        if params.tv_weight > 0:
            for mod in tv_losses:
                loss += mod.loss.to(backward_device)

        loss.backward()

        maybe_save(num_calls[0])
        maybe_print(num_calls[0], loss)

        return loss

    optimizer, loopVal = setup_optimizer(img)
    while num_calls[0] <= loopVal:
        optimizer.step(feval)


# Configure the optimizer
def setup_optimizer(img):
    if params.optimizer == 'lbfgs':
        print("Running optimization with L-BFGS")
        optim_state = {
            'max_iter': params.num_iterations,
            'tolerance_change': -1,
            'tolerance_grad': -1,
        }
        if params.lbfgs_num_correction != 100:
            optim_state['history_size'] = params.lbfgs_num_correction
        optimizer = optim.LBFGS([img], **optim_state)
        loopVal = 1
    elif params.optimizer == 'adam':
        print("Running optimization with ADAM")
        optimizer = optim.Adam([img], lr=params.learning_rate)
        loopVal = params.num_iterations - 1
    return optimizer, loopVal


def setup_gpu():
    def setup_cuda():
        if 'cudnn' in params.backend:
            torch.backends.cudnn.enabled = True
            if params.cudnn_autotune:
                torch.backends.cudnn.benchmark = True
        else:
            torch.backends.cudnn.enabled = False

    def setup_cpu():
        if 'mkl' in params.backend and 'mkldnn' not in params.backend:
            torch.backends.mkl.enabled = True
        elif 'mkldnn' in params.backend:
            raise ValueError("MKL-DNN is not supported yet.")
        elif 'openmp' in params.backend:
            torch.backends.openmp.enabled = True

    multidevice = False
    if "," in str(params.gpu):
        devices = params.gpu.split(',')
        multidevice = True

        if 'c' in str(devices[0]).lower():
            backward_device = "cpu"
            setup_cuda(), setup_cpu()
        else:
            backward_device = "cuda:" + devices[0]
            setup_cuda()
        dtype = torch.FloatTensor

    elif "c" not in str(params.gpu).lower():
        setup_cuda()
        dtype, backward_device = torch.cuda.FloatTensor, "cuda:" + str(params.gpu)
    else:
        setup_cpu()
        dtype, backward_device = torch.FloatTensor, "cpu"
    return dtype, multidevice, backward_device


def setup_multi_device(net):
    assert len(params.gpu.split(',')) - 1 == len(params.multidevice_strategy.split(',')), \
        "The number of -multidevice_strategy layer indices minus 1, must be equal to the number of -gpu devices."

    new_net = ModelParallel(net, params.gpu, params.multidevice_strategy)
    return new_net


# Preprocess an image before passing it to a model.
# We need to rescale from [0, 1] to [0, 255], convert from RGB to BGR,
# and subtract the mean pixel.
def preprocess(image_name, image_size):
    image = Image.open(image_name).convert('RGB')
    if type(image_size) is not tuple:
        image_size = tuple([int((float(image_size) / max(image.size)) * x) for x in (image.height, image.width)])
    Loader = transforms.Compose([transforms.Resize(image_size), transforms.ToTensor()])
    rgb2bgr = transforms.Compose([transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])])])
    Normalize = transforms.Compose([transforms.Normalize(mean=[103.939, 116.779, 123.68], std=[1, 1, 1])])
    tensor = Normalize(rgb2bgr(Loader(image) * 255)).unsqueeze(0)
    return tensor


# Undo the above preprocessing.
def deprocess(output_tensor):
    Normalize = transforms.Compose([transforms.Normalize(mean=[-103.939, -116.779, -123.68], std=[1, 1, 1])])
    bgr2rgb = transforms.Compose([transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])])])
    output_tensor = bgr2rgb(Normalize(output_tensor.squeeze(0).cpu())) / 255
    output_tensor.clamp_(0, 1)
    Image2PIL = transforms.ToPILImage()
    image = Image2PIL(output_tensor.cpu())
    return image


# Combine the Y channel of the generated image and the UV/CbCr channels of the
# content image to perform color-independent style transfer.
def original_colors(content, generated):
    content_channels = list(content.convert('YCbCr').split())
    generated_channels = list(generated.convert('YCbCr').split())
    content_channels[0] = generated_channels[0]
    return Image.merge('YCbCr', content_channels).convert('RGB')


# Print like Lua/Torch7
def print_torch(net, multidevice):
    if multidevice:
        return
    simplelist = ""
    for i, layer in enumerate(net, 1):
        simplelist = simplelist + "(" + str(i) + ") -> "
    print("nn.Sequential ( \n [input -> " + simplelist + "output]")

    def strip(x):
        return str(x).replace(", ", ',').replace("(", '').replace(")", '') + ", "

    def n():
        return " (" + str(i) + "): " + "nn." + str(l).split("(", 1)[0]

    for i, l in enumerate(net, 1):
        if "2d" in str(l):
            ks, st, pd = strip(l.kernel_size), strip(l.stride), strip(l.padding)
            if "Conv2d" in str(l):
                ch = str(l.in_channels) + " -> " + str(l.out_channels)
                print(n() + "(" + ch + ", " + (ks).replace(",", 'x', 1) + st + pd.replace(", ", ')'))
            elif "Pool2d" in str(l):
                st = st.replace(" ", ' ') + st.replace(", ", ')')
                print(n() + "(" + ((ks).replace(",", 'x' + ks, 1) + st).replace(", ", ','))
        else:
            print(n())
    print(")")


# Divide weights by channel size
def normalize_weights(content_losses, style_losses):
    for n, i in enumerate(content_losses):
        i.strength = i.strength / max(i.target.size())
    for n, i in enumerate(style_losses):
        i.strength = i.strength / max(i.target.size())


# Scale gradients in the backward pass
class ScaleGradients(torch.autograd.Function):
    @staticmethod
    def forward(self, input_tensor, strength):
        self.strength = strength
        return input_tensor

    @staticmethod
    def backward(self, grad_output):
        grad_input = grad_output.clone()
        grad_input = grad_input / (torch.norm(grad_input, keepdim=True) + 1e-8)
        return grad_input * self.strength * self.strength, None


# Define an nn Module to compute content loss
class ContentLoss(nn.Module):

    def __init__(self, strength, normalize):
        super(ContentLoss, self).__init__()
        self.strength = strength
        self.crit = nn.MSELoss()
        self.mode = 'None'
        self.normalize = normalize

    def forward(self, input):
        if self.mode == 'loss':
            loss = self.crit(input, self.target)
            if self.normalize:
                loss = ScaleGradients.apply(loss, self.strength)
            self.loss = loss * self.strength
        elif self.mode == 'capture':
            self.target = input.detach()
        return input


class GramMatrix(nn.Module):

    def forward(self, input):
        B, C, H, W = input.size()
        x_flat = input.view(C, H * W)
        return torch.mm(x_flat, x_flat.t())


# Define an nn Module to compute style loss
class StyleLoss(nn.Module):

    def __init__(self, strength, normalize):
        super(StyleLoss, self).__init__()
        self.target = torch.Tensor()
        self.strength = strength
        self.gram = GramMatrix()
        self.crit = nn.MSELoss()
        self.mode = 'None'
        self.blend_weight = None
        self.normalize = normalize

    def forward(self, input):
        self.G = self.gram(input)
        self.G = self.G.div(input.nelement())
        if self.mode == 'capture':
            if self.blend_weight == None:
                self.target = self.G.detach()
            elif self.target.nelement() == 0:
                self.target = self.G.detach().mul(self.blend_weight)
            else:
                self.target = self.target.add(self.blend_weight, self.G.detach())
        elif self.mode == 'loss':
            loss = self.crit(self.G, self.target)
            if self.normalize:
                loss = ScaleGradients.apply(loss, self.strength)
            self.loss = self.strength * loss
        return input


class TVLoss(nn.Module):

    def __init__(self, strength):
        super(TVLoss, self).__init__()
        self.strength = strength

    def forward(self, input):
        self.x_diff = input[:, :, 1:, :] - input[:, :, :-1, :]
        self.y_diff = input[:, :, :, 1:] - input[:, :, :, :-1]
        self.loss = self.strength * (torch.sum(torch.abs(self.x_diff)) + torch.sum(torch.abs(self.y_diff)))
        return input


# if __name__ == "__main__":
#     main()