-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathsn.py
238 lines (210 loc) · 9.62 KB
/
sn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# spectral normalization
# https://github.com/pfnet-research/sngan_projection
import chainer
import numpy as np
from chainer import cuda
from chainer.functions.array.broadcast import broadcast_to
from chainer.functions.connection import convolution_2d
from chainer.links.connection.convolution_2d import Convolution2D
from chainer.functions.connection import linear
from chainer.links.connection.linear import Linear
import chainer.functions as F
def _l2normalize(v, eps=1e-12):
    """Scale ``v`` to unit Euclidean length.

    The norm is taken over every element of ``v`` (the array is treated as a
    single flat vector). ``eps`` is added to the norm before dividing, so an
    all-zero input yields zeros rather than NaNs.

    Args:
        v (numpy.ndarray or cupy.ndarray): Floating-point array to normalize.
        eps (float): Small constant added to the norm for numerical
            stability.

    Returns:
        Array with the same shape as ``v`` holding ``v / (||v||_2 + eps)``.
    """
    if isinstance(v, np.ndarray):
        # The custom kernels below are CUDA-only; host arrays are handled
        # with plain NumPy so the helper also works without a GPU.
        # ``eps`` is cast to the array's own scalar type so that a float32
        # input is not promoted to float64.
        norm = np.sqrt(np.sum(v * v))
        return v / (norm + v.dtype.type(eps))

    # Device arrays: full reduction sqrt(sum(x * x)) followed by a fused
    # elementwise division.
    norm = cuda.reduce('T x', 'T out',
                       'x * x', 'a + b', 'out = sqrt(a)', 0,
                       'norm_sn')
    div = cuda.elementwise('T x, T norm, T eps',
                           'T out',
                           'out = x / (norm + eps)',
                           'div_sn')
    return div(v, norm(v), eps)
def max_singular_value(W, u=None, Ip=1):
    """Estimate the largest singular value of ``W`` by power iteration.

    The iteration itself runs on the raw array ``W.data``, so the singular
    vector estimates carry no computational graph. Only the final value
    ``sigma = sum((u W) * v)`` is built from chainer functions applied to
    ``W`` itself.

    Args:
        W (~chainer.Variable): Two-dimensional weight.
        u (array or None): Starting vector of shape ``(1, W.shape[0])``.
            When ``None``, a float32 vector is drawn from a standard normal
            distribution with the array module that owns ``W.data``.
        Ip (int): Number of power iterations; must be at least 1.

    Returns:
        tuple: ``(sigma, u, v)`` -- the singular value estimate and the two
        vectors produced by the last iteration.

    Raises:
        ValueError: If ``Ip`` is not at least 1.
    """
    if not Ip >= 1:
        raise ValueError("The number of power iterations should be positive integer")

    weight = W.data
    xp = cuda.get_array_module(weight)
    if u is None:
        u = xp.random.normal(size=(1, W.shape[0])).astype(xp.float32)

    left = u
    for _ in range(Ip):
        # One power-iteration step: v <- normalize(u W), u <- normalize(v W^T)
        right = _l2normalize(xp.dot(left, weight), eps=1e-12)
        left = _l2normalize(xp.dot(right, weight.transpose()), eps=1e-12)

    # F.linear(x, A) multiplies x by A^T, so passing W^T gives ``left @ W``.
    projected = F.linear(left, F.transpose(W))
    sigma = F.sum(projected * right)
    return sigma, left, right
def max_singular_value_fully_differentiable(W, u=None, Ip=1):
    """Estimate the largest singular value of ``W`` using chainer functions only.

    Unlike :func:`max_singular_value`, every power-iteration step here is
    built from chainer functions applied to ``W``. After the loop the
    un-normalized vector ``v W^T`` is recomputed and its Euclidean norm is
    returned as the singular value estimate.

    Args:
        W (~chainer.Variable): Two-dimensional weight.
        u (array or None): Starting vector of shape ``(1, W.shape[0])``.
            When ``None``, a float32 vector is drawn from a standard normal
            distribution with the array module that owns ``W.data``.
        Ip (int): Number of power iterations; must be at least 1.

    Returns:
        tuple: ``(sigma, u, v)`` where ``sigma`` is the norm described above,
        ``u`` is the raw array of ``v W^T`` after L2 normalization, and ``v``
        is the variable produced by the last iteration.

    Raises:
        ValueError: If ``Ip`` is not at least 1.
    """
    if not Ip >= 1:
        raise ValueError("The number of power iterations should be positive integer")

    xp = cuda.get_array_module(W.data)
    if u is None:
        u = xp.random.normal(size=(1, W.shape[0])).astype(xp.float32)

    left = u
    for _ in range(Ip):
        right = F.normalize(F.matmul(left, W), eps=1e-12)
        left = F.normalize(F.matmul(right, F.transpose(W)), eps=1e-12)

    # Recompute the last projection without normalizing it: its length is
    # the singular value estimate.
    unnormalized = F.matmul(right, F.transpose(W))
    sigma = F.sqrt(F.sum(unnormalized ** 2))
    return sigma, _l2normalize(unnormalized.data), right
class SNConvolution2D(Convolution2D):
    """Two-dimensional convolutional layer with spectral normalization.

    This link wraps the :func:`~chainer.functions.convolution_2d` function and
    holds the filter weight and bias vector as parameters. The filter used in
    the forward pass is ``W_bar``: the weight ``W`` divided by an estimate of
    the largest singular value of ``W`` reshaped to ``(out_channels, -1)``.

    Args:
        in_channels (int): Number of channels of input arrays. If ``None``,
            parameter initialization will be deferred until the first forward
            pass, at which time the size will be determined.
        out_channels (int): Number of channels of output arrays.
        ksize (int or pair of ints): Size of filters (a.k.a. kernels).
            ``ksize=k`` and ``ksize=(k, k)`` are equivalent.
        stride (int or pair of ints): Stride of filter applications.
            ``stride=s`` and ``stride=(s, s)`` are equivalent.
        pad (int or pair of ints): Spatial padding width for input arrays.
            ``pad=p`` and ``pad=(p, p)`` are equivalent.
        nobias (bool): If ``True``, then this link does not use the bias term.
        initialW (4-D array): Initial weight value, forwarded unchanged to
            the parent ``Convolution2D`` constructor.
        initial_bias (1-D array): Initial bias value, forwarded unchanged to
            the parent ``Convolution2D`` constructor.
        use_gamma (bool): If true, apply scalar multiplication to the
            normalized weight (i.e. reparameterize).
        Ip (int): The number of power iterations for calculating the spectral
            norm of the weights.
        factor (float): Constant factor to adjust spectral norm of W_bar.

    .. seealso::
       See :func:`chainer.functions.convolution_2d` for the definition of
       two-dimensional convolution.

    Attributes:
        W (~chainer.Variable): Weight parameter.
        W_bar (~chainer.Variable): Spectrally normalized weight parameter.
        b (~chainer.Variable): Bias parameter.
        u (~numpy.array): Persistent ``(1, out_channels)`` estimate of the
            first left singular vector of the reshaped ``W`` (it multiplies
            ``W`` from the left during power iteration).
        (optional) gamma (~chainer.Variable): the multiplier parameter.
        (optional) factor (float): constant factor to adjust spectral norm of W_bar.
    """

    def __init__(self, in_channels, out_channels, ksize, stride=1, pad=0,
                 nobias=False, initialW=None, initial_bias=None, use_gamma=False, Ip=1, factor=None):
        # These are assigned before the parent constructor runs; presumably
        # the parent may call ``_initialize_params`` (which reads
        # ``use_gamma``) right away when ``in_channels`` is given.
        self.Ip = Ip
        self.use_gamma = use_gamma
        self.factor = factor
        super(SNConvolution2D, self).__init__(
            in_channels, out_channels, ksize, stride, pad,
            nobias, initialW, initial_bias)
        self.u = np.random.normal(size=(1, out_channels)).astype(dtype="f")
        self.register_persistent('u')

    @property
    def W_bar(self):
        """Spectrally normalized weight.

        Returns ``W / sigma`` (times ``gamma`` when that parameter exists),
        where ``sigma`` is the power-iteration estimate of the spectral norm,
        divided by ``factor`` when a truthy factor was given.
        """
        W_mat = self.W.reshape(self.W.shape[0], -1)
        sigma, _u, _ = max_singular_value(W_mat, self.u, self.Ip)
        if self.factor:
            sigma = sigma / self.factor
        sigma = broadcast_to(sigma.reshape((1, 1, 1, 1)), self.W.shape)
        if chainer.config.train:
            # Update estimated 1st singular vector
            self.u[:] = _u
        if hasattr(self, 'gamma'):
            return broadcast_to(self.gamma, self.W.shape) * self.W / sigma
        else:
            return self.W / sigma

    def _initialize_params(self, in_size):
        """Initialize ``W``/``b`` via the parent and, if requested, ``gamma``.

        ``gamma`` starts at the largest singular value of the freshly
        initialized weight reshaped to ``(out_channels, -1)``.
        """
        super(SNConvolution2D, self)._initialize_params(in_size)
        if self.use_gamma:
            # NumPy's SVD needs a host array; with deferred initialization
            # the weight may already live on a GPU. ``to_cpu`` returns host
            # arrays unchanged.
            W_mat = cuda.to_cpu(self.W.data).reshape(self.W.shape[0], -1)
            # Only the singular values are needed, so skip computing U and V.
            s = np.linalg.svd(W_mat, compute_uv=False)
            with self.init_scope():
                self.gamma = chainer.Parameter(s[0], (1, 1, 1, 1))

    def __call__(self, x):
        """Applies the convolution layer.

        Args:
            x (~chainer.Variable): Input image.

        Returns:
            ~chainer.Variable: Output of the convolution.
        """
        if self.W.data is None:
            # Deferred initialization: the channel count comes from the input.
            self._initialize_params(x.shape[1])
        return convolution_2d.convolution_2d(
            x, self.W_bar, self.b, self.stride, self.pad)
class SNLinear(Linear):
    """Linear layer with Spectral Normalization.

    The weight used in the forward pass is ``W_bar``: the weight ``W``
    divided by a power-iteration estimate of its largest singular value.

    Args:
        in_size (int): Dimension of input vectors. If ``None``, parameter
            initialization will be deferred until the first forward pass,
            at which time the size will be determined.
        out_size (int): Dimension of output vectors.
        use_gamma (bool): If true, apply scalar multiplication to the
            normalized weight (i.e. reparameterize).
        nobias (bool): If ``True``, then this function does not use the bias.
        initialW (2-D array): Initial weight value, forwarded unchanged to
            the parent ``Linear`` constructor.
        initial_bias (1-D array): Initial bias value, forwarded unchanged to
            the parent ``Linear`` constructor.
        Ip (int): The number of power iterations for calculating the spectral
            norm of the weights.
        factor (float): Constant factor to adjust spectral norm of W_bar.

    .. seealso:: :func:`~chainer.functions.linear`

    Attributes:
        W (~chainer.Variable): Weight parameter.
        W_bar (~chainer.Variable): Spectrally normalized weight parameter.
        b (~chainer.Variable): Bias parameter.
        u (~numpy.array): Persistent ``(1, out_size)`` estimate of the first
            left singular vector of ``W`` (it multiplies ``W`` from the left
            during power iteration).
        (optional) gamma (~chainer.Variable): the multiplier parameter.
        (optional) factor (float): constant factor to adjust spectral norm of W_bar.
    """

    def __init__(self, in_size, out_size, use_gamma=False, nobias=False,
                 initialW=None, initial_bias=None, Ip=1, factor=None):
        # These are assigned before the parent constructor runs; presumably
        # the parent may call ``_initialize_params`` (which reads
        # ``use_gamma``) right away when ``in_size`` is given.
        self.Ip = Ip
        self.use_gamma = use_gamma
        self.factor = factor
        super(SNLinear, self).__init__(
            in_size, out_size, nobias, initialW, initial_bias
        )
        self.u = np.random.normal(size=(1, out_size)).astype(dtype="f")
        self.register_persistent('u')

    @property
    def W_bar(self):
        """Spectrally normalized weight.

        Returns ``W / sigma`` (times ``gamma`` when that parameter exists),
        where ``sigma`` is the power-iteration estimate of the spectral norm,
        divided by ``factor`` when a truthy factor was given.
        """
        sigma, _u, _ = max_singular_value(self.W, self.u, self.Ip)
        if self.factor:
            sigma = sigma / self.factor
        sigma = broadcast_to(sigma.reshape((1, 1)), self.W.shape)
        if chainer.config.train:
            # Update the estimated 1st singular vector only while training,
            # matching SNConvolution2D; evaluation must not mutate the
            # persistent state.
            self.u[:] = _u
        if hasattr(self, 'gamma'):
            return broadcast_to(self.gamma, self.W.shape) * self.W / sigma
        else:
            return self.W / sigma

    def _initialize_params(self, in_size):
        """Initialize ``W``/``b`` via the parent and, if requested, ``gamma``.

        ``gamma`` starts at the largest singular value of the freshly
        initialized weight.
        """
        super(SNLinear, self)._initialize_params(in_size)
        if self.use_gamma:
            # NumPy's SVD needs a host array; with deferred initialization
            # the weight may already live on a GPU. ``to_cpu`` returns host
            # arrays unchanged. Only the singular values are needed, so U
            # and V are not computed.
            s = np.linalg.svd(cuda.to_cpu(self.W.data), compute_uv=False)
            with self.init_scope():
                self.gamma = chainer.Parameter(s[0], (1, 1))

    def __call__(self, x):
        """Applies the linear layer.

        Args:
            x (~chainer.Variable): Batch of input vectors.

        Returns:
            ~chainer.Variable: Output of the linear layer.
        """
        if self.W.data is None:
            # Deferred initialization: flatten everything but the batch axis.
            self._initialize_params(x.size // x.shape[0])
        return linear.linear(x, self.W_bar, self.b)