Skip to content

Commit

Permalink
Fast SSE calculation
Browse files Browse the repository at this point in the history
Implement faster SSE calculation using Einstein summation (np.einsum);  this greatly reduces test stat calculation time when Q gets large (Q > 10000)
  • Loading branch information
0todd0000 committed May 30, 2024
1 parent 9cec2cf commit c1f9afa
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 79 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

setup(
name = 'spm1d',
version = '0.4.29',
version = '0.4.30',
description = 'One-Dimensional Statistical Parametric Mapping',
author = 'Todd Pataky',
author_email = '[email protected]',
Expand Down
2 changes: 1 addition & 1 deletion spm1d/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
'''


__version__ = '0.4.29' # 2024-05-15
__version__ = '0.4.30' # 2024-05-30


__all__ = ['data', 'io', 'plot', 'rft1d', 'stats', 'util']
Expand Down
135 changes: 68 additions & 67 deletions spm1d/stats/anova/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,77 +14,78 @@


class LinearModel(object):
def __init__(self, Y, X, roi=None):
Y = np.asarray(Y, dtype=float)
self.dim = Y.ndim - 1 #dependent variable dimensionality (0 or 1)
self.Y = self._asmatrix(Y) #stacked dependent variable (JxQ)
self.X = np.asarray(X) #design matrix
self.J = self.X.shape[0] #number of observations
self.Q = self.Y.shape[1] #number of field nodes
self.QT = None #QR decomposition of design matrix
self.eij = None #residuals
self.roi = roi #regions of interest
# self.contrasts = contrasts #list of contrast objects
self._R = None #residual forming matrix
self._beta = None #least-squares parameters
self._rankR = None #error degrees of freedom
self._dfE = None #error degrees of freedom (equivalent to _rankR)
self._SSE = None #sum-of-squares (error)
self._MSE = None #mean-squared error
if self.dim==1:
self.eij = None
self.fwhm = None
self.resels = None
### labels:
self.term_labels = None
self.Fterms = None
def __init__(self, Y, X, roi=None):
Y = np.asarray(Y, dtype=float)
self.dim = Y.ndim - 1 #dependent variable dimensionality (0 or 1)
self.Y = self._asmatrix(Y) #stacked dependent variable (JxQ)
self.X = np.asarray(X) #design matrix
self.J = self.X.shape[0] #number of observations
self.Q = self.Y.shape[1] #number of field nodes
self.QT = None #QR decomposition of design matrix
self.eij = None #residuals
self.roi = roi #regions of interest
# self.contrasts = contrasts #list of contrast objects
self._R = None #residual forming matrix
self._beta = None #least-squares parameters
self._rankR = None #error degrees of freedom
self._dfE = None #error degrees of freedom (equivalent to _rankR)
self._SSE = None #sum-of-squares (error)
self._MSE = None #mean-squared error
if self.dim==1:
self.eij = None
self.fwhm = None
self.resels = None
### labels:
self.term_labels = None
self.Fterms = None

# def _asmatrix(self, Y):
# return np.matrix(Y).T if Y.ndim==1 else np.matrix(Y)
def _asmatrix(self, Y, dtype=float):
return np.asarray([Y], dtype=dtype).T if Y.ndim==1 else np.asarray(Y, dtype=dtype)
def _asmatrix(self, Y, dtype=float):
return np.asarray([Y], dtype=dtype).T if Y.ndim==1 else np.asarray(Y, dtype=dtype)

def _rank(self, A, tol=None):
'''
This is a slight modification of np.linalg.matrix_rank.
The tolerance performs poorly for some matrices
Here the tolerance is boosted by a factor of ten for improved performance.
'''
M = np.asarray(A)
S = np.linalg.svd(M, compute_uv=False)
if tol is None:
tol = 10 * S.max() * max(M.shape) * np.finfo(M.dtype).eps
rank = sum(S > tol)
return rank
def _rank(self, A, tol=None):
'''
This is a slight modification of np.linalg.matrix_rank.
The tolerance performs poorly for some matrices
Here the tolerance is boosted by a factor of ten for improved performance.
'''
M = np.asarray(A)
S = np.linalg.svd(M, compute_uv=False)
if tol is None:
tol = 10 * S.max() * max(M.shape) * np.finfo(M.dtype).eps
rank = sum(S > tol)
return rank

def fit(self, approx_residuals=None):
Y,X,J = self.Y, self.X, self.J
Xi = np.linalg.pinv(X) #design matrix pseudoinverse
self._beta = Xi @ Y #estimated parameters
self._R = np.eye(J) - X @ Xi #residual forming matrix
self._rankR = self._rank(self._R)
self._SSE = np.diag( Y.T @ self._R @ Y )
self._dfE = self._rankR
if self._dfE > eps:
self._MSE = self._SSE / self._dfE
if approx_residuals is None:
self.eij = np.asarray(self.Y - X@self._beta) #residuals
else:
C = approx_residuals
A = X @ C.T
Ai = np.linalg.pinv(A)
beta = Ai@Y
self.eij = np.asarray(Y - A@beta) #approximate residuals
if self.dim==1:
self.fwhm = rft1d.geom.estimate_fwhm(self.eij) #smoothness
### compute resel counts:
if self.roi is None:
self.resels = rft1d.geom.resel_counts(self.eij, self.fwhm, element_based=False) #resel
else:
B = np.any( np.isnan(self.eij), axis=0) #node is true if NaN
B = np.logical_and(np.logical_not(B), self.roi) #node is true if in ROI and also not NaN
mask = np.logical_not(B) #true for masked-out regions
self.resels = rft1d.geom.resel_counts(mask, self.fwhm, element_based=False) #resel
self.QT = np.linalg.qr(X)[0].T
def fit(self, approx_residuals=None):
Y,X,J = self.Y, self.X, self.J
Xi = np.linalg.pinv(X) #design matrix pseudoinverse
self._beta = Xi @ Y #estimated parameters
self._R = np.eye(J) - X @ Xi #residual forming matrix
self._rankR = self._rank(self._R)
# self._SSE = np.diag( Y.T @ self._R @ Y ) # old SSE calculation
self._SSE = np.einsum('ij,ji->i', Y.T @ self._R, Y) # new SSE calculation, 2024-05-30 (using Einstein summation trick)
self._dfE = self._rankR
if self._dfE > eps:
self._MSE = self._SSE / self._dfE
if approx_residuals is None:
self.eij = np.asarray(self.Y - X@self._beta) #residuals
else:
C = approx_residuals
A = X @ C.T
Ai = np.linalg.pinv(A)
beta = Ai@Y
self.eij = np.asarray(Y - A@beta) #approximate residuals
if self.dim==1:
self.fwhm = rft1d.geom.estimate_fwhm(self.eij) #smoothness
### compute resel counts:
if self.roi is None:
self.resels = rft1d.geom.resel_counts(self.eij, self.fwhm, element_based=False) #resel
else:
B = np.any( np.isnan(self.eij), axis=0) #node is true if NaN
B = np.logical_and(np.logical_not(B), self.roi) #node is true if in ROI and also not NaN
mask = np.logical_not(B) #true for masked-out regions
self.resels = rft1d.geom.resel_counts(mask, self.fwhm, element_based=False) #resel
self.QT = np.linalg.qr(X)[0].T


20 changes: 12 additions & 8 deletions spm1d/stats/nonparam/calculators.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,18 @@ def get_test_stat(self, y):


class CalculatorRegress1D(CalculatorRegress0D):
def get_test_stat(self, y):
Y = np.matrix(y)
b = self.Xi * Y #parameters
eij = Y - self.X*b #residuals
R = eij.T*eij #residual sum of squares
sigma2 = np.diag(R) / self.df #variance
t = np.array(self.c.T*b).flatten() / ((sigma2*self.cXXc)**0.5 + eps)
return t
def get_test_stat(self, y):
Y = np.matrix(y)
b = self.Xi * Y #parameters
eij = Y - self.X*b #residuals
# # previous sigma2 calculation (slow when Q get large: about 5 ms for Q=1000 and about 900 ms for Q=10000! )
# R = eij.T@eij #residuals: sum of squares
# sigma2 = np.diag(R)/df #variance
# new sigam2 calculation (using eigensum trick)
diagR = np.einsum('ij,ji->i', eij.T, eij) # residual sum of squares (eigensum trick)
sigma2 = diagR / df #variance
t = np.array(self.c.T*b).flatten() / ((sigma2*self.cXXc)**0.5 + eps)
return t


class CalculatorCCA0D(object):
Expand Down
8 changes: 6 additions & 2 deletions spm1d/stats/t.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,13 @@ def glm(Y, X, c, Q=None, roi=None):
### solve the GLM:
b = np.linalg.pinv(X) @ Y #parameters
eij = Y - X@b #residuals
R = eij.T@eij #residuals: sum of squares
df = Y.shape[0] - rank(X) #degrees of freedom
sigma2 = np.diag(R)/df #variance
# # previous sigma2 calculation (slow when Q get large: about 5 ms for Q=1000 and about 900 ms for Q=10000! )
# R = eij.T@eij #residuals: sum of squares
# sigma2 = np.diag(R)/df #variance
# new sigam2 calculation (using Einstein summation trick)
diagR = np.einsum('ij,ji->i', eij.T, eij) # residual sum of squares (eigensum trick)
sigma2 = diagR / df #variance
### compute t statistic
cXXc = c.T @ np.linalg.inv(X.T@X) @ c
cXXc = float(cXXc[0,0]) if isinstance(cXXc, np.ndarray) else float(cXXc)
Expand Down

0 comments on commit c1f9afa

Please sign in to comment.