From c1f9afaeca5b8580f9843c392d1a69059f2b4b4d Mon Sep 17 00:00:00 2001
From: Todd Pataky <0todd0@gmail.com>
Date: Thu, 30 May 2024 16:02:07 +0900
Subject: [PATCH] Fast SSE calculation

Implement faster SSE calculation using Einstein summation (np.einsum);
this greatly reduces test stat calculation time when Q gets large
(Q > 10000).
---
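Note (not part of the patch): the core change replaces np.diag(A @ B) with
np.einsum('ij,ji->i', A, B). The diag form materializes the full (Q x Q)
product only to keep its diagonal, whereas the einsum form computes just the
Q diagonal entries. A minimal standalone sketch of the equivalence; the
sizes and the mean-only residual-forming matrix below are illustrative, not
taken from spm1d:

    import numpy as np

    rng  = np.random.default_rng(0)
    J, Q = 10, 2000                              # observations x field nodes (illustrative)
    Y    = rng.standard_normal((J, Q))
    R    = np.eye(J) - np.full((J, J), 1.0/J)    # residual-forming matrix for a mean-only design

    sse_old = np.diag(Y.T @ R @ Y)               # forms a (Q x Q) intermediate
    sse_new = np.einsum('ij,ji->i', Y.T @ R, Y)  # computes only the Q diagonal terms

    assert np.allclose(sse_old, sse_new)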
 setup.py                            |   2 +-
 spm1d/__init__.py                   |   2 +-
 spm1d/stats/anova/models.py         | 135 ++++++++++++++--------
 spm1d/stats/nonparam/calculators.py |  20 +++--
 spm1d/stats/t.py                    |   8 +-
 5 files changed, 88 insertions(+), 79 deletions(-)

diff --git a/setup.py b/setup.py
index 82b96f3a..0aefe183 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
 
 setup(
     name         = 'spm1d',
-    version      = '0.4.29',
+    version      = '0.4.30',
     description  = 'One-Dimensional Statistical Parametric Mapping',
     author       = 'Todd Pataky',
     author_email = 'spm1d.mail@gmail.com',
diff --git a/spm1d/__init__.py b/spm1d/__init__.py
index 59cafc95..6868beb2 100644
--- a/spm1d/__init__.py
+++ b/spm1d/__init__.py
@@ -20,7 +20,7 @@
 '''
 
 
-__version__ = '0.4.29'  # 2024-05-15
+__version__ = '0.4.30'  # 2024-05-30
 
 __all__ = ['data', 'io', 'plot', 'rft1d', 'stats', 'util']
diff --git a/spm1d/stats/anova/models.py b/spm1d/stats/anova/models.py
index 5a1605ff..78532227 100644
--- a/spm1d/stats/anova/models.py
+++ b/spm1d/stats/anova/models.py
@@ -14,77 +14,78 @@
 
 
 class LinearModel(object):
-    def __init__(self, Y, X, roi=None):
-        Y                = np.asarray(Y, dtype=float)
-        self.dim         = Y.ndim - 1          #dependent variable dimensionality (0 or 1)
-        self.Y           = self._asmatrix(Y)   #stacked dependent variable (JxQ)
-        self.X           = np.asarray(X)       #design matrix
-        self.J           = self.X.shape[0]     #number of observations
-        self.Q           = self.Y.shape[1]     #number of field nodes
-        self.QT          = None                #QR decomposition of design matrix
-        self.eij         = None                #residuals
-        self.roi         = roi                 #regions of interest
-        # self.contrasts = contrasts           #list of contrast objects
-        self._R          = None                #residual forming matrix
-        self._beta       = None                #least-squares parameters
-        self._rankR      = None                #error degrees of freedom
-        self._dfE        = None                #error degrees of freedom (equivalent to _rankR)
-        self._SSE        = None                #sum-of-squares (error)
-        self._MSE        = None                #mean-squared error
-        if self.dim==1:
-            self.eij     = None
-            self.fwhm    = None
-            self.resels  = None
-        ### labels:
-        self.term_labels = None
-        self.Fterms      = None
+    def __init__(self, Y, X, roi=None):
+        Y                = np.asarray(Y, dtype=float)
+        self.dim         = Y.ndim - 1          #dependent variable dimensionality (0 or 1)
+        self.Y           = self._asmatrix(Y)   #stacked dependent variable (JxQ)
+        self.X           = np.asarray(X)       #design matrix
+        self.J           = self.X.shape[0]     #number of observations
+        self.Q           = self.Y.shape[1]     #number of field nodes
+        self.QT          = None                #QR decomposition of design matrix
+        self.eij         = None                #residuals
+        self.roi         = roi                 #regions of interest
+        # self.contrasts = contrasts           #list of contrast objects
+        self._R          = None                #residual forming matrix
+        self._beta       = None                #least-squares parameters
+        self._rankR      = None                #error degrees of freedom
+        self._dfE        = None                #error degrees of freedom (equivalent to _rankR)
+        self._SSE        = None                #sum-of-squares (error)
+        self._MSE        = None                #mean-squared error
+        if self.dim==1:
+            self.eij     = None
+            self.fwhm    = None
+            self.resels  = None
+        ### labels:
+        self.term_labels = None
+        self.Fterms      = None
 
     # def _asmatrix(self, Y):
     #     return np.matrix(Y).T if Y.ndim==1 else np.matrix(Y)
-    def _asmatrix(self, Y, dtype=float):
-        return np.asarray([Y], dtype=dtype).T if Y.ndim==1 else np.asarray(Y, dtype=dtype)
+    def _asmatrix(self, Y, dtype=float):
+        return np.asarray([Y], dtype=dtype).T if Y.ndim==1 else np.asarray(Y, dtype=dtype)
 
-    def _rank(self, A, tol=None):
-        '''
-        This is a slight modification of np.linalg.matrix_rank.
-        The tolerance performs poorly for some matrices
-        Here the tolerance is boosted by a factor of ten for improved performance.
-        '''
-        M    = np.asarray(A)
-        S    = np.linalg.svd(M, compute_uv=False)
-        if tol is None:
-            tol  = 10 * S.max() * max(M.shape) * np.finfo(M.dtype).eps
-        rank = sum(S > tol)
-        return rank
+    def _rank(self, A, tol=None):
+        '''
+        This is a slight modification of np.linalg.matrix_rank.
+        The tolerance performs poorly for some matrices
+        Here the tolerance is boosted by a factor of ten for improved performance.
+        '''
+        M    = np.asarray(A)
+        S    = np.linalg.svd(M, compute_uv=False)
+        if tol is None:
+            tol  = 10 * S.max() * max(M.shape) * np.finfo(M.dtype).eps
+        rank = sum(S > tol)
+        return rank
 
-    def fit(self, approx_residuals=None):
-        Y,X,J        = self.Y, self.X, self.J
-        Xi           = np.linalg.pinv(X)       #design matrix pseudoinverse
-        self._beta   = Xi @ Y                  #estimated parameters
-        self._R      = np.eye(J) - X @ Xi      #residual forming matrix
-        self._rankR  = self._rank(self._R)
-        self._SSE    = np.diag( Y.T @ self._R @ Y )
-        self._dfE    = self._rankR
-        if self._dfE > eps:
-            self._MSE  = self._SSE / self._dfE
-        if approx_residuals is None:
-            self.eij = np.asarray(self.Y - X@self._beta)   #residuals
-        else:
-            C        = approx_residuals
-            A        = X @ C.T
-            Ai       = np.linalg.pinv(A)
-            beta     = Ai@Y
-            self.eij = np.asarray(Y - A@beta)              #approximate residuals
-        if self.dim==1:
-            self.fwhm   = rft1d.geom.estimate_fwhm(self.eij)   #smoothness
-            ### compute resel counts:
-            if self.roi is None:
-                self.resels = rft1d.geom.resel_counts(self.eij, self.fwhm, element_based=False)   #resel
-            else:
-                B     = np.any( np.isnan(self.eij), axis=0)           #node is true if NaN
-                B     = np.logical_and(np.logical_not(B), self.roi)   #node is true if in ROI and also not NaN
-                mask  = np.logical_not(B)                             #true for masked-out regions
-                self.resels = rft1d.geom.resel_counts(mask, self.fwhm, element_based=False)       #resel
-        self.QT  = np.linalg.qr(X)[0].T
+    def fit(self, approx_residuals=None):
+        Y,X,J        = self.Y, self.X, self.J
+        Xi           = np.linalg.pinv(X)       #design matrix pseudoinverse
+        self._beta   = Xi @ Y                  #estimated parameters
+        self._R      = np.eye(J) - X @ Xi      #residual forming matrix
+        self._rankR  = self._rank(self._R)
+        # self._SSE  = np.diag( Y.T @ self._R @ Y )   # old SSE calculation
+        self._SSE    = np.einsum('ij,ji->i', Y.T @ self._R, Y)   # new SSE calculation, 2024-05-30 (using Einstein summation trick)
+        self._dfE    = self._rankR
+        if self._dfE > eps:
+            self._MSE  = self._SSE / self._dfE
+        if approx_residuals is None:
+            self.eij = np.asarray(self.Y - X@self._beta)   #residuals
+        else:
+            C        = approx_residuals
+            A        = X @ C.T
+            Ai       = np.linalg.pinv(A)
+            beta     = Ai@Y
+            self.eij = np.asarray(Y - A@beta)              #approximate residuals
+        if self.dim==1:
+            self.fwhm   = rft1d.geom.estimate_fwhm(self.eij)   #smoothness
+            ### compute resel counts:
+            if self.roi is None:
+                self.resels = rft1d.geom.resel_counts(self.eij, self.fwhm, element_based=False)   #resel
+            else:
+                B     = np.any( np.isnan(self.eij), axis=0)           #node is true if NaN
+                B     = np.logical_and(np.logical_not(B), self.roi)   #node is true if in ROI and also not NaN
+                mask  = np.logical_not(B)                             #true for masked-out regions
+                self.resels = rft1d.geom.resel_counts(mask, self.fwhm, element_based=False)       #resel
+        self.QT  = np.linalg.qr(X)[0].T
diff --git a/spm1d/stats/nonparam/calculators.py b/spm1d/stats/nonparam/calculators.py
index 2e045846..37ca7818 100644
--- a/spm1d/stats/nonparam/calculators.py
+++ b/spm1d/stats/nonparam/calculators.py
@@ -195,14 +195,18 @@ def get_test_stat(self, y):
 
 
 class CalculatorRegress1D(CalculatorRegress0D):
-    def get_test_stat(self, y):
-        Y       = np.matrix(y)
-        b       = self.Xi * Y            #parameters
-        eij     = Y - self.X*b           #residuals
-        R       = eij.T*eij              #residual sum of squares
-        sigma2  = np.diag(R) / self.df   #variance
-        t       = np.array(self.c.T*b).flatten() / ((sigma2*self.cXXc)**0.5 + eps)
-        return t
+    def get_test_stat(self, y):
+        Y       = np.matrix(y)
+        b       = self.Xi * Y            #parameters
+        eij     = Y - self.X*b           #residuals
+        # previous sigma2 calculation (slow when Q gets large: about 5 ms for Q=1000 and about 900 ms for Q=10000!)
+        # R      = eij.T*eij              #residual sum of squares
+        # sigma2 = np.diag(R) / self.df   #variance
+        # new sigma2 calculation (using Einstein summation trick)
+        diagR   = np.einsum('ij,ji->i', eij.T, eij)   #residual sum of squares (einsum trick)
+        sigma2  = diagR / self.df        #variance
+        t       = np.array(self.c.T*b).flatten() / ((sigma2*self.cXXc)**0.5 + eps)
+        return t
 
 
 class CalculatorCCA0D(object):
diff --git a/spm1d/stats/t.py b/spm1d/stats/t.py
index 48a22103..cc646caa 100644
--- a/spm1d/stats/t.py
+++ b/spm1d/stats/t.py
@@ -50,9 +50,13 @@ def glm(Y, X, c, Q=None, roi=None):
     ### solve the GLM:
     b       = np.linalg.pinv(X) @ Y    #parameters
     eij     = Y - X@b                  #residuals
-    R       = eij.T@eij                #residuals: sum of squares
     df      = Y.shape[0] - rank(X)     #degrees of freedom
-    sigma2  = np.diag(R)/df            #variance
+    # previous sigma2 calculation (slow when Q gets large: about 5 ms for Q=1000 and about 900 ms for Q=10000!)
+    # R      = eij.T@eij                #residuals: sum of squares
+    # sigma2 = np.diag(R)/df            #variance
+    # new sigma2 calculation (using Einstein summation trick)
+    diagR   = np.einsum('ij,ji->i', eij.T, eij)   #residual sum of squares (einsum trick)
+    sigma2  = diagR / df               #variance
     ### compute t statistic
     cXXc    = c.T @ np.linalg.inv(X.T@X) @ c
     cXXc    = float(cXXc[0,0]) if isinstance(cXXc, np.ndarray) else float(cXXc)