From c1f9afaeca5b8580f9843c392d1a69059f2b4b4d Mon Sep 17 00:00:00 2001
From: Todd Pataky <0todd0@gmail.com>
Date: Thu, 30 May 2024 16:02:07 +0900
Subject: [PATCH] Fast SSE calculation

Implement faster SSE calculation using Einstein summation (np.einsum);
this greatly reduces test stat calculation time when Q gets large
(Q > 10000).
---
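Note (not part of the patch): the core change replaces np.diag(A @ B) with
np.einsum('ij,ji->i', A, B). The diag form materializes the full (Q x Q)
product only to keep its diagonal, whereas the einsum form computes just the
Q diagonal entries. A minimal standalone sketch of the equivalence; the
sizes and the mean-only residual-forming matrix below are illustrative, not
taken from spm1d:

    import numpy as np

    rng  = np.random.default_rng(0)
    J, Q = 10, 2000                              # observations x field nodes (illustrative)
    Y    = rng.standard_normal((J, Q))
    R    = np.eye(J) - np.full((J, J), 1.0/J)    # residual-forming matrix for a mean-only design

    sse_old = np.diag(Y.T @ R @ Y)               # forms a (Q x Q) intermediate
    sse_new = np.einsum('ij,ji->i', Y.T @ R, Y)  # computes only the Q diagonal terms

    assert np.allclose(sse_old, sse_new)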
 setup.py                            |   2 +-
 spm1d/__init__.py                   |   2 +-
 spm1d/stats/anova/models.py         | 135 ++++++++++++++--------
 spm1d/stats/nonparam/calculators.py |  20 +++--
 spm1d/stats/t.py                    |   8 +-
 5 files changed, 88 insertions(+), 79 deletions(-)

diff --git a/setup.py b/setup.py
index 82b96f3a..0aefe183 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
 
 setup(
     name         = 'spm1d',
-    version      = '0.4.29',
+    version      = '0.4.30',
     description  = 'One-Dimensional Statistical Parametric Mapping',
     author       = 'Todd Pataky',
     author_email = 'spm1d.mail@gmail.com',
diff --git a/spm1d/__init__.py b/spm1d/__init__.py
index 59cafc95..6868beb2 100644
--- a/spm1d/__init__.py
+++ b/spm1d/__init__.py
@@ -20,7 +20,7 @@
 '''
 
 
-__version__ = '0.4.29'  # 2024-05-15
+__version__ = '0.4.30'  # 2024-05-30
 
 __all__ = ['data', 'io', 'plot', 'rft1d', 'stats', 'util']
diff --git a/spm1d/stats/anova/models.py b/spm1d/stats/anova/models.py
index 5a1605ff..78532227 100644
--- a/spm1d/stats/anova/models.py
+++ b/spm1d/stats/anova/models.py
@@ -14,77 +14,78 @@
 
 
 class LinearModel(object):
-    def __init__(self, Y, X, roi=None):
-        Y                = np.asarray(Y, dtype=float)
-        self.dim         = Y.ndim - 1          #dependent variable dimensionality (0 or 1)
-        self.Y           = self._asmatrix(Y)   #stacked dependent variable (JxQ)
-        self.X           = np.asarray(X)       #design matrix
-        self.J           = self.X.shape[0]     #number of observations
-        self.Q           = self.Y.shape[1]     #number of field nodes
-        self.QT          = None                #QR decomposition of design matrix
-        self.eij         = None                #residuals
-        self.roi         = roi                 #regions of interest
-        # self.contrasts = contrasts           #list of contrast objects
-        self._R          = None                #residual forming matrix
-        self._beta       = None                #least-squares parameters
-        self._rankR      = None                #error degrees of freedom
-        self._dfE        = None                #error degrees of freedom (equivalent to _rankR)
-        self._SSE        = None                #sum-of-squares (error)
-        self._MSE        = None                #mean-squared error
-        if self.dim==1:
-            self.eij     = None
-            self.fwhm    = None
-            self.resels  = None
-        ### labels:
-        self.term_labels = None
-        self.Fterms      = None
+    def __init__(self, Y, X, roi=None):
+        Y                = np.asarray(Y, dtype=float)
+        self.dim         = Y.ndim - 1          #dependent variable dimensionality (0 or 1)
+        self.Y           = self._asmatrix(Y)   #stacked dependent variable (JxQ)
+        self.X           = np.asarray(X)       #design matrix
+        self.J           = self.X.shape[0]     #number of observations
+        self.Q           = self.Y.shape[1]     #number of field nodes
+        self.QT          = None                #QR decomposition of design matrix
+        self.eij         = None                #residuals
+        self.roi         = roi                 #regions of interest
+        # self.contrasts = contrasts           #list of contrast objects
+        self._R          = None                #residual forming matrix
+        self._beta       = None                #least-squares parameters
+        self._rankR      = None                #error degrees of freedom
+        self._dfE        = None                #error degrees of freedom (equivalent to _rankR)
+        self._SSE        = None                #sum-of-squares (error)
+        self._MSE        = None                #mean-squared error
+        if self.dim==1:
+            self.eij     = None
+            self.fwhm    = None
+            self.resels  = None
+        ### labels:
+        self.term_labels = None
+        self.Fterms      = None
 
     # def _asmatrix(self, Y):
     #     return np.matrix(Y).T if Y.ndim==1 else np.matrix(Y)
-    def _asmatrix(self, Y, dtype=float):
-        return np.asarray([Y], dtype=dtype).T if Y.ndim==1 else np.asarray(Y, dtype=dtype)
+    def _asmatrix(self, Y, dtype=float):
+        return np.asarray([Y], dtype=dtype).T if Y.ndim==1 else np.asarray(Y, dtype=dtype)
 
-    def _rank(self, A, tol=None):
-        '''
-        This is a slight modification of np.linalg.matrix_rank.
-        The tolerance performs poorly for some matrices
-        Here the tolerance is boosted by a factor of ten for improved performance.
-        '''
-        M    = np.asarray(A)
-        S    = np.linalg.svd(M, compute_uv=False)
-        if tol is None:
-            tol  = 10 * S.max() * max(M.shape) * np.finfo(M.dtype).eps
-        rank = sum(S > tol)
-        return rank
+    def _rank(self, A, tol=None):
+        '''
+        This is a slight modification of np.linalg.matrix_rank.
+        The tolerance performs poorly for some matrices
+        Here the tolerance is boosted by a factor of ten for improved performance.
+        '''
+        M    = np.asarray(A)
+        S    = np.linalg.svd(M, compute_uv=False)
+        if tol is None:
+            tol  = 10 * S.max() * max(M.shape) * np.finfo(M.dtype).eps
+        rank = sum(S > tol)
+        return rank
 
-    def fit(self, approx_residuals=None):
-        Y,X,J        = self.Y, self.X, self.J
-        Xi           = np.linalg.pinv(X)       #design matrix pseudoinverse
-        self._beta   = Xi @ Y                  #estimated parameters
-        self._R      = np.eye(J) - X @ Xi      #residual forming matrix
-        self._rankR  = self._rank(self._R)
-        self._SSE    = np.diag( Y.T @ self._R @ Y )
-        self._dfE    = self._rankR
-        if self._dfE > eps:
-            self._MSE  = self._SSE / self._dfE
-        if approx_residuals is None:
-            self.eij = np.asarray(self.Y - X@self._beta)   #residuals
-        else:
-            C        = approx_residuals
-            A        = X @ C.T
-            Ai       = np.linalg.pinv(A)
-            beta     = Ai@Y
-            self.eij = np.asarray(Y - A@beta)              #approximate residuals
-        if self.dim==1:
-            self.fwhm   = rft1d.geom.estimate_fwhm(self.eij)   #smoothness
-            ### compute resel counts:
-            if self.roi is None:
-                self.resels = rft1d.geom.resel_counts(self.eij, self.fwhm, element_based=False)   #resel
-            else:
-                B     = np.any( np.isnan(self.eij), axis=0)           #node is true if NaN
-                B     = np.logical_and(np.logical_not(B), self.roi)   #node is true if in ROI and also not NaN
-                mask  = np.logical_not(B)                             #true for masked-out regions
-                self.resels = rft1d.geom.resel_counts(mask, self.fwhm, element_based=False)       #resel
-        self.QT  = np.linalg.qr(X)[0].T
+    def fit(self, approx_residuals=None):
+        Y,X,J        = self.Y, self.X, self.J
+        Xi           = np.linalg.pinv(X)       #design matrix pseudoinverse
+        self._beta   = Xi @ Y                  #estimated parameters
+        self._R      = np.eye(J) - X @ Xi      #residual forming matrix
+        self._rankR  = self._rank(self._R)
+        # self._SSE  = np.diag( Y.T @ self._R @ Y )   # old SSE calculation
+        self._SSE    = np.einsum('ij,ji->i', Y.T @ self._R, Y)   # new SSE calculation, 2024-05-30 (using Einstein summation trick)
+        self._dfE    = self._rankR
+        if self._dfE > eps:
+            self._MSE  = self._SSE / self._dfE
+        if approx_residuals is None:
+            self.eij = np.asarray(self.Y - X@self._beta)   #residuals
+        else:
+            C        = approx_residuals
+            A        = X @ C.T
+            Ai       = np.linalg.pinv(A)
+            beta     = Ai@Y
+            self.eij = np.asarray(Y - A@beta)              #approximate residuals
+        if self.dim==1:
+            self.fwhm   = rft1d.geom.estimate_fwhm(self.eij)   #smoothness
+            ### compute resel counts:
+            if self.roi is None:
+                self.resels = rft1d.geom.resel_counts(self.eij, self.fwhm, element_based=False)   #resel
+            else:
+                B     = np.any( np.isnan(self.eij), axis=0)           #node is true if NaN
+                B     = np.logical_and(np.logical_not(B), self.roi)   #node is true if in ROI and also not NaN
+                mask  = np.logical_not(B)                             #true for masked-out regions
+                self.resels = rft1d.geom.resel_counts(mask, self.fwhm, element_based=False)       #resel
+        self.QT  = np.linalg.qr(X)[0].T
diff --git a/spm1d/stats/nonparam/calculators.py b/spm1d/stats/nonparam/calculators.py
index 2e045846..37ca7818 100644
--- a/spm1d/stats/nonparam/calculators.py
+++ b/spm1d/stats/nonparam/calculators.py
@@ -195,14 +195,18 @@ def get_test_stat(self, y):
 
 
 class CalculatorRegress1D(CalculatorRegress0D):
-    def get_test_stat(self, y):
-        Y       = np.matrix(y)
-        b       = self.Xi * Y            #parameters
-        eij     = Y - self.X*b           #residuals
-        R       = eij.T*eij              #residual sum of squares
-        sigma2  = np.diag(R) / self.df   #variance
-        t       = np.array(self.c.T*b).flatten() / ((sigma2*self.cXXc)**0.5 + eps)
-        return t
+    def get_test_stat(self, y):
+        Y       = np.matrix(y)
+        b       = self.Xi * Y            #parameters
+        eij     = Y - self.X*b           #residuals
+        # previous sigma2 calculation (slow when Q gets large: about 5 ms for Q=1000 and about 900 ms for Q=10000!)
+        # R      = eij.T*eij              #residual sum of squares
+        # sigma2 = np.diag(R) / self.df   #variance
+        # new sigma2 calculation (using Einstein summation trick)
+        diagR   = np.einsum('ij,ji->i', eij.T, eij)   #residual sum of squares (einsum trick)
+        sigma2  = diagR / self.df        #variance
+        t       = np.array(self.c.T*b).flatten() / ((sigma2*self.cXXc)**0.5 + eps)
+        return t
 
 
 class CalculatorCCA0D(object):
diff --git a/spm1d/stats/t.py b/spm1d/stats/t.py
index 48a22103..cc646caa 100644
--- a/spm1d/stats/t.py
+++ b/spm1d/stats/t.py
@@ -50,9 +50,13 @@ def glm(Y, X, c, Q=None, roi=None):
     ### solve the GLM:
     b       = np.linalg.pinv(X) @ Y    #parameters
     eij     = Y - X@b                  #residuals
-    R       = eij.T@eij                #residuals: sum of squares
     df      = Y.shape[0] - rank(X)     #degrees of freedom
-    sigma2  = np.diag(R)/df            #variance
+    # previous sigma2 calculation (slow when Q gets large: about 5 ms for Q=1000 and about 900 ms for Q=10000!)
+    # R      = eij.T@eij                #residuals: sum of squares
+    # sigma2 = np.diag(R)/df            #variance
+    # new sigma2 calculation (using Einstein summation trick)
+    diagR   = np.einsum('ij,ji->i', eij.T, eij)   #residual sum of squares (einsum trick)
+    sigma2  = diagR / df               #variance
     ### compute t statistic
     cXXc    = c.T @ np.linalg.inv(X.T@X) @ c
     cXXc    = float(cXXc[0,0]) if isinstance(cXXc, np.ndarray) else float(cXXc)