diff --git a/lapack-netlib/SRC/VARIANTS/Makefile b/lapack-netlib/SRC/VARIANTS/Makefile index 35e50cbc2c..4b0575cc6f 100644 --- a/lapack-netlib/SRC/VARIANTS/Makefile +++ b/lapack-netlib/SRC/VARIANTS/Makefile @@ -30,9 +30,11 @@ LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o +LARFTL2 = larft/LL-LVL2/clarft.o larft/LL-LVL2/dlarft.o larft/LL-LVL2/slarft.o larft/LL-LVL2/zlarft.o + .PHONY: all -all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a +all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a larftl2.a cholrl.a: $(CHOLRL) $(AR) $(ARFLAGS) $@ $^ @@ -58,9 +60,13 @@ qrll.a: $(QRLL) $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ +larftl2.a: $(LARFTL2) + $(AR) $(ARFLAGS) $@ $^ + $(RANLIB) $@ + .PHONY: clean cleanobj cleanlib clean: cleanobj cleanlib cleanobj: - rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) + rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) $(LARFTL2) cleanlib: rm -f *.a diff --git a/lapack-netlib/SRC/VARIANTS/README b/lapack-netlib/SRC/VARIANTS/README index ef7626debe..217cfa3e01 100644 --- a/lapack-netlib/SRC/VARIANTS/README +++ b/lapack-netlib/SRC/VARIANTS/README @@ -23,6 +23,7 @@ This directory contains several variants of LAPACK routines in single/double/com - [sdcz]geqrf with QR Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/qr/LL - [sdcz]potrf with Cholesky Right Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/RL - [sdcz]potrf with Cholesky Top Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/TOP + - [sdcz]larft using a Left Looking Level 2 BLAS version algorithm - Directory: SRC/VARIANTS/larft/LL-LVL2 References:For a more detailed description please refer to - [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), @@ -44,6 +45,7 @@ Corresponding libraries created in SRC/VARIANTS: - QR Left Looking : qrll.a - Cholesky Right Looking : cholrl.a - Cholesky Top : choltop.a + - LARFT Level 2: larftl2.a =========== diff --git a/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/clarft.f b/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/clarft.f new file mode 100644 index 0000000000..9a7000eff3 --- /dev/null +++ b/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/clarft.f @@ -0,0 +1,328 @@ +*> \brief \b CLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARFT forms the triangular factor T of a complex block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**H +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**H * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larft +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGEMV, CTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( PREVLASTV, I ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) +* + CALL CGEMV( 'Conjugate transpose', J-I, I-1, + $ -TAU( I ), V( I+1, 1 ), LDV, + $ V( I+1, I ), 1, + $ ONE, T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H +* + CALL CGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), LDT ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, + $ T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) +* + CALL CGEMV( 'Conjugate transpose', N-K+I-J, K-I, + $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), + $ 1, ONE, T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H +* + CALL CGEMM( 'N', 'C', K-I, 1, N-K+I-J, + $ -TAU( I ), + $ V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), LDT ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', + $ K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of CLARFT +* + END diff --git a/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/dlarft.f b/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/dlarft.f new file mode 100644 index 0000000000..19b7c7b1b2 --- /dev/null +++ b/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/dlarft.f @@ -0,0 +1,326 @@ +*> \brief \b DLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFT forms the triangular factor T of a real block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**T +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**T * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larft +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( I, PREVLASTV ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( I , J ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) +* + CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, + $ T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T +* + CALL DGEMV( 'No transpose', I-1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, ONE, + $ T( 1, I ), 1 ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, + $ T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( N-K+I , J ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) +* + CALL DGEMV( 'Transpose', N-K+I-J, K-I, + $ -TAU( I ), + $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, + $ T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T +* + CALL DGEMV( 'No transpose', K-I, N-K+I-J, + $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), 1 ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', + $ K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of DLARFT +* + END diff --git a/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/slarft.f b/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/slarft.f new file mode 100644 index 0000000000..e1578e2587 --- /dev/null +++ b/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/slarft.f @@ -0,0 +1,326 @@ +*> \brief \b SLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARFT forms the triangular factor T of a real block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**T +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**T * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larft +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, STRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( I, PREVLASTV ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( I , J ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) +* + CALL SGEMV( 'Transpose', J-I, I-1, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, + $ T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T +* + CALL SGEMV( 'No transpose', I-1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), 1 ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, + $ T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( N-K+I , J ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) +* + CALL SGEMV( 'Transpose', N-K+I-J, K-I, + $ -TAU( I ), + $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, + $ T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T +* + CALL SGEMV( 'No transpose', K-I, N-K+I-J, + $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), 1 ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL STRMV( 'Lower', 'No transpose', 'Non-unit', + $ K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of SLARFT +* + END diff --git a/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/zlarft.f b/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/zlarft.f new file mode 100644 index 0000000000..6abadd501e --- /dev/null +++ b/lapack-netlib/SRC/VARIANTS/larft/LL_LVL2/zlarft.f @@ -0,0 +1,327 @@ +*> \brief \b ZLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFT forms the triangular factor T of a complex block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**H +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**H * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larft +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZTRMV, ZGEMM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( PREVLASTV, I ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) +* + CALL ZGEMV( 'Conjugate transpose', J-I, I-1, + $ -TAU( I ), V( I+1, 1 ), LDV, + $ V( I+1, I ), 1, ONE, T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H +* + CALL ZGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), LDT ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, + $ T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) +* + CALL ZGEMV( 'Conjugate transpose', N-K+I-J, K-I, + $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), + $ 1, ONE, T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H +* + CALL ZGEMM( 'N', 'C', K-I, 1, N-K+I-J, + $ -TAU( I ), + $ V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), LDT ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', + $ K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of ZLARFT +* + END diff --git a/lapack-netlib/SRC/clarft.f b/lapack-netlib/SRC/clarft.f index fdf80b78e9..de8b97bf9c 100644 --- a/lapack-netlib/SRC/clarft.f +++ b/lapack-netlib/SRC/clarft.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) * * .. Scalar Arguments .. * CHARACTER DIRECT, STOREV @@ -130,7 +130,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERauxiliary +*> \ingroup larft * *> \par Further Details: * ===================== @@ -159,167 +159,473 @@ *> \endverbatim *> * ===================================================================== - SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) + RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, + $ TAU, T, LDT ) * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -* .. Scalar Arguments .. - CHARACTER DIRECT, STOREV - INTEGER K, LDT, LDV, N +* .. Scalar Arguments +* + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N * .. * .. Array Arguments .. - COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) -* .. * -* ===================================================================== + COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. * * .. Parameters .. - COMPLEX ONE, ZERO - PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), - $ ZERO = ( 0.0E+0, 0.0E+0 ) ) -* .. +* + COMPLEX ONE, NEG_ONE, ZERO + PARAMETER(ONE=1.0E+0, ZERO = 0.0E+0, NEG_ONE=-1.0E+0) +* * .. Local Scalars .. - INTEGER I, J, PREVLASTV, LASTV -* .. +* + INTEGER I,J,L + LOGICAL QR,LQ,QL,DIRF,COLV +* * .. External Subroutines .. - EXTERNAL CGEMM, CGEMV, CTRMV -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME +* + EXTERNAL CTRMM,CGEMM,CLACPY +* +* .. External Functions.. +* + LOGICAL LSAME + EXTERNAL LSAME +* +* .. Intrinsic Functions.. +* + INTRINSIC CONJG +* +* The general scheme used is inspired by the approach inside DGEQRT3 +* which was (at the time of writing this code): +* Based on the algorithm of Elmroth and Gustavson, +* IBM J. Res. Develop. Vol 44 No. 4 July 2000. * .. * .. Executable Statements .. * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN -* - IF( LSAME( DIRECT, 'F' ) ) THEN - PREVLASTV = N - DO I = 1, K - PREVLASTV = MAX( PREVLASTV, I ) - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = 1, I - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) -* - CALL CGEMV( 'Conjugate transpose', J-I, I-1, - $ -TAU( I ), V( I+1, 1 ), LDV, - $ V( I+1, I ), 1, - $ ONE, T( 1, I ), 1 ) - ELSE -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * V( J , I ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H -* - CALL CGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), - $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, - $ ONE, T( 1, I ), LDT ) - END IF -* -* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) -* - CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, - $ LDT, T( 1, I ), 1 ) - T( I, I ) = TAU( I ) - IF( I.GT.1 ) THEN - PREVLASTV = MAX( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF + IF(N.EQ.0.OR.K.EQ.0) THEN + RETURN + END IF +* +* Base case +* + IF(N.EQ.1.OR.K.EQ.1) THEN + T(1,1) = TAU(1) + RETURN + END IF +* +* Beginning of executable statements +* + L = K / 2 +* +* Determine what kind of Q we need to compute +* We assume that if the user doesn't provide 'F' for DIRECT, +* then they meant to provide 'B' and if they don't provide +* 'C' for STOREV, then they meant to provide 'R' +* + DIRF = LSAME(DIRECT,'F') + COLV = LSAME(STOREV,'C') +* +* QR happens when we have forward direction in column storage +* + QR = DIRF.AND.COLV +* +* LQ happens when we have forward direction in row storage +* + LQ = DIRF.AND.(.NOT.COLV) +* +* QL happens when we have backward direction in column storage +* + QL = (.NOT.DIRF).AND.COLV +* +* The last case is RQ. Due to how we structured this, if the +* above 3 are false, then RQ must be true, so we never store +* this +* RQ happens when we have backward direction in row storage +* RQ = (.NOT.DIRF).AND.(.NOT.COLV) +* + IF(QR) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} 0 | +* |V_{2,1} V_{2,2}| +* |V_{3,1} V_{3,2}| +* |---------------| +* +* V_{1,1}\in\C^{l,l} unit lower triangular +* V_{2,1}\in\C^{k-l,l} rectangular +* V_{3,1}\in\C^{n-k,l} rectangular +* +* V_{2,2}\in\C^{k-l,k-l} unit lower triangular +* V_{3,2}\in\C^{n-k,k-l} rectangular +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\C^{l, l} upper triangular +* T_{2,2}\in\C^{k-l, k-l} upper triangular +* T_{1,2}\in\C^{l, k-l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') +* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' +* +* Define T{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} +* +* Compute T_{1,1} recursively +* + CALL CLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL CLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) +* +* Compute T_{1,2} +* T_{1,2} = V_{2,1}' +* + DO J = 1, L + DO I = 1, K-L + T(J, L+I) = CONJG(V(L+I, J)) + END DO END DO - ELSE - PREVLASTV = 1 - DO I = K, 1, -1 - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = I, K - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( I.LT.K ) THEN - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) -* - CALL CGEMV( 'Conjugate transpose', N-K+I-J, K-I, - $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), - $ 1, ONE, T( I+1, I ), 1 ) - ELSE -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * V( J, N-K+I ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H -* - CALL CGEMM( 'N', 'C', K-I, 1, N-K+I-J, -TAU( I ), - $ V( I+1, J ), LDV, V( I, J ), LDV, - $ ONE, T( I+1, I ), LDT ) - END IF -* -* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) -* - CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, - $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) - IF( I.GT.1 ) THEN - PREVLASTV = MIN( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF - T( I, I ) = TAU( I ) - END IF +* +* T_{1,2} = T_{1,2}*V_{2,2} +* + CALL CTRMM('Right', 'Lower', 'No transpose', 'Unit', L, + $ K-L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL CGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, + $ V(K+1, 1), LDV, V(K+1, L+1), LDV, ONE, T(1, L+1), + $ LDT) +* +* At this point, we have that T_{1,2} = V_1'*V_2 +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} +* respectively. +* +* T_{1,2} = -T_{1,1}*T_{1,2} +* + CALL CTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) +* +* T_{1,2} = T_{1,2}*T_{2,2} +* + CALL CTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT) + + ELSE IF(LQ) THEN +* +* Break V apart into 6 components +* +* V = |----------------------| +* |V_{1,1} V_{1,2} V{1,3}| +* |0 V_{2,2} V{2,3}| +* |----------------------| +* +* V_{1,1}\in\C^{l,l} unit upper triangular +* V_{1,2}\in\C^{l,k-l} rectangular +* V_{1,3}\in\C^{l,n-k} rectangular +* +* V_{2,2}\in\C^{k-l,k-l} unit upper triangular +* V_{2,3}\in\C^{k-l,n-k} rectangular +* +* Where l = floor(k/2) +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\C^{l, l} upper triangular +* T_{2,2}\in\C^{k-l, k-l} upper triangular +* T_{1,2}\in\C^{l, k-l} rectangular +* +* Then, consider the product: +* +* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) +* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 +* +* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'*T*V +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} +* +* Compute T_{1,1} recursively +* + CALL CLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL CLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) + +* +* Compute T_{1,2} +* T_{1,2} = V_{1,2} +* + CALL CLACPY('All', L, K-L, V(1, L+1), LDV, T(1, L+1), LDT) +* +* T_{1,2} = T_{1,2}*V_{2,2}' +* + CALL CTRMM('Right', 'Upper', 'Conjugate', 'Unit', L, K-L, + $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL CGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, + $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, T(1, L+1), LDT) +* +* At this point, we have that T_{1,2} = V_1*V_2' +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} +* respectively. +* +* T_{1,2} = -T_{1,1}*T_{1,2} +* + CALL CTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) + +* +* T_{1,2} = T_{1,2}*T_{2,2} +* + CALL CTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + ELSE IF(QL) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} V_{1,2}| +* |V_{2,1} V_{2,2}| +* |0 V_{3,2}| +* |---------------| +* +* V_{1,1}\in\C^{n-k,k-l} rectangular +* V_{2,1}\in\C^{k-l,k-l} unit upper triangular +* +* V_{1,2}\in\C^{n-k,l} rectangular +* V_{2,2}\in\C^{k-l,l} rectangular +* V_{3,2}\in\C^{l,l} unit upper triangular +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\C^{l, l} non-unit lower triangular +* T_{2,1}\in\C^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') +* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' +* +* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} +* +* Compute T_{1,1} recursively +* + CALL CLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL CLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) +* +* Compute T_{2,1} +* T_{2,1} = V_{2,2}' +* + DO J = 1, K-L + DO I = 1, L + T(K-L+I, J) = CONJG(V(N-K+J, K-L+I)) + END DO END DO - END IF - RETURN * -* End of CLARFT +* T_{2,1} = T_{2,1}*V_{2,1} +* + CALL CTRMM('Right', 'Upper', 'No transpose', 'Unit', L, + $ K-L, ONE, V(N-K+1, 1), LDV, T(K-L+1, 1), LDT) + +* +* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL CGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, + $ V(1, K-L+1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) +* +* At this point, we have that T_{2,1} = V_2'*V_1 +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} +* respectively. +* +* T_{2,1} = -T_{2,2}*T_{2,1} +* + CALL CTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) * - END +* T_{2,1} = T_{2,1}*T_{1,1} +* + CALL CTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) + ELSE +* +* Else means RQ case +* +* Break V apart into 6 components +* +* V = |-----------------------| +* |V_{1,1} V_{1,2} 0 | +* |V_{2,1} V_{2,2} V_{2,3}| +* |-----------------------| +* +* V_{1,1}\in\C^{k-l,n-k} rectangular +* V_{1,2}\in\C^{k-l,k-l} unit lower triangular +* +* V_{2,1}\in\C^{l,n-k} rectangular +* V_{2,2}\in\C^{l,k-l} rectangular +* V_{2,3}\in\C^{l,l} unit lower triangular +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\C^{l, l} non-unit lower triangular +* T_{2,1}\in\C^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) +* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 +* +* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'*T*V +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} +* +* Compute T_{1,1} recursively +* + CALL CLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL CLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) +* +* Compute T_{2,1} +* T_{2,1} = V_{2,2} +* + CALL CLACPY('All', L, K-L, V(K-L+1, N-K+1), LDV, + $ T(K-L+1, 1), LDT) + +* +* T_{2,1} = T_{2,1}*V_{1,2}' +* + CALL CTRMM('Right', 'Lower', 'Conjugate', 'Unit', L, K-L, + $ ONE, V(1, N-K+1), LDV, T(K-L+1,1), LDT) + +* +* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL CGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, + $ V(K-L+1, 1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) + +* +* At this point, we have that T_{2,1} = V_2*V_1' +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} +* respectively. +* +* T_{2,1} = -T_{2,2}*T_{2,1} +* + CALL CTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) + +* +* T_{2,1} = T_{2,1}*T_{1,1} +* + CALL CTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) + END IF + END SUBROUTINE diff --git a/lapack-netlib/SRC/dlarft.f b/lapack-netlib/SRC/dlarft.f index a8d9de61f1..c27bb1a806 100644 --- a/lapack-netlib/SRC/dlarft.f +++ b/lapack-netlib/SRC/dlarft.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) * * .. Scalar Arguments .. * CHARACTER DIRECT, STOREV @@ -130,7 +130,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleOTHERauxiliary +*> \ingroup larft * *> \par Further Details: * ===================== @@ -159,165 +159,470 @@ *> \endverbatim *> * ===================================================================== - SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) + RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, + $ TAU, T, LDT ) * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -* .. Scalar Arguments .. +* .. Scalar Arguments +* CHARACTER DIRECT, STOREV INTEGER K, LDT, LDV, N * .. * .. Array Arguments .. +* DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) * .. * -* ===================================================================== -* * .. Parameters .. - DOUBLE PRECISION ONE, ZERO - PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) -* .. +* + DOUBLE PRECISION ONE, NEG_ONE, ZERO + PARAMETER(ONE=1.0D+0, ZERO = 0.0D+0, NEG_ONE=-1.0D+0) +* * .. Local Scalars .. - INTEGER I, J, PREVLASTV, LASTV -* .. +* + INTEGER I,J,L + LOGICAL QR,LQ,QL,DIRF,COLV +* * .. External Subroutines .. - EXTERNAL DGEMV, DTRMV -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME +* + EXTERNAL DTRMM,DGEMM,DLACPY +* +* .. External Functions.. +* + LOGICAL LSAME + EXTERNAL LSAME +* +* The general scheme used is inspired by the approach inside DGEQRT3 +* which was (at the time of writing this code): +* Based on the algorithm of Elmroth and Gustavson, +* IBM J. Res. Develop. Vol 44 No. 4 July 2000. * .. * .. Executable Statements .. * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN -* - IF( LSAME( DIRECT, 'F' ) ) THEN - PREVLASTV = N - DO I = 1, K - PREVLASTV = MAX( I, PREVLASTV ) - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = 1, I - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * V( I , J ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) -* - CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ), - $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, - $ T( 1, I ), 1 ) - ELSE -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * V( J , I ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T -* - CALL DGEMV( 'No transpose', I-1, J-I, -TAU( I ), - $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, ONE, - $ T( 1, I ), 1 ) - END IF -* -* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) -* - CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, - $ LDT, T( 1, I ), 1 ) - T( I, I ) = TAU( I ) - IF( I.GT.1 ) THEN - PREVLASTV = MAX( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF + IF(N.EQ.0.OR.K.EQ.0) THEN + RETURN + END IF +* +* Base case +* + IF(N.EQ.1.OR.K.EQ.1) THEN + T(1,1) = TAU(1) + RETURN + END IF +* +* Beginning of executable statements +* + L = K / 2 +* +* Determine what kind of Q we need to compute +* We assume that if the user doesn't provide 'F' for DIRECT, +* then they meant to provide 'B' and if they don't provide +* 'C' for STOREV, then they meant to provide 'R' +* + DIRF = LSAME(DIRECT,'F') + COLV = LSAME(STOREV,'C') +* +* QR happens when we have forward direction in column storage +* + QR = DIRF.AND.COLV +* +* LQ happens when we have forward direction in row storage +* + LQ = DIRF.AND.(.NOT.COLV) +* +* QL happens when we have backward direction in column storage +* + QL = (.NOT.DIRF).AND.COLV +* +* The last case is RQ. Due to how we structured this, if the +* above 3 are false, then RQ must be true, so we never store +* this +* RQ happens when we have backward direction in row storage +* RQ = (.NOT.DIRF).AND.(.NOT.COLV) +* + IF(QR) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} 0 | +* |V_{2,1} V_{2,2}| +* |V_{3,1} V_{3,2}| +* |---------------| +* +* V_{1,1}\in\R^{l,l} unit lower triangular +* V_{2,1}\in\R^{k-l,l} rectangular +* V_{3,1}\in\R^{n-k,l} rectangular +* +* V_{2,2}\in\R^{k-l,k-l} unit lower triangular +* V_{3,2}\in\R^{n-k,k-l} rectangular +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\R^{l, l} upper triangular +* T_{2,2}\in\R^{k-l, k-l} upper triangular +* T_{1,2}\in\R^{l, k-l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') +* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' +* +* Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} +* +* Compute T_{1,1} recursively +* + CALL DLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL DLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) +* +* Compute T_{1,2} +* T_{1,2} = V_{2,1}' +* + DO J = 1, L + DO I = 1, K-L + T(J, L+I) = V(L+I, J) + END DO END DO - ELSE - PREVLASTV = 1 - DO I = K, 1, -1 - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = I, K - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( I.LT.K ) THEN - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * V( N-K+I , J ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) -* - CALL DGEMV( 'Transpose', N-K+I-J, K-I, -TAU( I ), - $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, - $ T( I+1, I ), 1 ) - ELSE -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * V( J, N-K+I ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T -* - CALL DGEMV( 'No transpose', K-I, N-K+I-J, - $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, - $ ONE, T( I+1, I ), 1 ) - END IF -* -* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) -* - CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, - $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) - IF( I.GT.1 ) THEN - PREVLASTV = MIN( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF - T( I, I ) = TAU( I ) - END IF +* +* T_{1,2} = T_{1,2}*V_{2,2} +* + CALL DTRMM('Right', 'Lower', 'No transpose', 'Unit', L, + $ K-L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL DGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, + $ V(K+1, 1), LDV, V(K+1, L+1), LDV, ONE, + $ T(1, L+1), LDT) +* +* At this point, we have that T_{1,2} = V_1'*V_2 +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} +* respectively. +* +* T_{1,2} = -T_{1,1}*T_{1,2} +* + CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) +* +* T_{1,2} = T_{1,2}*T_{2,2} +* + CALL DTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT) + + ELSE IF(LQ) THEN +* +* Break V apart into 6 components +* +* V = |----------------------| +* |V_{1,1} V_{1,2} V{1,3}| +* |0 V_{2,2} V{2,3}| +* |----------------------| +* +* V_{1,1}\in\R^{l,l} unit upper triangular +* V_{1,2}\in\R^{l,k-l} rectangular +* V_{1,3}\in\R^{l,n-k} rectangular +* +* V_{2,2}\in\R^{k-l,k-l} unit upper triangular +* V_{2,3}\in\R^{k-l,n-k} rectangular +* +* Where l = floor(k/2) +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\R^{l, l} upper triangular +* T_{2,2}\in\R^{k-l, k-l} upper triangular +* T_{1,2}\in\R^{l, k-l} rectangular +* +* Then, consider the product: +* +* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) +* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 +* +* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'*T*V +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} +* +* Compute T_{1,1} recursively +* + CALL DLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL DLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) + +* +* Compute T_{1,2} +* T_{1,2} = V_{1,2} +* + CALL DLACPY('All', L, K-L, V(1, L+1), LDV, T(1, L+1), LDT) +* +* T_{1,2} = T_{1,2}*V_{2,2}' +* + CALL DTRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L, + $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL DGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, + $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, + $ T(1, L+1), LDT) +* +* At this point, we have that T_{1,2} = V_1*V_2' +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} +* respectively. +* +* T_{1,2} = -T_{1,1}*T_{1,2} +* + CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) + +* +* T_{1,2} = T_{1,2}*T_{2,2} +* + CALL DTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT) + ELSE IF(QL) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} V_{1,2}| +* |V_{2,1} V_{2,2}| +* |0 V_{3,2}| +* |---------------| +* +* V_{1,1}\in\R^{n-k,k-l} rectangular +* V_{2,1}\in\R^{k-l,k-l} unit upper triangular +* +* V_{1,2}\in\R^{n-k,l} rectangular +* V_{2,2}\in\R^{k-l,l} rectangular +* V_{3,2}\in\R^{l,l} unit upper triangular +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\R^{l, l} non-unit lower triangular +* T_{2,1}\in\R^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') +* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' +* +* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} +* +* Compute T_{1,1} recursively +* + CALL DLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL DLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) +* +* Compute T_{2,1} +* T_{2,1} = V_{2,2}' +* + DO J = 1, K-L + DO I = 1, L + T(K-L+I, J) = V(N-K+J, K-L+I) + END DO END DO - END IF - RETURN * -* End of DLARFT +* T_{2,1} = T_{2,1}*V_{2,1} +* + CALL DTRMM('Right', 'Upper', 'No transpose', 'Unit', L, + $ K-L, ONE, V(N-K+1, 1), LDV, T(K-L+1, 1), LDT) + +* +* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL DGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, + $ V(1, K-L+1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) +* +* At this point, we have that T_{2,1} = V_2'*V_1 +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} +* respectively. +* +* T_{2,1} = -T_{2,2}*T_{2,1} +* + CALL DTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) * - END +* T_{2,1} = T_{2,1}*T_{1,1} +* + CALL DTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) + ELSE +* +* Else means RQ case +* +* Break V apart into 6 components +* +* V = |-----------------------| +* |V_{1,1} V_{1,2} 0 | +* |V_{2,1} V_{2,2} V_{2,3}| +* |-----------------------| +* +* V_{1,1}\in\R^{k-l,n-k} rectangular +* V_{1,2}\in\R^{k-l,k-l} unit lower triangular +* +* V_{2,1}\in\R^{l,n-k} rectangular +* V_{2,2}\in\R^{l,k-l} rectangular +* V_{2,3}\in\R^{l,l} unit lower triangular +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\R^{l, l} non-unit lower triangular +* T_{2,1}\in\R^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) +* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 +* +* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'*T*V +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} +* +* Compute T_{1,1} recursively +* + CALL DLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL DLARFT(DIRECT, STOREV, N, L, V(K-L+1, 1), LDV, + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) +* +* Compute T_{2,1} +* T_{2,1} = V_{2,2} +* + CALL DLACPY('All', L, K-L, V(K-L+1, N-K+1), LDV, + $ T(K-L+1, 1), LDT) + +* +* T_{2,1} = T_{2,1}*V_{1,2}' +* + CALL DTRMM('Right', 'Lower', 'Transpose', 'Unit', L, K-L, + $ ONE, V(1, N-K+1), LDV, T(K-L+1, 1), LDT) + +* +* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL DGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, + $ V(K-L+1, 1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) + +* +* At this point, we have that T_{2,1} = V_2*V_1' +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} +* respectively. +* +* T_{2,1} = -T_{2,2}*T_{2,1} +* + CALL DTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) + +* +* T_{2,1} = T_{2,1}*T_{1,1} +* + CALL DTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) + END IF + END SUBROUTINE diff --git a/lapack-netlib/SRC/slarft.f b/lapack-netlib/SRC/slarft.f index 9cfe0ad3f9..ad3a4d924c 100644 --- a/lapack-netlib/SRC/slarft.f +++ b/lapack-netlib/SRC/slarft.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) * * .. Scalar Arguments .. * CHARACTER DIRECT, STOREV @@ -127,10 +127,10 @@ * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver +*> \author Johnathan Rhyne, Univ. of Colorado Denver (original author, 2024) *> \author NAG Ltd. * -*> \ingroup realOTHERauxiliary +*> \ingroup larft * *> \par Further Details: * ===================== @@ -159,165 +159,470 @@ *> \endverbatim *> * ===================================================================== - SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) + RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, + $ TAU, T, LDT ) * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -* .. Scalar Arguments .. +* .. Scalar Arguments +* CHARACTER DIRECT, STOREV INTEGER K, LDT, LDV, N * .. * .. Array Arguments .. +* REAL T( LDT, * ), TAU( * ), V( LDV, * ) * .. * -* ===================================================================== -* * .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. +* + REAL ONE, NEG_ONE, ZERO + PARAMETER(ONE=1.0E+0, ZERO = 0.0E+0, NEG_ONE=-1.0E+0) +* * .. Local Scalars .. - INTEGER I, J, PREVLASTV, LASTV -* .. +* + INTEGER I,J,L + LOGICAL QR,LQ,QL,DIRF,COLV +* * .. External Subroutines .. - EXTERNAL SGEMV, STRMV -* .. -* .. External Functions .. +* + EXTERNAL STRMM,SGEMM,SLACPY +* +* .. External Functions.. +* LOGICAL LSAME EXTERNAL LSAME +* +* The general scheme used is inspired by the approach inside DGEQRT3 +* which was (at the time of writing this code): +* Based on the algorithm of Elmroth and Gustavson, +* IBM J. Res. Develop. Vol 44 No. 4 July 2000. * .. * .. Executable Statements .. * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN -* - IF( LSAME( DIRECT, 'F' ) ) THEN - PREVLASTV = N - DO I = 1, K - PREVLASTV = MAX( I, PREVLASTV ) - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = 1, I - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * V( I , J ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) -* - CALL SGEMV( 'Transpose', J-I, I-1, -TAU( I ), - $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, - $ T( 1, I ), 1 ) - ELSE -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * V( J , I ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T -* - CALL SGEMV( 'No transpose', I-1, J-I, -TAU( I ), - $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, - $ ONE, T( 1, I ), 1 ) - END IF -* -* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) -* - CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, - $ LDT, T( 1, I ), 1 ) - T( I, I ) = TAU( I ) - IF( I.GT.1 ) THEN - PREVLASTV = MAX( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF + IF(N.EQ.0.OR.K.EQ.0) THEN + RETURN + END IF +* +* Base case +* + IF(N.EQ.1.OR.K.EQ.1) THEN + T(1,1) = TAU(1) + RETURN + END IF +* +* Beginning of executable statements +* + L = K / 2 +* +* Determine what kind of Q we need to compute +* We assume that if the user doesn't provide 'F' for DIRECT, +* then they meant to provide 'B' and if they don't provide +* 'C' for STOREV, then they meant to provide 'R' +* + DIRF = LSAME(DIRECT,'F') + COLV = LSAME(STOREV,'C') +* +* QR happens when we have forward direction in column storage +* + QR = DIRF.AND.COLV +* +* LQ happens when we have forward direction in row storage +* + LQ = DIRF.AND.(.NOT.COLV) +* +* QL happens when we have backward direction in column storage +* + QL = (.NOT.DIRF).AND.COLV +* +* The last case is RQ. Due to how we structured this, if the +* above 3 are false, then RQ must be true, so we never store +* this +* RQ happens when we have backward direction in row storage +* RQ = (.NOT.DIRF).AND.(.NOT.COLV) +* + IF(QR) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} 0 | +* |V_{2,1} V_{2,2}| +* |V_{3,1} V_{3,2}| +* |---------------| +* +* V_{1,1}\in\R^{l,l} unit lower triangular +* V_{2,1}\in\R^{k-l,l} rectangular +* V_{3,1}\in\R^{n-k,l} rectangular +* +* V_{2,2}\in\R^{k-l,k-l} unit lower triangular +* V_{3,2}\in\R^{n-k,k-l} rectangular +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\R^{l, l} upper triangular +* T_{2,2}\in\R^{k-l, k-l} upper triangular +* T_{1,2}\in\R^{l, k-l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') +* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' +* +* Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} +* +* Compute T_{1,1} recursively +* + CALL SLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL SLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) +* +* Compute T_{1,2} +* T_{1,2} = V_{2,1}' +* + DO J = 1, L + DO I = 1, K-L + T(J, L+I) = V(L+I, J) + END DO END DO - ELSE - PREVLASTV = 1 - DO I = K, 1, -1 - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = I, K - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( I.LT.K ) THEN - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * V( N-K+I , J ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) -* - CALL SGEMV( 'Transpose', N-K+I-J, K-I, -TAU( I ), - $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, - $ T( I+1, I ), 1 ) - ELSE -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * V( J, N-K+I ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T -* - CALL SGEMV( 'No transpose', K-I, N-K+I-J, - $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, - $ ONE, T( I+1, I ), 1 ) - END IF -* -* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) -* - CALL STRMV( 'Lower', 'No transpose', 'Non-unit', K-I, - $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) - IF( I.GT.1 ) THEN - PREVLASTV = MIN( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF - T( I, I ) = TAU( I ) - END IF +* +* T_{1,2} = T_{1,2}*V_{2,2} +* + CALL STRMM('Right', 'Lower', 'No transpose', 'Unit', L, + $ K-L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL SGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, + $ V(K+1, 1), LDV, V(K+1, L+1), LDV, ONE, + $ T(1, L+1), LDT) +* +* At this point, we have that T_{1,2} = V_1'*V_2 +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} +* respectively. +* +* T_{1,2} = -T_{1,1}*T_{1,2} +* + CALL STRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) +* +* T_{1,2} = T_{1,2}*T_{2,2} +* + CALL STRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT) + + ELSE IF(LQ) THEN +* +* Break V apart into 6 components +* +* V = |----------------------| +* |V_{1,1} V_{1,2} V{1,3}| +* |0 V_{2,2} V{2,3}| +* |----------------------| +* +* V_{1,1}\in\R^{l,l} unit upper triangular +* V_{1,2}\in\R^{l,k-l} rectangular +* V_{1,3}\in\R^{l,n-k} rectangular +* +* V_{2,2}\in\R^{k-l,k-l} unit upper triangular +* V_{2,3}\in\R^{k-l,n-k} rectangular +* +* Where l = floor(k/2) +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\R^{l, l} upper triangular +* T_{2,2}\in\R^{k-l, k-l} upper triangular +* T_{1,2}\in\R^{l, k-l} rectangular +* +* Then, consider the product: +* +* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) +* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 +* +* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'*T*V +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} +* +* Compute T_{1,1} recursively +* + CALL SLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL SLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) + +* +* Compute T_{1,2} +* T_{1,2} = V_{1,2} +* + CALL SLACPY('All', L, K-L, V(1, L+1), LDV, T(1, L+1), LDT) +* +* T_{1,2} = T_{1,2}*V_{2,2}' +* + CALL STRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L, + $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL SGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, + $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, + $ T(1, L+1), LDT) +* +* At this point, we have that T_{1,2} = V_1*V_2' +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} +* respectively. +* +* T_{1,2} = -T_{1,1}*T_{1,2} +* + CALL STRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) + +* +* T_{1,2} = T_{1,2}*T_{2,2} +* + CALL STRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT) + ELSE IF(QL) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} V_{1,2}| +* |V_{2,1} V_{2,2}| +* |0 V_{3,2}| +* |---------------| +* +* V_{1,1}\in\R^{n-k,k-l} rectangular +* V_{2,1}\in\R^{k-l,k-l} unit upper triangular +* +* V_{1,2}\in\R^{n-k,l} rectangular +* V_{2,2}\in\R^{k-l,l} rectangular +* V_{3,2}\in\R^{l,l} unit upper triangular +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\R^{l, l} non-unit lower triangular +* T_{2,1}\in\R^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') +* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' +* +* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} +* +* Compute T_{1,1} recursively +* + CALL SLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL SLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) +* +* Compute T_{2,1} +* T_{2,1} = V_{2,2}' +* + DO J = 1, K-L + DO I = 1, L + T(K-L+I, J) = V(N-K+J, K-L+I) + END DO END DO - END IF - RETURN * -* End of SLARFT +* T_{2,1} = T_{2,1}*V_{2,1} +* + CALL STRMM('Right', 'Upper', 'No transpose', 'Unit', L, + $ K-L, ONE, V(N-K+1, 1), LDV, T(K-L+1, 1), LDT) + +* +* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL SGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, + $ V(1, K-L+1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) +* +* At this point, we have that T_{2,1} = V_2'*V_1 +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} +* respectively. +* +* T_{2,1} = -T_{2,2}*T_{2,1} +* + CALL STRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) * - END +* T_{2,1} = T_{2,1}*T_{1,1} +* + CALL STRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) + ELSE +* +* Else means RQ case +* +* Break V apart into 6 components +* +* V = |-----------------------| +* |V_{1,1} V_{1,2} 0 | +* |V_{2,1} V_{2,2} V_{2,3}| +* |-----------------------| +* +* V_{1,1}\in\R^{k-l,n-k} rectangular +* V_{1,2}\in\R^{k-l,k-l} unit lower triangular +* +* V_{2,1}\in\R^{l,n-k} rectangular +* V_{2,2}\in\R^{l,k-l} rectangular +* V_{2,3}\in\R^{l,l} unit lower triangular +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\R^{l, l} non-unit lower triangular +* T_{2,1}\in\R^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) +* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 +* +* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'TV +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} +* +* Compute T_{1,1} recursively +* + CALL SLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL SLARFT(DIRECT, STOREV, N, L, V(K-L+1, 1), LDV, + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) +* +* Compute T_{2,1} +* T_{2,1} = V_{2,2} +* + CALL SLACPY('All', L, K-L, V(K-L+1, N-K+1), LDV, + $ T(K-L+1, 1), LDT) + +* +* T_{2,1} = T_{2,1}*V_{1,2}' +* + CALL STRMM('Right', 'Lower', 'Transpose', 'Unit', L, K-L, + $ ONE, V(1, N-K+1), LDV, T(K-L+1, 1), LDT) + +* +* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL SGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, + $ V(K-L+1, 1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) + +* +* At this point, we have that T_{2,1} = V_2*V_1' +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} +* respectively. +* +* T_{2,1} = -T_{2,2}*T_{2,1} +* + CALL STRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) + +* +* T_{2,1} = T_{2,1}*T_{1,1} +* + CALL STRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) + END IF + END SUBROUTINE diff --git a/lapack-netlib/SRC/zlarft.f b/lapack-netlib/SRC/zlarft.f index 5ad0996fab..900795afad 100644 --- a/lapack-netlib/SRC/zlarft.f +++ b/lapack-netlib/SRC/zlarft.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) * * .. Scalar Arguments .. * CHARACTER DIRECT, STOREV @@ -130,7 +130,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERauxiliary +*> \ingroup larft * *> \par Further Details: * ===================== @@ -159,166 +159,474 @@ *> \endverbatim *> * ===================================================================== - SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) + RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, + $ TAU, T, LDT ) * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -* .. Scalar Arguments .. - CHARACTER DIRECT, STOREV - INTEGER K, LDT, LDV, N +* .. Scalar Arguments +* + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N * .. * .. Array Arguments .. - COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) -* .. * -* ===================================================================== + COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. * * .. Parameters .. - COMPLEX*16 ONE, ZERO - PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), - $ ZERO = ( 0.0D+0, 0.0D+0 ) ) -* .. +* + COMPLEX*16 ONE, NEG_ONE, ZERO + PARAMETER(ONE=1.0D+0, ZERO = 0.0D+0, NEG_ONE=-1.0D+0) +* * .. Local Scalars .. - INTEGER I, J, PREVLASTV, LASTV -* .. +* + INTEGER I,J,L + LOGICAL QR,LQ,QL,DIRF,COLV +* * .. External Subroutines .. - EXTERNAL ZGEMV, ZTRMV, ZGEMM -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME +* + EXTERNAL ZTRMM,ZGEMM,ZLACPY +* +* .. External Functions.. +* + LOGICAL LSAME + EXTERNAL LSAME +* +* .. Intrinsic Functions.. +* + INTRINSIC CONJG +* +* The general scheme used is inspired by the approach inside DGEQRT3 +* which was (at the time of writing this code): +* Based on the algorithm of Elmroth and Gustavson, +* IBM J. Res. Develop. Vol 44 No. 4 July 2000. * .. * .. Executable Statements .. * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN -* - IF( LSAME( DIRECT, 'F' ) ) THEN - PREVLASTV = N - DO I = 1, K - PREVLASTV = MAX( PREVLASTV, I ) - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = 1, I - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) -* - CALL ZGEMV( 'Conjugate transpose', J-I, I-1, - $ -TAU( I ), V( I+1, 1 ), LDV, - $ V( I+1, I ), 1, ONE, T( 1, I ), 1 ) - ELSE -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * V( J , I ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H -* - CALL ZGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), - $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, - $ ONE, T( 1, I ), LDT ) - END IF -* -* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) -* - CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, - $ LDT, T( 1, I ), 1 ) - T( I, I ) = TAU( I ) - IF( I.GT.1 ) THEN - PREVLASTV = MAX( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF + IF(N.EQ.0.OR.K.EQ.0) THEN + RETURN + END IF +* +* Base case +* + IF(N.EQ.1.OR.K.EQ.1) THEN + T(1,1) = TAU(1) + RETURN + END IF +* +* Beginning of executable statements +* + L = K / 2 +* +* Determine what kind of Q we need to compute +* We assume that if the user doesn't provide 'F' for DIRECT, +* then they meant to provide 'B' and if they don't provide +* 'C' for STOREV, then they meant to provide 'R' +* + DIRF = LSAME(DIRECT,'F') + COLV = LSAME(STOREV,'C') +* +* QR happens when we have forward direction in column storage +* + QR = DIRF.AND.COLV +* +* LQ happens when we have forward direction in row storage +* + LQ = DIRF.AND.(.NOT.COLV) +* +* QL happens when we have backward direction in column storage +* + QL = (.NOT.DIRF).AND.COLV +* +* The last case is RQ. Due to how we structured this, if the +* above 3 are false, then RQ must be true, so we never store +* this +* RQ happens when we have backward direction in row storage +* RQ = (.NOT.DIRF).AND.(.NOT.COLV) +* + IF(QR) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} 0 | +* |V_{2,1} V_{2,2}| +* |V_{3,1} V_{3,2}| +* |---------------| +* +* V_{1,1}\in\C^{l,l} unit lower triangular +* V_{2,1}\in\C^{k-l,l} rectangular +* V_{3,1}\in\C^{n-k,l} rectangular +* +* V_{2,2}\in\C^{k-l,k-l} unit lower triangular +* V_{3,2}\in\C^{n-k,k-l} rectangular +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\C^{l, l} upper triangular +* T_{2,2}\in\C^{k-l, k-l} upper triangular +* T_{1,2}\in\C^{l, k-l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') +* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' +* +* Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} +* +* Compute T_{1,1} recursively +* + CALL ZLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) +* +* Compute T_{1,2} +* T_{1,2} = V_{2,1}' +* + DO J = 1, L + DO I = 1, K-L + T(J, L+I) = CONJG(V(L+I, J)) + END DO END DO - ELSE - PREVLASTV = 1 - DO I = K, 1, -1 - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = I, K - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( I.LT.K ) THEN - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) -* - CALL ZGEMV( 'Conjugate transpose', N-K+I-J, K-I, - $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), - $ 1, ONE, T( I+1, I ), 1 ) - ELSE -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * V( J, N-K+I ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H -* - CALL ZGEMM( 'N', 'C', K-I, 1, N-K+I-J, -TAU( I ), - $ V( I+1, J ), LDV, V( I, J ), LDV, - $ ONE, T( I+1, I ), LDT ) - END IF -* -* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) -* - CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, - $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) - IF( I.GT.1 ) THEN - PREVLASTV = MIN( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF - T( I, I ) = TAU( I ) - END IF +* +* T_{1,2} = T_{1,2}*V_{2,2} +* + CALL ZTRMM('Right', 'Lower', 'No transpose', 'Unit', L, + $ K-L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL ZGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, + $ V(K+1, 1), LDV, V(K+1, L+1), LDV, ONE, + $ T(1, L+1), LDT) +* +* At this point, we have that T_{1,2} = V_1'*V_2 +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} +* respectively. +* +* T_{1,2} = -T_{1,1}*T_{1,2} +* + CALL ZTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) +* +* T_{1,2} = T_{1,2}*T_{2,2} +* + CALL ZTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT) + + ELSE IF(LQ) THEN +* +* Break V apart into 6 components +* +* V = |----------------------| +* |V_{1,1} V_{1,2} V{1,3}| +* |0 V_{2,2} V{2,3}| +* |----------------------| +* +* V_{1,1}\in\C^{l,l} unit upper triangular +* V_{1,2}\in\C^{l,k-l} rectangular +* V_{1,3}\in\C^{l,n-k} rectangular +* +* V_{2,2}\in\C^{k-l,k-l} unit upper triangular +* V_{2,3}\in\C^{k-l,n-k} rectangular +* +* Where l = floor(k/2) +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\C^{l, l} upper triangular +* T_{2,2}\in\C^{k-l, k-l} upper triangular +* T_{1,2}\in\C^{l, k-l} rectangular +* +* Then, consider the product: +* +* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) +* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 +* +* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'*T*V +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} +* +* Compute T_{1,1} recursively +* + CALL ZLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) + +* +* Compute T_{1,2} +* T_{1,2} = V_{1,2} +* + CALL ZLACPY('All', L, K-L, V(1, L+1), LDV, T(1, L+1), LDT) +* +* T_{1,2} = T_{1,2}*V_{2,2}' +* + CALL ZTRMM('Right', 'Upper', 'Conjugate', 'Unit', L, K-L, + $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL ZGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, + $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, + $ T(1, L+1), LDT) +* +* At this point, we have that T_{1,2} = V_1*V_2' +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} +* respectively. +* +* T_{1,2} = -T_{1,1}*T_{1,2} +* + CALL ZTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) + +* +* T_{1,2} = T_{1,2}*T_{2,2} +* + CALL ZTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT) + ELSE IF(QL) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} V_{1,2}| +* |V_{2,1} V_{2,2}| +* |0 V_{3,2}| +* |---------------| +* +* V_{1,1}\in\C^{n-k,k-l} rectangular +* V_{2,1}\in\C^{k-l,k-l} unit upper triangular +* +* V_{1,2}\in\C^{n-k,l} rectangular +* V_{2,2}\in\C^{k-l,l} rectangular +* V_{3,2}\in\C^{l,l} unit upper triangular +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\C^{l, l} non-unit lower triangular +* T_{2,1}\in\C^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') +* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' +* +* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} +* +* Compute T_{1,1} recursively +* + CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL ZLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) +* +* Compute T_{2,1} +* T_{2,1} = V_{2,2}' +* + DO J = 1, K-L + DO I = 1, L + T(K-L+I, J) = CONJG(V(N-K+J, K-L+I)) + END DO END DO - END IF - RETURN * -* End of ZLARFT +* T_{2,1} = T_{2,1}*V_{2,1} +* + CALL ZTRMM('Right', 'Upper', 'No transpose', 'Unit', L, + $ K-L, ONE, V(N-K+1, 1), LDV, T(K-L+1, 1), LDT) + +* +* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL ZGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, + $ V(1, K-L+1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) +* +* At this point, we have that T_{2,1} = V_2'*V_1 +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} +* respectively. +* +* T_{2,1} = -T_{2,2}*T_{2,1} +* + CALL ZTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) * - END +* T_{2,1} = T_{2,1}*T_{1,1} +* + CALL ZTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) + ELSE +* +* Else means RQ case +* +* Break V apart into 6 components +* +* V = |-----------------------| +* |V_{1,1} V_{1,2} 0 | +* |V_{2,1} V_{2,2} V_{2,3}| +* |-----------------------| +* +* V_{1,1}\in\C^{k-l,n-k} rectangular +* V_{1,2}\in\C^{k-l,k-l} unit lower triangular +* +* V_{2,1}\in\C^{l,n-k} rectangular +* V_{2,2}\in\C^{l,k-l} rectangular +* V_{2,3}\in\C^{l,l} unit lower triangular +* +* We will construct the T matrix +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| +* +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 +* +* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\C^{l, l} non-unit lower triangular +* T_{2,1}\in\C^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) +* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 +* +* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'*T*V +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} +* +* Compute T_{1,1} recursively +* + CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_{2,2} recursively +* + CALL ZLARFT(DIRECT, STOREV, N, L, V(K-L+1, 1), LDV, + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) +* +* Compute T_{2,1} +* T_{2,1} = V_{2,2} +* + CALL ZLACPY('All', L, K-L, V(K-L+1, N-K+1), LDV, + $ T(K-L+1, 1), LDT) + +* +* T_{2,1} = T_{2,1}*V_{1,2}' +* + CALL ZTRMM('Right', 'Lower', 'Conjugate', 'Unit', L, K-L, + $ ONE, V(1, N-K+1), LDV, T(K-L+1, 1), LDT) + +* +* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL ZGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, + $ V(K-L+1, 1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) + +* +* At this point, we have that T_{2,1} = V_2*V_1' +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} +* respectively. +* +* T_{2,1} = -T_{2,2}*T_{2,1} +* + CALL ZTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) + +* +* T_{2,1} = T_{2,1}*T_{1,1} +* + CALL ZTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) + END IF + END SUBROUTINE