Skip to content

Commit

Permalink
Remove manipulation of lambda due to missing guard digit
Browse files Browse the repository at this point in the history
* Rename dlamda -> dlambda
* Remove those calls to lamc3 that are a workaround for
  old Cray machines
* Document the purpose of the remaining lamc3 calls
* Update documentation where dlambda has become an input
  rather than input & output

Thanks to @langou and Prof Demmel for investigating
the purpose of the lamc3 calls and suggesting
a solution.
  • Loading branch information
angsch committed Jun 21, 2023
1 parent ba727de commit e186ae4
Show file tree
Hide file tree
Showing 18 changed files with 202 additions and 353 deletions.
30 changes: 15 additions & 15 deletions SRC/claed8.f
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* Definition:
* ===========
*
* SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
* SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMBDA,
* Q2, LDQ2, W, INDXP, INDX, INDXQ, PERM, GIVPTR,
* GIVCOL, GIVNUM, INFO )
*
Expand All @@ -29,7 +29,7 @@
* .. Array Arguments ..
* INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ),
* $ INDXQ( * ), PERM( * )
* REAL D( * ), DLAMDA( * ), GIVNUM( 2, * ), W( * ),
* REAL D( * ), DLAMBDA( * ), GIVNUM( 2, * ), W( * ),
* $ Z( * )
* COMPLEX Q( LDQ, * ), Q2( LDQ2, * )
* ..
Expand Down Expand Up @@ -122,9 +122,9 @@
*> destroyed during the updating process.
*> \endverbatim
*>
*> \param[out] DLAMDA
*> \param[out] DLAMBDA
*> \verbatim
*> DLAMDA is REAL array, dimension (N)
*> DLAMBDA is REAL array, dimension (N)
*> Contains a copy of the first K eigenvalues which will be used
*> by SLAED3 to form the secular equation.
*> \endverbatim
Expand Down Expand Up @@ -222,7 +222,7 @@
*> \ingroup complexOTHERcomputational
*
* =====================================================================
SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMBDA,
$ Q2, LDQ2, W, INDXP, INDX, INDXQ, PERM, GIVPTR,
$ GIVCOL, GIVNUM, INFO )
*
Expand All @@ -237,7 +237,7 @@ SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
* .. Array Arguments ..
INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ),
$ INDXQ( * ), PERM( * )
REAL D( * ), DLAMDA( * ), GIVNUM( 2, * ), W( * ),
REAL D( * ), DLAMBDA( * ), GIVNUM( 2, * ), W( * ),
$ Z( * )
COMPLEX Q( LDQ, * ), Q2( LDQ2, * )
* ..
Expand Down Expand Up @@ -322,14 +322,14 @@ SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
INDXQ( I ) = INDXQ( I ) + CUTPNT
20 CONTINUE
DO 30 I = 1, N
DLAMDA( I ) = D( INDXQ( I ) )
DLAMBDA( I ) = D( INDXQ( I ) )
W( I ) = Z( INDXQ( I ) )
30 CONTINUE
I = 1
J = CUTPNT + 1
CALL SLAMRG( N1, N2, DLAMDA, 1, 1, INDX )
CALL SLAMRG( N1, N2, DLAMBDA, 1, 1, INDX )
DO 40 I = 1, N
D( I ) = DLAMDA( INDX( I ) )
D( I ) = DLAMBDA( INDX( I ) )
Z( I ) = W( INDX( I ) )
40 CONTINUE
*
Expand Down Expand Up @@ -438,7 +438,7 @@ SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
ELSE
K = K + 1
W( K ) = Z( JLAM )
DLAMDA( K ) = D( JLAM )
DLAMBDA( K ) = D( JLAM )
INDXP( K ) = JLAM
JLAM = J
END IF
Expand All @@ -450,19 +450,19 @@ SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
*
K = K + 1
W( K ) = Z( JLAM )
DLAMDA( K ) = D( JLAM )
DLAMBDA( K ) = D( JLAM )
INDXP( K ) = JLAM
*
100 CONTINUE
*
* Sort the eigenvalues and corresponding eigenvectors into DLAMDA
* Sort the eigenvalues and corresponding eigenvectors into DLAMBDA
* and Q2 respectively. The eigenvalues/vectors which were not
* deflated go into the first K slots of DLAMDA and Q2 respectively,
* deflated go into the first K slots of DLAMBDA and Q2 respectively,
* while those which were deflated go into the last N - K slots.
*
DO 110 J = 1, N
JP = INDXP( J )
DLAMDA( J ) = D( JP )
DLAMBDA( J ) = D( JP )
PERM( J ) = INDXQ( INDX( JP ) )
CALL CCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 )
110 CONTINUE
Expand All @@ -471,7 +471,7 @@ SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
* into the last N - K slots of D and Q respectively.
*
IF( K.LT.N ) THEN
CALL SCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 )
CALL SCOPY( N-K, DLAMBDA( K+1 ), 1, D( K+1 ), 1 )
CALL CLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2, Q( 1, K+1 ),
$ LDQ )
END IF
Expand Down
10 changes: 10 additions & 0 deletions SRC/clals0.f
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,11 @@ SUBROUTINE CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX,
$ ( POLES( I, 2 ).EQ.ZERO ) ) THEN
RWORK( I ) = ZERO
ELSE
*
* Use calls to the subroutine SLAMC3 to enforce the
* parentheses (x+y)+z. The goal is to prevent
* optimizing compilers from doing x+(y+z).
*
RWORK( I ) = POLES( I, 2 )*Z( I ) /
$ ( SLAMC3( POLES( I, 2 ), DSIGJ )-
$ DIFLJ ) / ( POLES( I, 2 )+DJ )
Expand Down Expand Up @@ -470,6 +475,11 @@ SUBROUTINE CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX,
IF( Z( J ).EQ.ZERO ) THEN
RWORK( I ) = ZERO
ELSE
*
* Use calls to the subroutine SLAMC3 to enforce the
* parentheses (x+y)+z. The goal is to prevent optimizing
* compilers from doing x+(y+z).
*
RWORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I+1,
$ 2 ) )-DIFR( I, 1 ) ) /
$ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 )
Expand Down
30 changes: 15 additions & 15 deletions SRC/dlaed2.f
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* Definition:
* ===========
*
* SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
* SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMBDA, W,
* Q2, INDX, INDXC, INDXP, COLTYP, INFO )
*
* .. Scalar Arguments ..
Expand All @@ -28,7 +28,7 @@
* .. Array Arguments ..
* INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
* $ INDXQ( * )
* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
* DOUBLE PRECISION D( * ), DLAMBDA( * ), Q( LDQ, * ), Q2( * ),
* $ W( * ), Z( * )
* ..
*
Expand Down Expand Up @@ -123,9 +123,9 @@
*> process.
*> \endverbatim
*>
*> \param[out] DLAMDA
*> \param[out] DLAMBDA
*> \verbatim
*> DLAMDA is DOUBLE PRECISION array, dimension (N)
*> DLAMBDA is DOUBLE PRECISION array, dimension (N)
*> A copy of the first K eigenvalues which will be used by
*> DLAED3 to form the secular equation.
*> \endverbatim
Expand All @@ -148,7 +148,7 @@
*> \param[out] INDX
*> \verbatim
*> INDX is INTEGER array, dimension (N)
*> The permutation used to sort the contents of DLAMDA into
*> The permutation used to sort the contents of DLAMBDA into
*> ascending order.
*> \endverbatim
*>
Expand Down Expand Up @@ -207,7 +207,7 @@
*> Modified by Francoise Tisseur, University of Tennessee
*>
* =====================================================================
SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMBDA, W,
$ Q2, INDX, INDXC, INDXP, COLTYP, INFO )
*
* -- LAPACK computational routine --
Expand All @@ -221,7 +221,7 @@ SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
* .. Array Arguments ..
INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
$ INDXQ( * )
DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
DOUBLE PRECISION D( * ), DLAMBDA( * ), Q( LDQ, * ), Q2( * ),
$ W( * ), Z( * )
* ..
*
Expand Down Expand Up @@ -300,9 +300,9 @@ SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
* re-integrate the deflated parts from the last pass
*
DO 20 I = 1, N
DLAMDA( I ) = D( INDXQ( I ) )
DLAMBDA( I ) = D( INDXQ( I ) )
20 CONTINUE
CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDXC )
CALL DLAMRG( N1, N2, DLAMBDA, 1, 1, INDXC )
DO 30 I = 1, N
INDX( I ) = INDXQ( INDXC( I ) )
30 CONTINUE
Expand All @@ -324,11 +324,11 @@ SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
DO 40 J = 1, N
I = INDX( J )
CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )
DLAMDA( J ) = D( I )
DLAMBDA( J ) = D( I )
IQ2 = IQ2 + N
40 CONTINUE
CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ )
CALL DCOPY( N, DLAMDA, 1, D, 1 )
CALL DCOPY( N, DLAMBDA, 1, D, 1 )
GO TO 190
END IF
*
Expand Down Expand Up @@ -421,7 +421,7 @@ SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
PJ = NJ
ELSE
K = K + 1
DLAMDA( K ) = D( PJ )
DLAMBDA( K ) = D( PJ )
W( K ) = Z( PJ )
INDXP( K ) = PJ
PJ = NJ
Expand All @@ -433,7 +433,7 @@ SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
* Record the last eigenvalue.
*
K = K + 1
DLAMDA( K ) = D( PJ )
DLAMBDA( K ) = D( PJ )
W( K ) = Z( PJ )
INDXP( K ) = PJ
*
Expand Down Expand Up @@ -470,9 +470,9 @@ SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
PSM( CT ) = PSM( CT ) + 1
130 CONTINUE
*
* Sort the eigenvalues and corresponding eigenvectors into DLAMDA
* Sort the eigenvalues and corresponding eigenvectors into DLAMBDA
* and Q2 respectively. The eigenvalues/vectors which were not
* deflated go into the first K slots of DLAMDA and Q2 respectively,
* deflated go into the first K slots of DLAMBDA and Q2 respectively,
* while those which were deflated go into the last N - K slots.
*
I = 1
Expand Down
52 changes: 12 additions & 40 deletions SRC/dlaed3.f
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* Definition:
* ===========
*
* SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
* SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMBDA, Q2, INDX,
* CTOT, W, S, INFO )
*
* .. Scalar Arguments ..
Expand All @@ -27,7 +27,7 @@
* ..
* .. Array Arguments ..
* INTEGER CTOT( * ), INDX( * )
* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
* DOUBLE PRECISION D( * ), DLAMBDA( * ), Q( LDQ, * ), Q2( * ),
* $ S( * ), W( * )
* ..
*
Expand All @@ -44,12 +44,6 @@
*> being combined by the matrix of eigenvectors of the K-by-K system
*> which is solved here.
*>
*> This code makes very mild assumptions about floating point
*> arithmetic. It will work on machines with a guard digit in
*> add/subtract, or on those binary machines without guard digits
*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
*> It could conceivably fail on hexadecimal or decimal machines
*> without guard digits, but we know of none.
*> \endverbatim
*
* Arguments:
Expand Down Expand Up @@ -104,14 +98,12 @@
*> RHO >= 0 required.
*> \endverbatim
*>
*> \param[in,out] DLAMDA
*> \param[in] DLAMBDA
*> \verbatim
*> DLAMDA is DOUBLE PRECISION array, dimension (K)
*> DLAMBDA is DOUBLE PRECISION array, dimension (K)
*> The first K elements of this array contain the old roots
*> of the deflated updating problem. These are the poles
*> of the secular equation. May be changed on output by
*> having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
*> Cray-2, or Cray C-90, as described above.
*> of the secular equation.
*> \endverbatim
*>
*> \param[in] Q2
Expand Down Expand Up @@ -180,7 +172,7 @@
*> Modified by Francoise Tisseur, University of Tennessee
*>
* =====================================================================
SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMBDA, Q2, INDX,
$ CTOT, W, S, INFO )
*
* -- LAPACK computational routine --
Expand All @@ -193,7 +185,7 @@ SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
* ..
* .. Array Arguments ..
INTEGER CTOT( * ), INDX( * )
DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
DOUBLE PRECISION D( * ), DLAMBDA( * ), Q( LDQ, * ), Q2( * ),
$ S( * ), W( * )
* ..
*
Expand All @@ -208,8 +200,8 @@ SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
DOUBLE PRECISION TEMP
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMC3, DNRM2
EXTERNAL DLAMC3, DNRM2
DOUBLE PRECISION DNRM2
EXTERNAL DNRM2
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DGEMM, DLACPY, DLAED4, DLASET, XERBLA
Expand Down Expand Up @@ -240,29 +232,9 @@ SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
IF( K.EQ.0 )
$ RETURN
*
* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
* be computed with high relative accuracy (barring over/underflow).
* This is a problem on machines without a guard digit in
* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
* which on any of these machines zeros out the bottommost
* bit of DLAMDA(I) if it is 1; this makes the subsequent
* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
* occurs. On binary machines with a guard digit (almost all
* machines) it does not change DLAMDA(I) at all. On hexadecimal
* and decimal machines with a guard digit, it slightly
* changes the bottommost bits of DLAMDA(I). It does not account
* for hexadecimal or decimal machines without guard digits
* (we know of none). We use a subroutine call to compute
* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating
* this code.
*
DO 10 I = 1, K
DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
10 CONTINUE
*
DO 20 J = 1, K
CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO )
CALL DLAED4( K, J, DLAMBDA, W, Q( 1, J ), RHO, D( J ), INFO )
*
* If the zero finder fails, the computation is terminated.
*
Expand Down Expand Up @@ -293,10 +265,10 @@ SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
CALL DCOPY( K, Q, LDQ+1, W, 1 )
DO 60 J = 1, K
DO 40 I = 1, J - 1
W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
W( I ) = W( I )*( Q( I, J )/( DLAMBDA( I )-DLAMBDA( J ) ) )
40 CONTINUE
DO 50 I = J + 1, K
W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
W( I ) = W( I )*( Q( I, J )/( DLAMBDA( I )-DLAMBDA( J ) ) )
50 CONTINUE
60 CONTINUE
DO 70 I = 1, K
Expand Down
Loading

0 comments on commit e186ae4

Please sign in to comment.