Skip to content

Commit

Permalink
Add level-3 BLAS triangular Sylvester equation solver
Browse files Browse the repository at this point in the history
Force compatibility with [ds]trsyl. Use two floating-point
scaling factors (rather than integer scaling factors).
This does not eliminate the problem that scalings can be flushed,
making any result useless. That problem could be eliminated
by replacing the floating-point scale factor with an integer
scale factor.
  • Loading branch information
angsch committed Sep 14, 2022
1 parent 801ac2f commit cda8a83
Show file tree
Hide file tree
Showing 7 changed files with 2,699 additions and 10 deletions.
10 changes: 5 additions & 5 deletions SRC/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ set(SLASRC
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
slarf.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f
slarrv.f slartv.f
slarf.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f
slargv.f slarmm.f slarrv.f slartv.f
slarz.f slarzb.f slarzt.f slasy2.f
slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f
Expand Down Expand Up @@ -141,7 +141,7 @@ set(SLASRC
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
stptrs.f
strcon.f strevc.f strevc3.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
strti2.f strtri.f strtrs.f stzrzf.f sstemr.f
strsyl3.f strti2.f strtri.f strtrs.f stzrzf.f sstemr.f
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
Expand Down Expand Up @@ -306,7 +306,7 @@ set(DLASRC
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
dlarf.f dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f
dlargv.f dlarrv.f dlartv.f
dlargv.f dlarmm.f dlarrv.f dlartv.f
dlarz.f dlarzb.f dlarzt.f dlaswp.f dlasy2.f
dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlauu2.f
Expand Down Expand Up @@ -342,7 +342,7 @@ set(DLASRC
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
dtptrs.f
dtrcon.f dtrevc.f dtrevc3.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
dtrti2.f dtrtri.f dtrtrs.f dtzrzf.f dstemr.f
dtrsyl3.f dtrti2.f dtrtri.f dtrtrs.f dtzrzf.f dstemr.f
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
Expand Down
10 changes: 5 additions & 5 deletions SRC/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@ SLASRC = \
slaqgb.o slaqge.o slaqp2.o slaqps.o slaqsb.o slaqsp.o slaqsy.o \
slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \
slaqtr.o slar1v.o slar2v.o ilaslr.o ilaslc.o \
slarf.o slarfb.o slarfb_gett.o slarfg.o slarfgp.o slarft.o slarfx.o slarfy.o slargv.o \
slarrv.o slartv.o \
slarf.o slarfb.o slarfb_gett.o slarfg.o slarfgp.o slarft.o slarfx.o slarfy.o \
slargv.o slarmm.o slarrv.o slartv.o \
slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o slasyf_rook.o \
slasyf_rk.o \
slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o \
Expand Down Expand Up @@ -174,7 +174,7 @@ SLASRC = \
stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \
stptrs.o \
strcon.o strevc.o strevc3.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \
strti2.o strtri.o strtrs.o stzrzf.o sstemr.o \
strsyl3.o strti2.o strtri.o strtrs.o stzrzf.o sstemr.o \
slansf.o spftrf.o spftri.o spftrs.o ssfrk.o stfsm.o stftri.o stfttp.o \
stfttr.o stpttf.o stpttr.o strttf.o strttp.o \
sgejsv.o sgesvj.o sgsvj0.o sgsvj1.o \
Expand Down Expand Up @@ -340,7 +340,7 @@ DLASRC = \
dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \
dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \
dlarf.o dlarfb.o dlarfb_gett.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o dlarfy.o \
dlargv.o dlarrv.o dlartv.o \
dlargv.o dlarmm.o dlarrv.o dlartv.o \
dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o \
dlasyf.o dlasyf_rook.o dlasyf_rk.o \
dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlauu2.o \
Expand Down Expand Up @@ -376,7 +376,7 @@ DLASRC = \
dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \
dtptrs.o \
dtrcon.o dtrevc.o dtrevc3.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \
dtrti2.o dtrtri.o dtrtrs.o dtzrzf.o dstemr.o \
dtrsyl3.o dtrti2.o dtrtri.o dtrtrs.o dtzrzf.o dstemr.o \
dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \
dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \
dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \
Expand Down
99 changes: 99 additions & 0 deletions SRC/dlarmm.f
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
*> \brief \b DLARMM
*
* Definition:
* ===========
*
* DOUBLE PRECISION FUNCTION DLARMM( ANORM, BNORM, CNORM )
*
* .. Scalar Arguments ..
* DOUBLE PRECISION ANORM, BNORM, CNORM
* ..
*
*> \par Purpose:
* =======
*>
*> \verbatim
*>
*> DLARMM returns a factor s in (0, 1] such that the linear updates
*>
*> (s * C) - A * (s * B) and (s * C) - (s * A) * B
*>
*> cannot overflow, where A, B, and C are matrices of conforming
*> dimensions.
*>
*> This is an auxiliary routine so there is no argument checking.
*> \endverbatim
*
* Arguments:
* =========
*
*> \param[in] ANORM
*> \verbatim
*> ANORM is DOUBLE PRECISION
*> The infinity norm of A. ANORM >= 0.
*> The number of rows of the matrix A. M >= 0.
*> \endverbatim
*>
*> \param[in] BNORM
*> \verbatim
*> BNORM is DOUBLE PRECISION
*> The infinity norm of B. BNORM >= 0.
*> \endverbatim
*>
*> \param[in] CNORM
*> \verbatim
*> CNORM is DOUBLE PRECISION
*> The infinity norm of C. CNORM >= 0.
*> \endverbatim
*>
*>
* =====================================================================
*> References:
*> C. C. Kjelgaard Mikkelsen and L. Karlsson, Blocked Algorithms for
*> Robust Solution of Triangular Linear Systems. In: International
*> Conference on Parallel Processing and Applied Mathematics, pages
*> 68--78. Springer, 2017.
*>
*> \ingroup OTHERauxiliary
* =====================================================================

DOUBLE PRECISION FUNCTION DLARMM( ANORM, BNORM, CNORM )
IMPLICIT NONE
* .. Scalar Arguments ..
DOUBLE PRECISION ANORM, BNORM, CNORM
* .. Parameters ..
DOUBLE PRECISION ONE, HALF, FOUR
PARAMETER ( ONE = 1.0D0, HALF = 0.5D+0, FOUR = 4.0D0 )
* ..
* .. Local Scalars ..
DOUBLE PRECISION BIGNUM, SMLNUM
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
EXTERNAL DLAMCH
* ..
* .. Executable Statements ..
*
*
* Determine machine dependent parameters to control overflow.
*
SMLNUM = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
BIGNUM = ( ONE / SMLNUM ) / FOUR
*
* Compute a scale factor.
*
DLARMM = ONE
IF( BNORM .LE. ONE ) THEN
IF( ANORM * BNORM .GT. BIGNUM - CNORM ) THEN
DLARMM = HALF
END IF
ELSE
IF( ANORM .GT. (BIGNUM - CNORM) / BNORM ) THEN
DLARMM = HALF / BNORM
END IF
END IF
RETURN
*
* ==== End of DLARMM ====
*
END
Loading

0 comments on commit cda8a83

Please sign in to comment.