Skip to content

Commit

Permalink
Merge branch 'Reference-LAPACK:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
dbielich authored Mar 21, 2023
2 parents 3b08c51 + cfaa5ae commit bd1204d
Show file tree
Hide file tree
Showing 144 changed files with 7,525 additions and 2,791 deletions.
10 changes: 3 additions & 7 deletions .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,14 @@ skip_commits:
# Add [av skip] to commit messages
message: /\[av skip\]/

cache:
- '%APPVEYOR_BUILD_FOLDER%\build'

environment:
global:
CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
CONDA_INSTALL_LOCN: C:\\Miniconda37-x64

install:
- call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
- conda config --set auto_update_conda false
- conda config --add channels conda-forge --force
- conda install --yes --quiet flang jom
# - conda config --set auto_update_conda false
- conda install -c conda-forge --yes --quiet flang jom
- call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
- set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
- set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ on:
- '!**Makefile'
- '!**md'

permissions:
contents: read

env:
CFLAGS: "-Wall -pedantic"
# Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
Expand Down Expand Up @@ -75,6 +78,7 @@ jobs:
cmake -B build
-D CMAKE_C_COMPILER="gcc-11"
-D CMAKE_Fortran_COMPILER="gfortran-11"
-D USE_FLAT_NAMESPACE:BOOL=ON
# - name: Use Unix Makefiles on Windows
# if: ${{ matrix.os == 'windows-latest' }}
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/makefile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ on:
- '!**CMakeLists.txt'
- '!**md'

permissions:
contents: read

env:
CC: "gcc"
FC: "gfortran"
Expand Down
150 changes: 99 additions & 51 deletions BLAS/SRC/crotg.f90
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
!> \brief \b CROTG
!> \brief \b CROTG generates a Givens rotation with real cosine and complex sine.
!
! =========== DOCUMENTATION ===========
!
Expand All @@ -24,8 +24,8 @@
!> = 1 if x = 0
!> c = |a| / sqrt(|a|**2 + |b|**2)
!> s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2)
!> When a and b are real and r /= 0, the formulas simplify to
!> r = sgn(a)*sqrt(|a|**2 + |b|**2)
!> When a and b are real and r /= 0, the formulas simplify to
!> c = a / r
!> s = b / r
!> the same as in SROTG when |a| > |b|. When |b| >= |a|, the
Expand Down Expand Up @@ -65,12 +65,9 @@
! Authors:
! ========
!
!> \author Edward Anderson, Lockheed Martin
!> \author Weslley Pereira, University of Colorado Denver, USA
!
!> \par Contributors:
! ==================
!>
!> Weslley Pereira, University of Colorado Denver, USA
!> \date December 2021
!
!> \ingroup single_blas_level1
!
Expand All @@ -79,6 +76,8 @@
!>
!> \verbatim
!>
!> Based on the algorithm from
!>
!> Anderson E. (2017)
!> Algorithm 978: Safe Scaling in the Level 1 BLAS
!> ACM Trans Math Softw 44:1--28
Expand Down Expand Up @@ -108,21 +107,14 @@ subroutine CROTG( a, b, c, s )
1-minexponent(0._wp), &
maxexponent(0._wp)-1 &
)
real(wp), parameter :: rtmin = sqrt( real(radix(0._wp),wp)**max( &
minexponent(0._wp)-1, &
1-maxexponent(0._wp) &
) / epsilon(0._wp) )
real(wp), parameter :: rtmax = sqrt( real(radix(0._wp),wp)**max( &
1-minexponent(0._wp), &
maxexponent(0._wp)-1 &
) * epsilon(0._wp) )
real(wp), parameter :: rtmin = sqrt( safmin )
! ..
! .. Scalar Arguments ..
real(wp) :: c
complex(wp) :: a, b, s
! ..
! .. Local Scalars ..
real(wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w
real(wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmax
complex(wp) :: f, fs, g, gs, r, t
! ..
! .. Intrinsic Functions ..
Expand All @@ -144,30 +136,43 @@ subroutine CROTG( a, b, c, s )
r = f
else if( f == czero ) then
c = zero
g1 = max( abs(real(g)), abs(aimag(g)) )
if( g1 > rtmin .and. g1 < rtmax ) then
if( real(g) == zero ) then
r = abs(aimag(g))
s = conjg( g ) / r
elseif( aimag(g) == zero ) then
r = abs(real(g))
s = conjg( g ) / r
else
g1 = max( abs(real(g)), abs(aimag(g)) )
rtmax = sqrt( safmax/2 )
if( g1 > rtmin .and. g1 < rtmax ) then
!
! Use unscaled algorithm
!
g2 = ABSSQ( g )
d = sqrt( g2 )
s = conjg( g ) / d
r = d
else
! The following two lines can be replaced by `d = abs( g )`.
! This algorithm does not use the intrinsic complex abs.
g2 = ABSSQ( g )
d = sqrt( g2 )
s = conjg( g ) / d
r = d
else
!
! Use scaled algorithm
!
u = min( safmax, max( safmin, g1 ) )
uu = one / u
gs = g*uu
g2 = ABSSQ( gs )
d = sqrt( g2 )
s = conjg( gs ) / d
r = d*u
u = min( safmax, max( safmin, g1 ) )
gs = g / u
! The following two lines can be replaced by `d = abs( gs )`.
! This algorithm does not use the intrinsic complex abs.
g2 = ABSSQ( gs )
d = sqrt( g2 )
s = conjg( gs ) / d
r = d*u
end if
end if
else
f1 = max( abs(real(f)), abs(aimag(f)) )
g1 = max( abs(real(g)), abs(aimag(g)) )
rtmax = sqrt( safmax/4 )
if( f1 > rtmin .and. f1 < rtmax .and. &
g1 > rtmin .and. g1 < rtmax ) then
!
Expand All @@ -176,52 +181,95 @@ subroutine CROTG( a, b, c, s )
f2 = ABSSQ( f )
g2 = ABSSQ( g )
h2 = f2 + g2
if( f2 > rtmin .and. h2 < rtmax ) then
d = sqrt( f2*h2 )
! safmin <= f2 <= h2 <= safmax
if( f2 >= h2 * safmin ) then
! safmin <= f2/h2 <= 1, and h2/f2 is finite
c = sqrt( f2 / h2 )
r = f / c
rtmax = rtmax * 2
if( f2 > rtmin .and. h2 < rtmax ) then
! safmin <= sqrt( f2*h2 ) <= safmax
s = conjg( g ) * ( f / sqrt( f2*h2 ) )
else
s = conjg( g ) * ( r / h2 )
end if
else
d = sqrt( f2 )*sqrt( h2 )
! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
! Moreover,
! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
! Also,
! g2 >> f2, which means that h2 = g2.
d = sqrt( f2 * h2 )
c = f2 / d
if( c >= safmin ) then
r = f / c
else
! f2 / sqrt(f2 * h2) < safmin, then
! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
r = f * ( h2 / d )
end if
s = conjg( g ) * ( f / d )
end if
p = 1 / d
c = f2*p
s = conjg( g )*( f*p )
r = f*( h2*p )
else
!
! Use scaled algorithm
!
u = min( safmax, max( safmin, f1, g1 ) )
uu = one / u
gs = g*uu
gs = g / u
g2 = ABSSQ( gs )
if( f1*uu < rtmin ) then
if( f1 / u < rtmin ) then
!
! f is not well-scaled when scaled by g1.
! Use a different scaling for f.
!
v = min( safmax, max( safmin, f1 ) )
vv = one / v
w = v * uu
fs = f*vv
w = v / u
fs = f / v
f2 = ABSSQ( fs )
h2 = f2*w**2 + g2
else
!
! Otherwise use the same scaling for f and g.
!
w = one
fs = f*uu
fs = f / u
f2 = ABSSQ( fs )
h2 = f2 + g2
end if
if( f2 > rtmin .and. h2 < rtmax ) then
d = sqrt( f2*h2 )
! safmin <= f2 <= h2 <= safmax
if( f2 >= h2 * safmin ) then
! safmin <= f2/h2 <= 1, and h2/f2 is finite
c = sqrt( f2 / h2 )
r = fs / c
rtmax = rtmax * 2
if( f2 > rtmin .and. h2 < rtmax ) then
! safmin <= sqrt( f2*h2 ) <= safmax
s = conjg( gs ) * ( fs / sqrt( f2*h2 ) )
else
s = conjg( gs ) * ( r / h2 )
end if
else
d = sqrt( f2 )*sqrt( h2 )
! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
! Moreover,
! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
! Also,
! g2 >> f2, which means that h2 = g2.
d = sqrt( f2 * h2 )
c = f2 / d
if( c >= safmin ) then
r = fs / c
else
! f2 / sqrt(f2 * h2) < safmin, then
! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
r = fs * ( h2 / d )
end if
s = conjg( gs ) * ( fs / d )
end if
p = 1 / d
c = ( f2*p )*w
s = conjg( gs )*( fs*p )
r = ( fs*( h2*p ) )*u
! Rescale c and r
c = c * w
r = r * u
end if
end if
a = r
Expand Down
Loading

0 comments on commit bd1204d

Please sign in to comment.