math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N

Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) 
Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% 
(p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) 
NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <[email protected]> Auto-Submit: Russ Cox <[email protected]> Reviewed-by: David Chase <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
golang · Oct 30, 2023 · c266587 · c266587
1 parent c7dddb0
commit c266587
Show file tree

Hide file tree

Showing 6 changed files with 406 additions and 215 deletions.
diff --git a/api/next/61716.txt b/api/next/61716.txt
@@ -7,14 +7,18 @@ pkg math/rand/v2, func Int32N(int32) int32 #61716
 pkg math/rand/v2, func Int64() int64 #61716
 pkg math/rand/v2, func Int64N(int64) int64 #61716
 pkg math/rand/v2, func IntN(int) int #61716
+pkg math/rand/v2, func N[$0 intType]($0) $0 #61716
 pkg math/rand/v2, func New(Source) *Rand #61716
 pkg math/rand/v2, func NewSource(int64) Source #61716
 pkg math/rand/v2, func NewZipf(*Rand, float64, float64, uint64) *Zipf #61716
 pkg math/rand/v2, func NormFloat64() float64 #61716
 pkg math/rand/v2, func Perm(int) []int #61716
 pkg math/rand/v2, func Shuffle(int, func(int, int)) #61716
 pkg math/rand/v2, func Uint32() uint32 #61716
+pkg math/rand/v2, func Uint32N(uint32) uint32 #61716
 pkg math/rand/v2, func Uint64() uint64 #61716
+pkg math/rand/v2, func Uint64N(uint64) uint64 #61716
+pkg math/rand/v2, func UintN(uint) uint #61716
 pkg math/rand/v2, method (*Rand) ExpFloat64() float64 #61716
 pkg math/rand/v2, method (*Rand) Float32() float32 #61716
 pkg math/rand/v2, method (*Rand) Float64() float64 #61716
@@ -28,7 +32,10 @@ pkg math/rand/v2, method (*Rand) NormFloat64() float64 #61716
 pkg math/rand/v2, method (*Rand) Perm(int) []int #61716
 pkg math/rand/v2, method (*Rand) Shuffle(int, func(int, int)) #61716
 pkg math/rand/v2, method (*Rand) Uint32() uint32 #61716
+pkg math/rand/v2, method (*Rand) Uint32N(uint32) uint32 #61716
 pkg math/rand/v2, method (*Rand) Uint64() uint64 #61716
+pkg math/rand/v2, method (*Rand) Uint64N(uint64) uint64 #61716
+pkg math/rand/v2, method (*Rand) UintN(uint) uint #61716
 pkg math/rand/v2, method (*Zipf) Uint64() uint64 #61716
 pkg math/rand/v2, type Rand struct #61716
 pkg math/rand/v2, type Source interface { Uint64 } #61716

diff --git a/src/math/rand/v2/example_test.go b/src/math/rand/v2/example_test.go
@@ -10,6 +10,7 @@ import (
 	"os"
 	"strings"
 	"text/tabwriter"
+	"time"
 )
 
 // These tests serve as an example but also make sure we don't change
@@ -84,15 +85,15 @@ func Example_rand() {
 	// Output:
 	// Float32     0.2635776           0.6358173           0.6718283
 	// Float64     0.628605430454327   0.4504798828572669  0.9562755949377957
-	// ExpFloat64  0.3362240648200941  1.4256072328483647  0.24354758816173044
-	// NormFloat64 0.17233959114940064 1.577014951434847   0.04259129641113857
-	// Int32       1501292890          1486668269          182840835
-	// Int64       3546343826724305832 5724354148158589552 5239846799706671610
-	// Uint32      2760229429          296659907           1922395059
-	// IntN(10)    1                   2                   5
-	// Int32N(10)  4                   7                   8
-	// Int64N(10)  7                   6                   3
-	// Perm        [1 4 2 3 0]         [4 2 1 3 0]         [1 2 4 0 3]
+	// ExpFloat64  0.10400903165715357 0.28855743344575835 0.20489656480442942
+	// NormFloat64 -0.5602299711828513 -0.9211692958208376 -1.4262061075859056
+	// Int32       1817075958          91420417            1486590581
+	// Int64       5724354148158589552 5239846799706671610 5927547564735367388
+	// Uint32      2295813601          961197529           3493134579
+	// IntN(10)    4                   5                   1
+	// Int32N(10)  8                   5                   4
+	// Int64N(10)  2                   6                   3
+	// Perm        [3 4 2 1 0]         [4 1 2 0 3]         [0 2 1 3 4]
 }
 
 func ExamplePerm() {
@@ -105,6 +106,14 @@ func ExamplePerm() {
 	// 0
 }
 
+func ExampleN() {
+	// Print an int64 in the half-open interval [0, 100).
+	fmt.Println(rand.N(int64(100)))
+
+	// Sleep for a random duration between 0 and 100 milliseconds.
+	time.Sleep(rand.N(100 * time.Millisecond))
+}
+
 func ExampleShuffle() {
 	words := strings.Fields("ink runs from the corners of my mouth")
 	rand.Shuffle(len(words), func(i, j int) {

diff --git a/src/math/rand/v2/export_test.go b/src/math/rand/v2/export_test.go
@@ -4,10 +4,6 @@
 
 package rand
 
-func Int32NForTest(r *Rand, n int32) int32 {
-	return r.int31n(n)
-}
-
 func GetNormalDistributionParameters() (float64, [128]uint32, [128]float32, [128]float32) {
 	return rn, kn, wn, fn
 }

diff --git a/src/math/rand/v2/rand.go b/src/math/rand/v2/rand.go
@@ -18,6 +18,7 @@
 package rand
 
 import (
+	"math/bits"
 	_ "unsafe" // for go:linkname
 )
 
@@ -58,37 +59,122 @@ func New(src Source) *Rand {
 func (r *Rand) Int64() int64 { return int64(r.src.Uint64() &^ (1 << 63)) }
 
 // Uint32 returns a pseudo-random 32-bit value as a uint32.
-func (r *Rand) Uint32() uint32 { return uint32(r.Int64() >> 31) }
+func (r *Rand) Uint32() uint32 { return uint32(r.src.Uint64() >> 32) }
 
 // Uint64 returns a pseudo-random 64-bit value as a uint64.
-func (r *Rand) Uint64() uint64 {
-	return r.src.Uint64()
-}
+func (r *Rand) Uint64() uint64 { return r.src.Uint64() }
 
 // Int32 returns a non-negative pseudo-random 31-bit integer as an int32.
-func (r *Rand) Int32() int32 { return int32(r.Int64() >> 32) }
+func (r *Rand) Int32() int32 { return int32(r.src.Uint64() >> 33) }
 
 // Int returns a non-negative pseudo-random int.
-func (r *Rand) Int() int {
-	u := uint(r.Int64())
-	return int(u << 1 >> 1) // clear sign bit if int == int32
-}
+func (r *Rand) Int() int { return int(uint(r.src.Uint64()) << 1 >> 1) }
 
 // Int64N returns, as an int64, a non-negative pseudo-random number in the half-open interval [0,n).
 // It panics if n <= 0.
 func (r *Rand) Int64N(n int64) int64 {
 	if n <= 0 {
 		panic("invalid argument to Int64N")
 	}
+	return int64(r.uint64n(uint64(n)))
+}
+
+// Uint64N returns, as a uint64, a non-negative pseudo-random number in the half-open interval [0,n).
+// It panics if n == 0.
+func (r *Rand) Uint64N(n uint64) uint64 {
+	if n == 0 {
+		panic("invalid argument to Uint64N")
+	}
+	return r.uint64n(n)
+}
+
+// uint64n is the no-bounds-checks version of Uint64N.
+func (r *Rand) uint64n(n uint64) uint64 {
+	if is32bit && uint64(uint32(n)) == n {
+		return uint64(r.uint32n(uint32(n)))
+	}
 	if n&(n-1) == 0 { // n is power of two, can mask
-		return r.Int64() & (n - 1)
+		return r.Uint64() & (n - 1)
 	}
-	max := int64((1 << 63) - 1 - (1<<63)%uint64(n))
-	v := r.Int64()
-	for v > max {
-		v = r.Int64()
+
+	// Suppose we have a uint64 x uniform in the range [0,2⁶⁴)
+	// and want to reduce it to the range [0,n) preserving exact uniformity.
+	// We can simulate an arbitrary-precision scaling x * (n/2⁶⁴) by
+	// the high bits of a double-width multiply of x*n, meaning (x*n)/2⁶⁴.
+	// Since there are 2⁶⁴ possible inputs x and only n possible outputs,
+	// the output is necessarily biased if n does not divide 2⁶⁴.
+	// In general (x*n)/2⁶⁴ = k for x*n in [k*2⁶⁴,(k+1)*2⁶⁴).
+	// There are either floor(2⁶⁴/n) or ceil(2⁶⁴/n) possible products
+	// in that range, depending on k.
+	// But suppose we reject the sample and try again when
+	// x*n is in [k*2⁶⁴, k*2⁶⁴+(2⁶⁴%n)), meaning rejecting fewer than n possible
+	// outcomes out of the 2⁶⁴.
+	// Now there are exactly floor(2⁶⁴/n) possible ways to produce
+	// each output value k, so we've restored uniformity.
+	// To get valid uint64 math, 2⁶⁴ % n = (2⁶⁴ - n) % n = -n % n,
+	// so the direct implementation of this algorithm would be:
+	//
+	//	hi, lo := bits.Mul64(r.Uint64(), n)
+	//	thresh := -n % n
+	//	for lo < thresh {
+	//		hi, lo = bits.Mul64(r.Uint64(), n)
+	//	}
+	//
+	// That still leaves an expensive 64-bit division that we would rather avoid.
+	// We know that thresh < n, and n is usually much less than 2⁶⁴, so we can
+	// avoid the last four lines unless lo < n.
+	//
+	// See also:
+	// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction
+	// https://lemire.me/blog/2016/06/30/fast-random-shuffling
+	hi, lo := bits.Mul64(r.Uint64(), n)
+	if lo < n {
+		thresh := -n % n
+		for lo < thresh {
+			hi, lo = bits.Mul64(r.Uint64(), n)
+		}
 	}
-	return v % n
+	return hi
+}
+
+// uint32n is an identical computation to uint64n
+// but optimized for 32-bit systems.
+func (r *Rand) uint32n(n uint32) uint32 {
+	if n&(n-1) == 0 { // n is power of two, can mask
+		return uint32(r.Uint64()) & (n - 1)
+	}
+	// On 64-bit systems we still use the uint64 code in uint64n because
+	// the probability of a random uint64 lo being < a uint32 n is near zero,
+	// meaning the unbiasing loop almost never runs.
+	// On 32-bit systems, here we need to implement that same logic in 32-bit math,
+	// both to preserve the exact output sequence observed on 64-bit machines
+	// and to preserve the optimization that the unbiasing loop almost never runs.
+	//
+	// We want to compute
+	// 	hi, lo := bits.Mul64(r.Uint64(), n)
+	// In terms of 32-bit halves, this is:
+	// 	x1:x0 := r.Uint64()
+	// 	0:hi, lo1:lo0 := bits.Mul64(x1:x0, 0:n)
+	// Writing out the multiplication in terms of bits.Mul32 allows
+	// using direct hardware instructions and avoiding
+	// the computations involving these zeros.
+	x := r.Uint64()
+	lo1a, lo0 := bits.Mul32(uint32(x), n)
+	hi, lo1b := bits.Mul32(uint32(x>>32), n)
+	lo1, c := bits.Add32(lo1a, lo1b, 0)
+	hi += c
+	if lo1 == 0 && lo0 < uint32(n) {
+		n64 := uint64(n)
+		thresh := uint32(-n64 % n64)
+		for lo1 == 0 && lo0 < thresh {
+			x := r.Uint64()
+			lo1a, lo0 = bits.Mul32(uint32(x), n)
+			hi, lo1b = bits.Mul32(uint32(x>>32), n)
+			lo1, c = bits.Add32(lo1a, lo1b, 0)
+			hi += c
+		}
+	}
+	return hi
 }
 
 // Int32N returns, as an int32, a non-negative pseudo-random number in the half-open interval [0,n).
@@ -97,51 +183,36 @@ func (r *Rand) Int32N(n int32) int32 {
 	if n <= 0 {
 		panic("invalid argument to Int32N")
 	}
-	if n&(n-1) == 0 { // n is power of two, can mask
-		return r.Int32() & (n - 1)
-	}
-	max := int32((1 << 31) - 1 - (1<<31)%uint32(n))
-	v := r.Int32()
-	for v > max {
-		v = r.Int32()
-	}
-	return v % n
+	return int32(r.uint64n(uint64(n)))
 }
 
-// int31n returns, as an int32, a non-negative pseudo-random number in the half-open interval [0,n).
-// n must be > 0, but int31n does not check this; the caller must ensure it.
-// int31n exists because Int32N is inefficient, but Go 1 compatibility
-// requires that the stream of values produced by math/rand/v2 remain unchanged.
-// int31n can thus only be used internally, by newly introduced APIs.
-//
-// For implementation details, see:
-// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction
-// https://lemire.me/blog/2016/06/30/fast-random-shuffling
-func (r *Rand) int31n(n int32) int32 {
-	v := r.Uint32()
-	prod := uint64(v) * uint64(n)
-	low := uint32(prod)
-	if low < uint32(n) {
-		thresh := uint32(-n) % uint32(n)
-		for low < thresh {
-			v = r.Uint32()
-			prod = uint64(v) * uint64(n)
-			low = uint32(prod)
-		}
+// Uint32N returns, as a uint32, a non-negative pseudo-random number in the half-open interval [0,n).
+// It panics if n == 0.
+func (r *Rand) Uint32N(n uint32) uint32 {
+	if n == 0 {
+		panic("invalid argument to Uint32N")
 	}
-	return int32(prod >> 32)
+	return uint32(r.uint64n(uint64(n)))
 }
 
+const is32bit = ^uint(0)>>32 == 0
+
 // IntN returns, as an int, a non-negative pseudo-random number in the half-open interval [0,n).
 // It panics if n <= 0.
 func (r *Rand) IntN(n int) int {
 	if n <= 0 {
 		panic("invalid argument to IntN")
 	}
-	if n <= 1<<31-1 {
-		return int(r.Int32N(int32(n)))
+	return int(r.uint64n(uint64(n)))
+}
+
+// UintN returns, as a uint, a non-negative pseudo-random number in the half-open interval [0,n).
+// It panics if n == 0.
+func (r *Rand) UintN(n uint) uint {
+	if n == 0 {
+		panic("invalid argument to UintN")
 	}
-	return int(r.Int64N(int64(n)))
+	return uint(r.uint64n(uint64(n)))
 }
 
 // Float64 returns, as a float64, a pseudo-random number in the half-open interval [0.0,1.0).
@@ -214,13 +285,8 @@ func (r *Rand) Shuffle(n int, swap func(i, j int)) {
 	// there's no way that any PRNG can have a big enough internal state to
 	// generate even a minuscule percentage of the possible permutations.
 	// Nevertheless, the right API signature accepts an int n, so handle it as best we can.
-	i := n - 1
-	for ; i > 1<<31-1-1; i-- {
-		j := int(r.Int64N(int64(i + 1)))
-		swap(i, j)
-	}
-	for ; i > 0; i-- {
-		j := int(r.int31n(int32(i + 1)))
+	for i := n - 1; i > 0; i-- {
+		j := int(r.uint64n(uint64(i + 1)))
 		swap(i, j)
 	}
 }
@@ -255,6 +321,16 @@ func Int64() int64 { return globalRand.Int64() }
 // from the default Source.
 func Uint32() uint32 { return globalRand.Uint32() }
 
+// Uint64N returns, as a uint64, a pseudo-random number in the half-open interval [0,n)
+// from the default Source.
+// It panics if n == 0.
+func Uint64N(n uint64) uint64 { return globalRand.Uint64N(n) }
+
+// Uint32N returns, as a uint32, a pseudo-random number in the half-open interval [0,n)
+// from the default Source.
+// It panics if n == 0.
+func Uint32N(n uint32) uint32 { return globalRand.Uint32N(n) }
+
 // Uint64 returns a pseudo-random 64-bit value as a uint64
 // from the default Source.
 func Uint64() uint64 { return globalRand.Uint64() }
@@ -266,21 +342,41 @@ func Int32() int32 { return globalRand.Int32() }
 // Int returns a non-negative pseudo-random int from the default Source.
 func Int() int { return globalRand.Int() }
 
-// Int64N returns, as an int64, a non-negative pseudo-random number in the half-open interval [0,n)
+// Int64N returns, as an int64, a pseudo-random number in the half-open interval [0,n)
 // from the default Source.
 // It panics if n <= 0.
 func Int64N(n int64) int64 { return globalRand.Int64N(n) }
 
-// Int32N returns, as an int32, a non-negative pseudo-random number in the half-open interval [0,n)
+// Int32N returns, as an int32, a pseudo-random number in the half-open interval [0,n)
 // from the default Source.
 // It panics if n <= 0.
 func Int32N(n int32) int32 { return globalRand.Int32N(n) }
 
-// IntN returns, as an int, a non-negative pseudo-random number in the half-open interval [0,n)
+// IntN returns, as an int, a pseudo-random number in the half-open interval [0,n)
 // from the default Source.
 // It panics if n <= 0.
 func IntN(n int) int { return globalRand.IntN(n) }
 
+// UintN returns, as a uint, a pseudo-random number in the half-open interval [0,n)
+// from the default Source.
+// It panics if n == 0.
+func UintN(n uint) uint { return globalRand.UintN(n) }
+
+// N returns a pseudo-random number in the half-open interval [0,n) from the default Source.
+// The type parameter Int can be any integer type.
+// It panics if n <= 0.
+func N[Int intType](n Int) Int {
+	if n <= 0 {
+		panic("invalid argument to N")
+	}
+	return Int(globalRand.uint64n(uint64(n)))
+}
+
+type intType interface {
+	~int | ~int8 | ~int16 | ~int32 | ~int64 |
+		~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr
+}
+
 // Float64 returns, as a float64, a pseudo-random number in the half-open interval [0.0,1.0)
 // from the default Source.
 func Float64() float64 { return globalRand.Float64() }