diff --git a/README.md b/README.md
index 198d8d3..a9299b5 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ including forward recurrence for initial value-problems, [Olver's algorithm](htt
 value problems, and [Clenshaw's algorithm](https://en.wikipedia.org/wiki/Clenshaw_algorithm) for computing dot products with a given vector as needed for
 evaluating expansions in orthogonal polynomials.
 
-## 1. Forward recurrence
+## Forward recurrence
 
 As an example, consider computing the first million Chebyshev U polynomials:
 
@@ -37,7 +37,7 @@ julia> @time forwardrecurrence(fill(2, n), zeros(n), ones(n), x)
 ```
 Note this is faster than the explicit formula:
 ```julia
-ulia> @time sin.((1:n) .* acos(x)) ./ sin(acos(x))
+julia> θ = acos(x); @time sin.((1:n) .* θ) ./ sin(θ)
   0.010828 seconds (9 allocations: 7.630 MiB)
 1000000-element Vector{Float64}:
   1.0
@@ -57,16 +57,113 @@ ulia> @time sin.((1:n) .* acos(x)) ./ sin(acos(x))
  -0.713910144815891
  -0.7752581766080119
 ```
-Note forward recurrence is actually more accurate than the explicit formula which can be seen by comparison with a high precision calculation:
+Note forward recurrence is actually more accurate than the explicit formula which can be seen by comparison with a high precision calculation (though the accuracy is worse near ±1):
 ```julia
 julia> norm(forwardrecurrence(fill(2, n), zeros(n), ones(n), x) - forwardrecurrence(fill(2, n), zeros(n), ones(n), big(x)))
 5.93151258729473921191879934738972139533978757730288453476821870826190721098765e-10
 
-julia> norm(sin.((1:n) .* acos(x)) ./ sin(acos(x)) - forwardrecurrence(fill(2, n), zeros(n), ones(n), big(x)))
+julia> norm(sin.((1:n) .* θ) ./ sin(θ) - forwardrecurrence(fill(2, n), zeros(n), ones(n), big(x)))
 4.538171757754684777956652395339636096999624380286573911589424226541646390097931e-08
 ```
 
 We can make it even faster using FillArrays.jl:
 ```julia
+julia> using FillArrays
 
+julia> @time forwardrecurrence(Fill(2, n), Zeros(n), Ones(n), x)
+  0.003387 seconds (5 allocations: 7.630 MiB)
+1000000-element Vector{Float64}:
+  1.0
+  0.2
+ -0.96
+  ⋮
+  0.6324761476556076
+ -0.7139101449074483
+ -0.7752581766370972
 ```
+
+We can also use LazyArrays.jl to represent Chebyshev T recurrence lazily:
+```julia
+julia> using LazyArrays
+
+julia> @time forwardrecurrence(Vcat(1, Fill(2, n-1)), Zeros(n), Ones(n), x)
+  0.002740 seconds (103 allocations: 7.634 MiB)
+1000000-element Vector{Float64}:
+  1.0
+  0.1
+ -0.98
+ -0.296
+  0.9208
+  0.48016
+ -0.824768
+ -0.6451136
+  0.6957452799999999
+  ⋮
+  0.9968292069233
+  0.17885499217086823
+ -0.9610582084891264
+ -0.3710666338686935
+  0.8868448817153877
+  0.548435610211771
+ -0.7771577596730335
+ -0.7038671621463777
+ ```
+ And this matches the explicit formula:
+ ```julia
+julia> @time cos.((0:n-1) .* θ)
+  0.042121 seconds (6 allocations: 7.630 MiB, 75.68% gc time)
+1000000-element Vector{Float64}:
+  1.0
+  0.1
+ -0.98
+ -0.29600000000000004
+  0.9208
+  0.48016000000000003
+ -0.824768
+ -0.6451136000000001
+  0.6957452799999999
+  ⋮
+  0.9968292069266631
+  0.178854992187329
+ -0.9610582084686914
+ -0.37106663377504573
+  0.8868448817354809
+  0.5484356102238197
+ -0.7771577596278382
+ -0.7038671620731024
+ ```
+
+
+
+## Olver's algorithm
+
+## Clenshaw's algorithm
+
+Clenshaw's algorithm is an efficient way to compute expansions in orthogonal polynomials. Here we compute
+$$
+∑_{k=0}^{n-1} {U_k(x) \over k+1}
+$$
+for $x = 0.1$ and $n = 1,000,000$:
+
+```julia
+julia> @time clenshaw(inv.(1:n), fill(2, n), zeros(n), ones(n+1), x)
+  0.006446 seconds (12 allocations: 30.518 MiB)
+0.8396901361362448
+```
+
+This matches the explicit expression:
+```julia
+julia> @time sum(sin((k+1)*θ)/(k+1) for k=0:n-1)/sin(θ)
+  0.161225 seconds (8.03 M allocations: 124.408 MiB, 2.74% gc time, 16.44% compilation time)
+0.8396901361362544
+```
+
+Again, using FillArrays.jl is faster. And we can use InfiniteArrays.jl to allow any length of
+coefficients:
+```julia
+julia> using InfiniteArrays
+
+julia> @time clenshaw(inv.(1:n), Fill(2, ∞), Zeros(∞), Ones(∞), x)
+  0.004574 seconds (5 allocations: 7.630 MiB)
+0.8396901361362448
+```
\ No newline at end of file
diff --git a/ext/RecurrenceRelationshipsFillArraysExt.jl b/ext/RecurrenceRelationshipsFillArraysExt.jl
index fa3842a..82bf6c6 100644
--- a/ext/RecurrenceRelationshipsFillArraysExt.jl
+++ b/ext/RecurrenceRelationshipsFillArraysExt.jl
@@ -2,15 +2,15 @@ module RecurrenceRelationshipsFillArraysExt
 using RecurrenceRelationships, FillArrays
 using FillArrays: AbstractFill, getindex_value
 
-import RecurrenceRelationships: _forwardrecurrence_next, _clenshaw_next
+import RecurrenceRelationships: forwardrecurrence_next, clenshaw_next
 
 # special case for B[n] == 0
-Base.@propagate_inbounds _forwardrecurrence_next(n, A, ::Zeros, C, x, p0, p1) = muladd(A[n]*x, p1, -C[n]*p0)
+Base.@propagate_inbounds forwardrecurrence_next(n, A, ::Zeros, C, x, p0, p1) = muladd(A[n]*x, p1, -C[n]*p0)
 # special case for Chebyshev U
-Base.@propagate_inbounds _forwardrecurrence_next(n, A::AbstractFill, ::Zeros, C::Ones, x, p0, p1) = muladd(getindex_value(A)*x, p1, -p0)
+Base.@propagate_inbounds forwardrecurrence_next(n, A::AbstractFill, ::Zeros, C::Ones, x, p0, p1) = muladd(getindex_value(A)*x, p1, -p0)
 
-Base.@propagate_inbounds _clenshaw_next(n, A, ::Zeros, C, x, c, bn1, bn2) = muladd(A[n]*x, bn1, muladd(-C[n+1],bn2,c[n]))
+Base.@propagate_inbounds clenshaw_next(n, A, ::Zeros, C, x, c, bn1, bn2) = muladd(A[n]*x, bn1, muladd(-C[n+1],bn2,c[n]))
 # Chebyshev U
-Base.@propagate_inbounds _clenshaw_next(n, A::AbstractFill, ::Zeros, C::Ones, x, c, bn1, bn2) = muladd(getindex_value(A)*x, bn1, -bn2+c[n])
+Base.@propagate_inbounds clenshaw_next(n, A::AbstractFill, ::Zeros, C::Ones, x, c, bn1, bn2) = muladd(getindex_value(A)*x, bn1, -bn2+c[n])
 
 end
\ No newline at end of file
diff --git a/ext/RecurrenceRelationshipsLazyArraysExt.jl b/ext/RecurrenceRelationshipsLazyArraysExt.jl
index 750a0c4..8235389 100644
--- a/ext/RecurrenceRelationshipsLazyArraysExt.jl
+++ b/ext/RecurrenceRelationshipsLazyArraysExt.jl
@@ -2,16 +2,16 @@ module RecurrenceRelationshipsLazyArraysExt
 using RecurrenceRelationships, LazyArrays
 using LazyArrays.FillArrays
 
-import RecurrenceRelationships: _forwardrecurrence_next, _clenshaw_next
+import RecurrenceRelationships: forwardrecurrence_next, clenshaw_next
 import LazyArrays.FillArrays: AbstractFill
 
 ##
 # For Chebyshev T. Note the shift in indexing is fine due to the AbstractFill
 ##
-Base.@propagate_inbounds _forwardrecurrence_next(n, A::Vcat{<:Any,1,<:Tuple{<:Number,<:AbstractFill}}, B::Zeros, C::Ones, x, p0, p1) = 
-    _forwardrecurrence_next(n, A.args[2], B, C, x, p0, p1)
+Base.@propagate_inbounds forwardrecurrence_next(n, A::Vcat{<:Any,1,<:Tuple{<:Number,<:AbstractFill}}, B::Zeros, C::Ones, x, p0, p1) = 
+    forwardrecurrence_next(n, A.args[2], B, C, x, p0, p1)
 
 
-Base.@propagate_inbounds _clenshaw_next(n, A::Vcat{<:Any,1,<:Tuple{<:Number,<:AbstractFill}}, B::Zeros, C::Ones, x, c, bn1, bn2) = 
-    _clenshaw_next(n, A.args[2], B, C, x, c, bn1, bn2)
+Base.@propagate_inbounds clenshaw_next(n, A::Vcat{<:Any,1,<:Tuple{<:Number,<:AbstractFill}}, B::Zeros, C::Ones, x, c, bn1, bn2) = 
+    clenshaw_next(n, A.args[2], B, C, x, c, bn1, bn2)
 end
\ No newline at end of file
diff --git a/src/clenshaw.jl b/src/clenshaw.jl
index 9b77bfe..9ccf864 100644
--- a/src/clenshaw.jl
+++ b/src/clenshaw.jl
@@ -51,7 +51,7 @@ function clenshaw!(c::AbstractMatrix, A::AbstractVector, B::AbstractVector, C::A
     f
 end
 
-Base.@propagate_inbounds _clenshaw_next(n, A, B, C, x, c, bn1, bn2) = muladd(muladd(A[n],x,B[n]), bn1, muladd(-C[n+1],bn2,c[n]))
+Base.@propagate_inbounds clenshaw_next(n, A, B, C, x, c, bn1, bn2) = muladd(muladd(A[n],x,B[n]), bn1, muladd(-C[n+1],bn2,c[n]))
 
 # allow special casing first arg, for ChebyshevT in OrthogonalPolynomialsQuasi
 Base.@propagate_inbounds _clenshaw_first(A, B, C, x, c, bn1, bn2) = muladd(muladd(A[1],x,B[1]), bn1, muladd(-C[2],bn2,c[1]))
@@ -75,7 +75,6 @@ as defined in DLMF.
 If `c` is a matrix this treats each column as a separate vector of coefficients, returning a vector
 if `x` is a number and a matrix if `x` is a vector.
 """
-
 function clenshaw(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::Number)
     N = length(c)
     T = promote_type(eltype(c),eltype(A),eltype(B),eltype(C),typeof(x))
@@ -86,7 +85,7 @@ function clenshaw(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::Ab
         bn1 = convert(T,c[N])
         N == 1 && return bn1
         for n = N-1:-1:2
-            bn1,bn2 = _clenshaw_next(n, A, B, C, x, c, bn1, bn2),bn1
+            bn1,bn2 = clenshaw_next(n, A, B, C, x, c, bn1, bn2),bn1
         end
         bn1 = _clenshaw_first(A, B, C, x, c, bn1, bn2)
     end
diff --git a/src/forward.jl b/src/forward.jl
index 5ecaf9d..b7b53fb 100644
--- a/src/forward.jl
+++ b/src/forward.jl
@@ -11,29 +11,29 @@ function forwardrecurrence!(v::AbstractVector{T}, A::AbstractVector, B::Abstract
     N == 0 && return v
     length(A)+1 ≥ N && length(B)+1 ≥ N && length(C)+1 ≥ N || throw(ArgumentError("A, B, C must contain at least $(N-1) entries"))
     p1 = convert(T, N == 1 ? p0 : muladd(A[1],x,B[1])*p0) # avoid accessing A[1]/B[1] if empty
-    _forwardrecurrence!(v, A, B, C, x, convert(T, p0), p1)
+    forwardrecurrence!(v, A, B, C, x, convert(T, p0), p1)
 end
 
 
-Base.@propagate_inbounds _forwardrecurrence_next(n, A, B, C, x, p0, p1) = muladd(muladd(A[n],x,B[n]), p1, -C[n]*p0)
+Base.@propagate_inbounds forwardrecurrence_next(n, A, B, C, x, p0, p1) = muladd(muladd(A[n],x,B[n]), p1, -C[n]*p0)
 
 
 # this supports adaptivity: we can populate `v` for large `n`
-function _forwardrecurrence!(v::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x, p0, p1)
+function forwardrecurrence!(v::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x, p0, p1)
     N = length(v)
     N == 0 && return v
     v[1] = p0
     N == 1 && return v
     v[2] = p1
-    _forwardrecurrence!(v, A, B, C, x, 2:N)
+    forwardrecurrence_partial!(v, A, B, C, x, 2:N)
 end
 
-function _forwardrecurrence!(v::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x, kr::AbstractUnitRange)
+function forwardrecurrence_partial!(v::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x, kr::AbstractUnitRange)
     n₀, N = first(kr), last(kr)
     @boundscheck N > length(v) && throw(BoundsError(v, N))
     p0, p1 = v[n₀-1], v[n₀]
     @inbounds for n = n₀:N-1
-        p1,p0 = _forwardrecurrence_next(n, A, B, C, x, p0, p1),p1
+        p1,p0 = forwardrecurrence_next(n, A, B, C, x, p0, p1),p1
         v[n+1] = p1
     end
     v
@@ -54,15 +54,3 @@ forwardrecurrence(N::Integer, A::AbstractVector, B::AbstractVector, C::AbstractV
 forwardrecurrence(A::AbstractVector, B::AbstractVector, C::AbstractVector, x) = forwardrecurrence(min(length(A), length(B), length(C)), A, B, C, x)
 
 
-
-
-function initiateforwardrecurrence(N, A, B, C, x, μ)
-    T = promote_type(eltype(A), eltype(B), eltype(C), typeof(x))
-    p0 = convert(T, μ)
-    N == 0 && return zero(T), p0
-    p1 = convert(T, muladd(A[1],x,B[1])*p0)
-    @inbounds for n = 2:N
-        p1,p0 = _forwardrecurrence_next(n, A, B, C, x, p0, p1),p1
-    end
-    p0,p1
-end
diff --git a/test/runtests.jl b/test/runtests.jl
index 6c061c6..5496bf9 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -3,14 +3,19 @@ using FillArrays, LazyArrays
 
 @testset "forward/clenshaw" begin
     @testset "Chebyshev T" begin
+        c = [1,2,3]
+        @test @inferred(clenshaw(c,1)) ≡ 1 + 2 + 3
+        @test @inferred(clenshaw(c,0)) ≡ 1 + 0 - 3
+        @test @inferred(clenshaw(c,[-1,0,1])) == clenshaw!(c,[-1,0,1]) == [2,-2,6]
+        @test @inferred(clenshaw(c,0.1)) == 1 + 2*0.1 + 3*cos(2acos(0.1))
+        @test clenshaw(c,[-1,0,1]) isa Vector{Int}
+
+        @test clenshaw(Int[], 0) ≡ 0
+        @test clenshaw([1], 0) ≡ 1
+        @test clenshaw([1,2], 0) ≡ 1
+
         for elty in (Float64, Float32)
-            c = [1,2,3]
             cf = elty.(c)
-            @test @inferred(clenshaw(c,1)) ≡ 1 + 2 + 3
-            @test @inferred(clenshaw(c,0)) ≡ 1 + 0 - 3
-            @test @inferred(clenshaw(c,0.1)) == 1 + 2*0.1 + 3*cos(2acos(0.1))
-            @test @inferred(clenshaw(c,[-1,0,1])) == clenshaw!(c,[-1,0,1]) == [2,-2,6]
-            @test clenshaw(c,[-1,0,1]) isa Vector{Int}
             @test @inferred(clenshaw(elty[],1)) ≡ zero(elty)
 
             x = elty[1,0,0.1]
@@ -30,12 +35,11 @@ using FillArrays, LazyArrays
                 # modifies x and xv
                 @test clenshaw!(cv2, xv) == xv == x == clenshaw([1,3], elty[1,0,0.1])
             end
-
-            @testset "matrix coefficients" begin
-                c = [1 2; 3 4; 5 6]
-                @test clenshaw(c,0.1) ≈ [clenshaw(c[:,1],0.1), clenshaw(c[:,2],0.1)]
-                @test clenshaw(c,[0.1,0.2]) ≈ [clenshaw(c[:,1], 0.1) clenshaw(c[:,2], 0.1); clenshaw(c[:,1], 0.2) clenshaw(c[:,2], 0.2)]
-            end
+        end
+        @testset "matrix coefficients" begin
+            c = [1 2; 3 4; 5 6]
+            @test clenshaw(c,0.1) ≈ [clenshaw(c[:,1],0.1), clenshaw(c[:,2],0.1)]
+            @test clenshaw(c,[0.1,0.2]) ≈ [clenshaw(c[:,1], 0.1) clenshaw(c[:,2], 0.1); clenshaw(c[:,1], 0.2) clenshaw(c[:,2], 0.2)]
         end
     end
 
@@ -136,6 +140,14 @@ using FillArrays, LazyArrays
         c = randn(N)
         @test clenshaw(c, A, B, C, 0.1) == clenshaw(c, A, Vector(B), C, 0.1)
     end
+
+    @testset "LazyArrays" begin
+        n = 1000
+        x = 0.1
+        θ = acos(x)
+        @test forwardrecurrence(Vcat(1, Fill(2, n-1)), Zeros(n), Ones(n), x) ≈ cos.((0:n-1) .* θ)
+        @test clenshaw((1:n), Vcat(1, Fill(2, n-1)), Zeros(n), Ones(n+1), x) ≈ sum(cos(k * θ) * (k+1) for k = 0:n-1)
+    end
 end
 
 @testset "olver" begin