Skip to content

Commit

Permalink
support skipchars and peek(io, Char) (#77)
Browse files Browse the repository at this point in the history
* support skipchars and peek(io, Char)

* add Random for tests

* readchar fix from JuliaLang/julia#50552

* slight additional optimization
  • Loading branch information
stevengj authored Aug 14, 2023
1 parent 703f00f commit 9602735
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 7 deletions.
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ julia = "1.6"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

[targets]
test = ["Test"]
test = ["Test", "Random"]
68 changes: 63 additions & 5 deletions src/bufferedinputstream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,18 @@ function Base.show(io::IO, stream::BufferedInputStream{T}) where T
end

"""
Refill the buffer, optionally moving and retaining part of the data.
Refill the buffer, optionally moving and retaining part of the data,
ensuring buffer space to read at least `minalloc` bytes.
"""
function fillbuffer!(stream::BufferedInputStream)
function fillbuffer!(stream::BufferedInputStream, minalloc::Int = 1)
if eof(stream.source)
return 0
end

shiftdata!(stream)
margin = length(stream.buffer) - stream.available
if margin == 0
resize!(stream.buffer, length(stream.buffer) * 2)
if margin < minalloc
resize!(stream.buffer, length(stream.buffer) * 2 + minalloc-1)
end

nbytes = readbytes!(
Expand Down Expand Up @@ -208,6 +209,63 @@ for T in [Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, Float16,
end
end

# Fast Char reader, split into a lower-level routine shared by read(s, Char),
# peek(s, Char) and skipchars.
#
# Decodes the Char that starts at `stream.position` and returns `(char, p)`, where
# `p` is the buffer index just past the bytes that were decoded. The stream position
# is NOT committed here: callers that want to consume the Char assign
# `stream.position = p`, while peek-style callers simply drop `p`.
#
# Throws `EOFError` when no byte is available at the current position.
function _readchar(stream::BufferedInputStream)
    # Ask for up to 4 bytes (the longest sequence decoded below); the return value
    # is deliberately ignored because a shorter tail is still decodable.
    ensurebuffered!(stream, 4)
    p, avail = stream.position, stream.available
    p > avail && throw(EOFError())

    # code adapted from Base.read(io::IO, ::Type{Char}):
    @inbounds b0 = stream.buffer[p]
    p += 1
    # `l` is the bit offset of the last continuation byte expected for lead byte `b0`
    l = 0x08 * (0x04 - (leading_ones(b0) % UInt8))
    c = UInt32(b0) << 24
    if l < 0x18
        s = 16
        # Accept continuation bytes (10xxxxxx) while more are expected and buffered.
        while s >= l && p <= avail
            @inbounds b = stream.buffer[p]
            b & 0xc0 == 0x80 || break
            p += 1
            c |= UInt32(b) << s
            s -= 8
        end
    end
    return reinterpret(Char, c), p
end
# Read one Char from the stream and advance past the bytes that encode it.
function Base.read(stream::BufferedInputStream, ::Type{Char})
    checkopen(stream)
    char, nextpos = _readchar(stream)
    stream.position = nextpos  # commit: the decoded bytes are now consumed
    return char
end
# Return the next Char without consuming it: the end index reported by
# `_readchar` is discarded, so `stream.position` is never assigned here.
function Base.peek(stream::BufferedInputStream, ::Type{Char})
    checkopen(stream)
    return first(_readchar(stream))
end
# Advance `stream` past every leading Char for which `predicate` returns true.
# When `linecomment` is a Char, each occurrence of it also skips the rest of that
# line (through the next 0x0a byte, or to end of input). Returns `stream`.
function Base.skipchars(predicate, stream::BufferedInputStream; linecomment=nothing)
    checkopen(stream)
    while !eof(stream)
        char, nextpos = _readchar(stream)
        if char === linecomment
            stream.position = nextpos  # step over the comment marker itself
            # Scan buffered bytes for a newline, refilling until one is found or
            # no more data can be buffered.
            while ensurebuffered!(stream, 1)
                buffered = view(stream.buffer, 1:stream.available)
                newline = findnext(==(0x0a), buffered, stream.position)
                if newline === nothing
                    # whole buffer belongs to the comment; mark it consumed and refill
                    stream.position = stream.available + 1
                    continue
                end
                stream.position = newline + 1  # resume right after the line feed
                break
            end
            continue
        end
        predicate(char) || break
        stream.position = nextpos  # accepted: consume this Char
    end
    return stream
end

if isdefined(Base, :unsafe_read)
function Base.unsafe_read(stream::BufferedInputStream, ptr::Ptr{UInt8}, nb::UInt)
p = ptr
Expand Down Expand Up @@ -384,7 +442,7 @@ end

@inline function ensurebuffered!(stream::BufferedInputStream, nb::Integer)
if available_bytes(stream) < nb
fillbuffer!(stream)
fillbuffer!(stream, nb)
if available_bytes(stream) < nb
return false
end
Expand Down
28 changes: 27 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
using BufferedStreams
using Test
using Test, Random

Random.seed!(314159)

struct InfiniteStream <: IO
byte::UInt8
Expand Down Expand Up @@ -418,6 +420,30 @@ end
stream = BufferedInputStream(IOBuffer("some data"))
@test readavailable(stream) == b"some data"
end

@testset "read/peek/skipchars" begin
    # Three corpora. The RNG draws are kept in this order so the seeded data
    # is the same as before.
    asciitext = randstring(100)
    unicodetext = randstring("xα∆🐨", 100) * 'β' # mix of 1/2/3/4-byte chars
    invalidtext = String(rand(UInt8, 100)) # random bytes: contains invalid UTF-8 data

    # Reading Char by Char must reproduce the input for every buffer size.
    for data in (asciitext, unicodetext, invalidtext)
        for bufsize in (1, 2, 4, 15, 1024)
            io = BufferedInputStream(IOBuffer(data), bufsize)
            chars = collect(readeach(io, Char))
            @test join(chars) == data
        end
    end

    # peek must be repeatable and must agree with the subsequent read.
    for bufsize in (1, 2, 3, 4, 15)
        data = "xα∆🐨" * unicodetext * invalidtext
        io = BufferedInputStream(IOBuffer(data), bufsize)
        for expected in data
            @test peek(io, Char) == peek(io, Char) == expected
            @test read(io, Char) == expected
        end
    end

    # skipchars stops at '#' without comment handling, and skips the whole
    # comment line when '#' is the linecomment Char.
    for bufsize in (1, 2, 3, 4, 15)
        for c in "xα∆🐨", n in 1:5
            for linecomment in (nothing, '#')
                data = c^n * "#" * c^n * "\r\n" * "😄😢"
                io = BufferedInputStream(IOBuffer(data), bufsize)
                @test skipchars(==(c), io; linecomment=linecomment) === io
                expected = linecomment === nothing ? '#' : '😄'
                @test read(io, Char) == expected
            end
        end
    end
end
end


Expand Down

0 comments on commit 9602735

Please sign in to comment.