From 734eb8322192b9c22a09737777ce8e0ebc7a2ebd Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 13 Jan 2023 20:41:51 -0500 Subject: [PATCH 01/24] in-place `readuntil!` --- base/exports.jl | 1 + base/io.jl | 34 ++++++++++++++++++++++++++++++++++ base/iostream.jl | 6 ++++++ src/sys.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+) diff --git a/base/exports.jl b/base/exports.jl index 10f43825e12df..9043aa80fec7e 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -857,6 +857,7 @@ export readline, readlines, readuntil, + readuntil!, redirect_stdio, redirect_stderr, redirect_stdin, diff --git a/base/io.jl b/base/io.jl index 60a24831587cb..c027e4aab41bb 100644 --- a/base/io.jl +++ b/base/io.jl @@ -835,6 +835,40 @@ function readuntil(s::IO, delim::AbstractChar; keep::Bool=false) return String(take!(out)) end +# read at most length(buffer) bytes; there is also an optimize method +# for IOStream. +function _readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) + buflen = length(buffer) + iszero(buflen) && return 0 + nwritten = 0 + for c in readeach(s, UInt8) + @inbounds buffer[begin+nwritten] = c + nwritten += 1 + if c == delim || nwritten == buflen + break + end + end + return nwritten +end + +""" + readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) + +Read bytes from `s` and write them into `buffer` until a byte `== delim` +is written. Returns the number of bytes written into `buffer`. + +The size of `buffer` will be increased (via `resize!`) if needed, but it will +never be decreased. +""" +function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) + n = 0 + @inbounds while true + n += _readuntil!(s, @view(buffer[begin+n:end]), delim) + (buffer[end] == delim || eof(s)) && return n + resize!(buffer, 2*length(buffer)+1) + end +end + function readuntil(s::IO, delim::T; keep::Bool=false) where T out = (T === UInt8 ? 
StringVector(0) : Vector{T}()) for c in readeach(s, T) diff --git a/base/iostream.jl b/base/iostream.jl index 23dfb53256e82..8cd56002e0ce3 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -448,6 +448,12 @@ function readline(s::IOStream; keep::Bool=false) @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, '\n', 1, keep ? 0 : 2) end +function _readuntil!(buffer::Union{Vector{UInt8},FastContiguousSubArray{UInt8,1,<:Vector{UInt8}}}, + s::IOStream, delim::UInt8) + @_lock_ios s return Int(ccall(:jl_readuntil_buf, Csize_t, (Ptr{Cvoid}, UInt8, Ptr{UInt8}, Csize_t), + s.ios, delim, buf, length(buf) % Csize_t)) +end + function readbytes_all!(s::IOStream, b::Union{Array{UInt8}, FastContiguousSubArray{UInt8,<:Any,<:Array{UInt8}}}, nb::Integer) diff --git a/src/sys.c b/src/sys.c index 2de4bc61a20b8..df621ca9cfc67 100644 --- a/src/sys.c +++ b/src/sys.c @@ -316,6 +316,51 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim, uint8_t str, uint return (jl_value_t*)a; } +// read up to buflen bytes, including delim, into buf. returns number of bytes read. 
+JL_DLLEXPORT size_t jl_readuntil_buf(ios_t *s, uint8_t delim, uint8_t *buf, size_t buflen) +{ + // manually inlined common case + char *pd = (char*)memchr(s->buf + s->bpos, delim, (size_t)(s->size - s->bpos)); + if (pd) { + size_t n = pd - (s->buf + s->bpos) + 1; + if (n < buflen) n = buflen; + memcpy(buf, s->buf + s->bpos, n); + s->bpos += n; + return n; + } + else { + // code derived from ios_copyuntil + size_t total = 0, avail = (size_t)(s->size - s->bpos); + while (!ios_eof(s)) { + if (avail == 0) { + avail = ios_readprep(s, 160); + if (avail == 0) return total; + } + else if (avail > buflen) + avail = buflen; + char *pd = (char*)memchr(s->buf+s->bpos, delim, avail); + if (pd == NULL) { + memcpy(buf, s->buf+s->bpos, avail); + s->bpos += avail; + total += avail; + buflen -= avail; + if (buflen == 0) return total; + buf += avail; + avail = 0; + } + else { + size_t ntowrite = pd - (s->buf+s->bpos) + 1; + memcpy(buf, s->buf+s->bpos, ntowrite); + s->bpos += ntowrite; + total += ntowrite; + return total; + } + } + s->_eof = 1; + return total; + } +} + JL_DLLEXPORT int jl_ios_buffer_n(ios_t *s, const size_t n) { size_t space, ret; From e0b054276588c97010becdf83b2441d667c17d83 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 14 Jan 2023 08:32:09 -0500 Subject: [PATCH 02/24] docstring tweaks --- base/io.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/base/io.jl b/base/io.jl index c027e4aab41bb..222f4b51fee96 100644 --- a/base/io.jl +++ b/base/io.jl @@ -835,8 +835,8 @@ function readuntil(s::IO, delim::AbstractChar; keep::Bool=false) return String(take!(out)) end -# read at most length(buffer) bytes; there is also an optimize method -# for IOStream. 
+# read at most length(buffer) bytes; there is also an optimized method +# for IOStreams in iostream.jl function _readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) buflen = length(buffer) iszero(buflen) && return 0 @@ -855,7 +855,8 @@ end readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) Read bytes from `s` and write them into `buffer` until a byte `== delim` -is written. Returns the number of bytes written into `buffer`. +is written or [`eof(s)`](@ref) is reached. Returns the number of bytes +written into `buffer`. The size of `buffer` will be increased (via `resize!`) if needed, but it will never be decreased. From b097bcc41137313641cf8c859c51223ff1a2ac99 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 14 Jan 2023 11:27:39 -0500 Subject: [PATCH 03/24] fix bootstrapping --- base/io.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/io.jl b/base/io.jl index 222f4b51fee96..a77ac7c5dc11b 100644 --- a/base/io.jl +++ b/base/io.jl @@ -864,7 +864,7 @@ never be decreased. function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) n = 0 @inbounds while true - n += _readuntil!(s, @view(buffer[begin+n:end]), delim) + n += _readuntil!(s, view(buffer, firstindex(buffer)+n:lastindex(buffer)), delim) (buffer[end] == delim || eof(s)) && return n resize!(buffer, 2*length(buffer)+1) end From 81bb0a692d40d0b7b7fdaec9700f9a461559db7f Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Sat, 14 Jan 2023 11:40:36 -0500 Subject: [PATCH 04/24] bugfix --- base/io.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/io.jl b/base/io.jl index a77ac7c5dc11b..1eaf2c957dace 100644 --- a/base/io.jl +++ b/base/io.jl @@ -865,7 +865,7 @@ function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) n = 0 @inbounds while true n += _readuntil!(s, view(buffer, firstindex(buffer)+n:lastindex(buffer)), delim) - (buffer[end] == delim || eof(s)) && return n + (buffer[n] == delim || eof(s)) && return n resize!(buffer, 2*length(buffer)+1) end end From 9bb9ca67907217701671d3db34ccdb93ade1e8e9 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 14 Jan 2023 12:17:58 -0500 Subject: [PATCH 05/24] more general delimiters --- base/io.jl | 70 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 19 deletions(-) diff --git a/base/io.jl b/base/io.jl index 1eaf2c957dace..f9ecb232bd867 100644 --- a/base/io.jl +++ b/base/io.jl @@ -504,6 +504,9 @@ The delimiter can be a `UInt8`, `AbstractChar`, string, or vector. Keyword argument `keep` controls whether the delimiter is included in the result. The text is assumed to be encoded in UTF-8. +See also [`readuntil!`](@ref) to write in-place into a buffer rather than +allocating a string. + # Examples ```jldoctest julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); @@ -835,6 +838,36 @@ function readuntil(s::IO, delim::AbstractChar; keep::Bool=false) return String(take!(out)) end +function readuntil(s::IO, delim::T; keep::Bool=false) where T + out = (T === UInt8 ? 
StringVector(0) : Vector{T}()) + for c in readeach(s, T) + if c == delim + keep && push!(out, c) + break + end + push!(out, c) + end + return out +end + +""" + readuntil!(stream::IO, buffer::AbstractVector{UInt8}, delim) + +Read bytes from `stream` and write them into `buffer` until the given +delimiter is read and written, or until the end of the stream is reached. +Returns the number of bytes written into `buffer` (including the delimiter). + +The delimiter can be a `UInt8`, `AbstractChar`, string, or vector of `UInt8`. +The input stream is assumed to be encoded in UTF-8. + +See also the similar function [`readuntil`](@ref), which returns a +newly allocated `String` rather than writing into a buffer. + +!!! compat "Julia 1.10" + The `readuntil!` function was added in Julia 1.10. +""" +function readuntil! end + # read at most length(buffer) bytes; there is also an optimized method # for IOStreams in iostream.jl function _readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) @@ -851,16 +884,6 @@ function _readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) return nwritten end -""" - readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) - -Read bytes from `s` and write them into `buffer` until a byte `== delim` -is written or [`eof(s)`](@ref) is reached. Returns the number of bytes -written into `buffer`. - -The size of `buffer` will be increased (via `resize!`) if needed, but it will -never be decreased. -""" function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) n = 0 @inbounds while true @@ -870,16 +893,25 @@ function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) end end -function readuntil(s::IO, delim::T; keep::Bool=false) where T - out = (T === UInt8 ? 
StringVector(0) : Vector{T}()) - for c in readeach(s, T) - if c == delim - keep && push!(out, c) - break - end - push!(out, c) +function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::AbstractVector{UInt8}) + out = IOBuffer(buffer, write=true) + readuntil_vector!(s, delim, true, out) + return out.size +end + +function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::AbstractString) + codeunit(delim) === UInt8 || throw(ArgumentError("readuntil! delimiter must have UInt8 codeunits")) + return readuntil!(s, buffer, codeunits(delim)) +end + +function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::AbstractChar) + delim ≤ '\x7f' && return readuntil!(s, buffer, delim % UInt8) + out = IOBuffer(buffer, write=true) + for c in readeach(s, Char) + write(out, c) + c == delim && break end - return out + return out.size end # requires that indices for target are the integer unit range from firstindex to lastindex From fc3ea49015c66933dc4634aadd66a2e8928e54e6 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 14 Jan 2023 19:22:48 -0500 Subject: [PATCH 06/24] fix argument order --- base/iostream.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/base/iostream.jl b/base/iostream.jl index 8cd56002e0ce3..2424aa228556b 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -448,8 +448,9 @@ function readline(s::IOStream; keep::Bool=false) @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, '\n', 1, keep ? 
0 : 2) end -function _readuntil!(buffer::Union{Vector{UInt8},FastContiguousSubArray{UInt8,1,<:Vector{UInt8}}}, - s::IOStream, delim::UInt8) +function _readuntil!(s::IOStream, + buffer::Union{Vector{UInt8},FastContiguousSubArray{UInt8,1,<:Vector{UInt8}}}, + delim::UInt8) @_lock_ios s return Int(ccall(:jl_readuntil_buf, Csize_t, (Ptr{Cvoid}, UInt8, Ptr{UInt8}, Csize_t), s.ios, delim, buf, length(buf) % Csize_t)) end From 942beadb39622a696677432f16f947d048051121 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 14 Jan 2023 19:23:13 -0500 Subject: [PATCH 07/24] add at least 128 bytes on resize, in case caller starts with an empty buffer --- base/io.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/io.jl b/base/io.jl index f9ecb232bd867..919deeb08a4b6 100644 --- a/base/io.jl +++ b/base/io.jl @@ -889,7 +889,7 @@ function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) @inbounds while true n += _readuntil!(s, view(buffer, firstindex(buffer)+n:lastindex(buffer)), delim) (buffer[n] == delim || eof(s)) && return n - resize!(buffer, 2*length(buffer)+1) + resize!(buffer, 2*length(buffer)+128) end end From 1e7831f40cac833395444d0ba09a659bb1c64f45 Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Sat, 14 Jan 2023 20:09:20 -0500 Subject: [PATCH 08/24] bug fixes and improvements in jl_readuntil_buf --- base/iostream.jl | 2 +- src/sys.c | 29 ++++++++++++++--------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/base/iostream.jl b/base/iostream.jl index 2424aa228556b..f47da4899da16 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -452,7 +452,7 @@ function _readuntil!(s::IOStream, buffer::Union{Vector{UInt8},FastContiguousSubArray{UInt8,1,<:Vector{UInt8}}}, delim::UInt8) @_lock_ios s return Int(ccall(:jl_readuntil_buf, Csize_t, (Ptr{Cvoid}, UInt8, Ptr{UInt8}, Csize_t), - s.ios, delim, buf, length(buf) % Csize_t)) + s.ios, delim, buffer, length(buffer) % Csize_t)) end function readbytes_all!(s::IOStream, diff --git a/src/sys.c b/src/sys.c index df621ca9cfc67..e4de6b351b58c 100644 --- a/src/sys.c +++ b/src/sys.c @@ -320,37 +320,36 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim, uint8_t str, uint JL_DLLEXPORT size_t jl_readuntil_buf(ios_t *s, uint8_t delim, uint8_t *buf, size_t buflen) { // manually inlined common case - char *pd = (char*)memchr(s->buf + s->bpos, delim, (size_t)(s->size - s->bpos)); + size_t avail = (size_t)(s->size - s->bpos); + if (avail > buflen) avail = buflen; + char *pd = (char*)memchr(s->buf + s->bpos, delim, avail); if (pd) { size_t n = pd - (s->buf + s->bpos) + 1; - if (n < buflen) n = buflen; memcpy(buf, s->buf + s->bpos, n); s->bpos += n; return n; } else { + size_t total = avail; + memcpy(buf, s->buf + s->bpos, avail); + s->bpos += avail; + if (avail == buflen) return total; + // code derived from ios_copyuntil - size_t total = 0, avail = (size_t)(s->size - s->bpos); while (!ios_eof(s)) { - if (avail == 0) { - avail = ios_readprep(s, 160); - if (avail == 0) return total; - } - else if (avail > buflen) - avail = buflen; + avail = ios_readprep(s, 160); // read LINE_CHUNK_SIZE + if (avail == 0) break; + if (total+avail > buflen) avail = buflen-total; char *pd = 
(char*)memchr(s->buf+s->bpos, delim, avail); if (pd == NULL) { - memcpy(buf, s->buf+s->bpos, avail); + memcpy(buf+total, s->buf+s->bpos, avail); s->bpos += avail; total += avail; - buflen -= avail; - if (buflen == 0) return total; - buf += avail; - avail = 0; + if (buflen == total) return total; } else { size_t ntowrite = pd - (s->buf+s->bpos) + 1; - memcpy(buf, s->buf+s->bpos, ntowrite); + memcpy(buf+total, s->buf+s->bpos, ntowrite); s->bpos += ntowrite; total += ntowrite; return total; From d7d7b72e6569af812ec380beb73de05c47e2290d Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 14 Jan 2023 21:51:57 -0500 Subject: [PATCH 09/24] add readuntil! to manual --- doc/src/base/io-network.md | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/src/base/io-network.md b/doc/src/base/io-network.md index 4e371039f1a9b..67c99c5e82bdf 100644 --- a/doc/src/base/io-network.md +++ b/doc/src/base/io-network.md @@ -69,6 +69,7 @@ Base.dump Meta.@dump Base.readline Base.readuntil +Base.readuntil! Base.readlines Base.eachline Base.displaysize From 88b215522d4470c6208c030d58c7938370d456c0 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sun, 15 Jan 2023 16:53:50 -0500 Subject: [PATCH 10/24] optimized IOBuffer readuntil --- base/iobuffer.jl | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index 6c95285f232f2..312d8d0c472f8 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -516,32 +516,26 @@ function occursin(delim::UInt8, buf::GenericIOBuffer) return false end +function readuntil!(io::GenericIOBuffer, buffer::AbstractVector{UInt8}, delim::UInt8) + data = view(io.data, io.ptr:io.size) + # note: findfirst + copyto! is much faster than a single loop + # except for nout ≲ 20. A single loop is 2x faster for nout=5. 
+ nout = something(findfirst(==(delim), data), length(data)) + nout > length(buffer) && resize!(buffer, nout) + copyto!(buffer, firstindex(buffer), data, 1, nout) + io.ptr += nout + return nout +end + function readuntil(io::GenericIOBuffer, delim::UInt8; keep::Bool=false) lb = 70 A = StringVector(lb) - nread = 0 - nout = 0 - data = io.data - for i = io.ptr : io.size - @inbounds b = data[i] - nread += 1 - if keep || b != delim - nout += 1 - if nout > lb - lb = nout*2 - resize!(A, lb) - end - @inbounds A[nout] = b - end - if b == delim - break - end - end - io.ptr += nread - if lb != nout + nout = readuntil!(io, A, delim) + @inbounds nout -= !keep & (A[nout] == delim) + if length(A) != nout resize!(A, nout) end - A + return A end # copy-free crc32c of IOBuffer: From 53e1731e9376e0d342408c785a56bfde3bf29ebe Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sun, 15 Jan 2023 17:59:48 -0500 Subject: [PATCH 11/24] tests, fixes --- base/io.jl | 12 +++++++++++- test/read.jl | 10 ++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/base/io.jl b/base/io.jl index 919deeb08a4b6..735563d003931 100644 --- a/base/io.jl +++ b/base/io.jl @@ -888,7 +888,7 @@ function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) n = 0 @inbounds while true n += _readuntil!(s, view(buffer, firstindex(buffer)+n:lastindex(buffer)), delim) - (buffer[n] == delim || eof(s)) && return n + ((n > 0 && buffer[n] == delim) || eof(s)) && return n resize!(buffer, 2*length(buffer)+128) end end @@ -900,6 +900,11 @@ function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::AbstractVector{ end function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::AbstractString) + # small-string delim optimizations + x = Iterators.peel(delim) + isnothing(x) && return 0 + c, rest = x + isempty(rest) && return readuntil!(s, buffer, c) codeunit(delim) === UInt8 || throw(ArgumentError("readuntil! 
delimiter must have UInt8 codeunits")) return readuntil!(s, buffer, codeunits(delim)) end @@ -914,6 +919,11 @@ function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::AbstractChar) return out.size end +readuntil!(io::AbstractPipe, buffer::AbstractVector{UInt8}, delim::UInt8) = readuntil!(pipe_reader(io)::IO, buffer, delim) +readuntil!(io::AbstractPipe, buffer::AbstractVector{UInt8}, delim::AbstractChar) = readuntil!(pipe_reader(io)::IO, buffer, ardelimg) +readuntil!(io::AbstractPipe, buffer::AbstractVector{UInt8}, delim::AbstractString) = readuntil!(pipe_reader(io)::IO, buffer, delim) +readuntil!(io::AbstractPipe, buffer::AbstractVector{UInt8}, delim::AbstractVector{UInt8}) = readuntil!(pipe_reader(io)::IO, buffer, delim) + # requires that indices for target are the integer unit range from firstindex to lastindex # returns whether the delimiter was matched # uses the Knuth–Morris–Pratt_algorithm, with the first and second cache entries unrolled diff --git a/test/read.jl b/test/read.jl index b8060a023333f..1a18bc852af23 100644 --- a/test/read.jl +++ b/test/read.jl @@ -145,6 +145,7 @@ for (name, f) in l verbose && println("$name readuntil...") for (t, s, m, kept) in [ + ("a", "", "", ""), ("a", "ab", "a", "a"), ("b", "ab", "b", "b"), ("α", "αγ", "α", "α"), @@ -152,16 +153,19 @@ for (name, f) in l ("bc", "abc", "bc", "bc"), ("αβ", "αβγ", "αβ", "αβ"), ("aaabc", "ab", "aa", "aaab"), + ("aaabc", "b", "aaa", "aaab"), ("aaabc", "ac", "aaabc", "aaabc"), ("aaabc", "aab", "a", "aaab"), ("aaabc", "aac", "aaabc", "aaabc"), ("αααβγ", "αβ", "αα", "αααβ"), + ("αααβγ", "β", "ααα", "αααβ"), ("αααβγ", "ααβ", "α", "αααβ"), ("αααβγ", "αγ", "αααβγ", "αααβγ"), ("barbarbarians", "barbarian", "bar", "barbarbarian"), ("abcaabcaabcxl", "abcaabcx", "abca", "abcaabcaabcx"), ("abbaabbaabbabbaax", "abbaabbabbaax", "abba", "abbaabbaabbabbaax"), ("abbaabbabbaabbaabbabbaax", "abbaabbabbaax", "abbaabbabba", "abbaabbabbaabbaabbabbaax"), + ('a'^500 * 'x' * "bbbb", "x", 'a'^500, 'a'^500 * 
'x'), ] local t, s, m, kept @test readuntil(io(t), s) == m @@ -174,6 +178,12 @@ for (name, f) in l @test readuntil(io(t), unsafe_wrap(Vector{UInt8},s), keep=true) == unsafe_wrap(Vector{UInt8},kept) @test readuntil(io(t), collect(s)::Vector{Char}) == Vector{Char}(m) @test readuntil(io(t), collect(s)::Vector{Char}, keep=true) == Vector{Char}(kept) + + for blen in (0,1,100) + b = Vector{UInt8}(undef, blen) + n = readuntil!(io(t), b, s) + @test b[1:n] == codeunits(kept) + end end cleanup() From 635d50e42c34908aea826b249474b4ff582ccb10 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sun, 15 Jan 2023 18:01:40 -0500 Subject: [PATCH 12/24] NEWS --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 8907275925155..3c4e95ff158ac 100644 --- a/NEWS.md +++ b/NEWS.md @@ -21,6 +21,7 @@ Build system changes New library functions --------------------- +* `copyuntil(out, io, delim)` copies data into an `out` stream until `delim` ([#48273]). New library features -------------------- From 2489441450e494ea2f60e0e745cd244bad77f32b Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sun, 15 Jan 2023 20:07:23 -0500 Subject: [PATCH 13/24] bugfix --- base/iobuffer.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index 312d8d0c472f8..870c87f048f18 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -531,7 +531,7 @@ function readuntil(io::GenericIOBuffer, delim::UInt8; keep::Bool=false) lb = 70 A = StringVector(lb) nout = readuntil!(io, A, delim) - @inbounds nout -= !keep & (A[nout] == delim) + @inbounds nout -= !keep & (nout > 0 && A[nout] == delim) if length(A) != nout resize!(A, nout) end From dcdff4432fde528d9ad43f08e6f6dde7e85850ac Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 11 Feb 2023 15:43:05 -0500 Subject: [PATCH 14/24] readuntil(out::IO, ...) instead of readuntil! 
--- base/exports.jl | 1 - base/io.jl | 140 +++++++++++-------------------------- base/iobuffer.jl | 18 +---- base/iostream.jl | 37 ++++++++-- doc/src/base/io-network.md | 1 - src/flisp/iostream.c | 4 +- src/support/ios.c | 8 +-- src/support/ios.h | 2 +- src/sys.c | 2 +- test/read.jl | 14 ++-- 10 files changed, 92 insertions(+), 135 deletions(-) diff --git a/base/exports.jl b/base/exports.jl index 9043aa80fec7e..10f43825e12df 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -857,7 +857,6 @@ export readline, readlines, readuntil, - readuntil!, redirect_stdio, redirect_stderr, redirect_stdin, diff --git a/base/io.jl b/base/io.jl index 735563d003931..c4d7943a01399 100644 --- a/base/io.jl +++ b/base/io.jl @@ -440,10 +440,10 @@ for f in ( end read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io)::IO, byte)::UInt8 unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io)::IO, p, nb) -readuntil(io::AbstractPipe, arg::UInt8; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...) -readuntil(io::AbstractPipe, arg::AbstractChar; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...) -readuntil(io::AbstractPipe, arg::AbstractString; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...) -readuntil(io::AbstractPipe, arg::AbstractVector; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...) +readuntil(out::IO, io::AbstractPipe, arg::UInt8; kw...) = readuntil(out, pipe_reader(io)::IO, arg; kw...) +readuntil(out::IO, io::AbstractPipe, arg::AbstractChar; kw...) = readuntil(out, pipe_reader(io)::IO, arg; kw...) +readuntil(out::IO, io::AbstractPipe, arg::AbstractString; kw...) = readuntil(out, pipe_reader(io)::IO, arg; kw...) +readuntil(out::IO, io::AbstractPipe, arg::AbstractVector; kw...) = readuntil(out, pipe_reader(io)::IO, arg; kw...) 
readuntil_vector!(io::AbstractPipe, target::AbstractVector, keep::Bool, out) = readuntil_vector!(pipe_reader(io)::IO, target, keep, out) readbytes!(io::AbstractPipe, target::AbstractVector{UInt8}, n=length(target)) = readbytes!(pipe_reader(io)::IO, target, n) peek(io::AbstractPipe, ::Type{T}) where {T} = peek(pipe_reader(io)::IO, T)::T @@ -496,16 +496,19 @@ function read! end read!(filename::AbstractString, a) = open(io->read!(io, a), convert(String, filename)::String) """ - readuntil(stream::IO, delim; keep::Bool = false) - readuntil(filename::AbstractString, delim; keep::Bool = false) + readuntil([out::IO], stream::IO, delim; keep::Bool = false) + readuntil([out::IO], filename::AbstractString, delim; keep::Bool = false) -Read a string from an I/O stream or a file, up to the given delimiter. +Read a string from an I/O `stream` or a file, up to the given delimiter. The delimiter can be a `UInt8`, `AbstractChar`, string, or vector. Keyword argument `keep` controls whether the delimiter is included in the result. The text is assumed to be encoded in UTF-8. -See also [`readuntil!`](@ref) to write in-place into a buffer rather than -allocating a string. +By default, returns a `String` if `delim` is an `AbstractChar` or a string +or otherwise returns a `Vector{typeof(delim)}`. +If the optional `out` argument is supplied, then the data is instead written to the `out` +stream, returning `out`. (This can be used, for example, to read data into +a pre-allocated [`IOBuffer`](@ref).) # Examples ```jldoctest @@ -520,10 +523,13 @@ julia> readuntil("my_file.txt", '.', keep = true) julia> rm("my_file.txt") ``` """ -readuntil(filename::AbstractString, args...; kw...) = open(io->readuntil(io, args...; kw...), convert(String, filename)::String) +readuntil(stream::IO, delim; kw...) = take!(readuntil(IOBuffer(), stream, delim; kw...)) +readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) 
= String(take!(readuntil(IOBuffer(StringVector(70),write=true), stream, delim; kw...))) +readuntil(out::IO, filename::AbstractString, delim; kw...) = open(io->readuntil(out, io, delim; kw...), convert(String, filename)::String) +readuntil(filename::AbstractString, delim; kw...) = open(io->readuntil(io, delim; kw...), convert(String, filename)::String) """ - readline(io::IO=stdin; keep::Bool=false) + readline([out::IO], io::IO=stdin; keep::Bool=false) readline(filename::AbstractString; keep::Bool=false) Read a single line of text from the given I/O stream or file (defaults to `stdin`). @@ -533,6 +539,8 @@ false (as it is by default), these trailing newline characters are removed from line before it is returned. When `keep` is true, they are returned as part of the line. +See also [`readuntil`](@ref) for reading until more general delimiters. + # Examples ```jldoctest julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); @@ -819,15 +827,10 @@ end # read(io, T) is not defined for other AbstractChar: implementations # must provide their own encoding-specific method. -# readuntil_string is useful below since it has -# an optimized method for s::IOStream -readuntil_string(s::IO, delim::UInt8, keep::Bool) = String(readuntil(s, delim, keep=keep))::String - -function readuntil(s::IO, delim::AbstractChar; keep::Bool=false) +function readuntil(out::IO, s::IO, delim::AbstractChar; keep::Bool=false) if delim ≤ '\x7f' - return readuntil_string(s, delim % UInt8, keep) + return readuntil(out, s, delim % UInt8; keep) end - out = IOBuffer() for c in readeach(s, Char) if c == delim keep && write(out, c) @@ -835,94 +838,27 @@ function readuntil(s::IO, delim::AbstractChar; keep::Bool=false) end write(out, c) end - return String(take!(out)) + return out end -function readuntil(s::IO, delim::T; keep::Bool=false) where T - out = (T === UInt8 ? 
StringVector(0) : Vector{T}()) +# note: optimized methods of _readuntil for IOStreams and delim::UInt8 in iostream.jl +function _readuntil(out, s::IO, delim::T, keep::Bool) where T + output! = isa(out, IO) ? write : push! for c in readeach(s, T) if c == delim - keep && push!(out, c) + keep && output(out, c) break end - push!(out, c) + output(out, c) end return out end - -""" - readuntil!(stream::IO, buffer::AbstractVector{UInt8}, delim) - -Read bytes from `stream` and write them into `buffer` until the given -delimiter is read and written, or until the end of the stream is reached. -Returns the number of bytes written into `buffer` (including the delimiter). - -The delimiter can be a `UInt8`, `AbstractChar`, string, or vector of `UInt8`. -The input stream is assumed to be encoded in UTF-8. - -See also the similar function [`readuntil`](@ref), which returns a -newly allocated `String` rather than writing into a buffer. - -!!! compat "Julia 1.10" - The `readuntil!` function was added in Julia 1.10. -""" -function readuntil! 
end - -# read at most length(buffer) bytes; there is also an optimized method -# for IOStreams in iostream.jl -function _readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) - buflen = length(buffer) - iszero(buflen) && return 0 - nwritten = 0 - for c in readeach(s, UInt8) - @inbounds buffer[begin+nwritten] = c - nwritten += 1 - if c == delim || nwritten == buflen - break - end - end - return nwritten -end - -function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::UInt8) - n = 0 - @inbounds while true - n += _readuntil!(s, view(buffer, firstindex(buffer)+n:lastindex(buffer)), delim) - ((n > 0 && buffer[n] == delim) || eof(s)) && return n - resize!(buffer, 2*length(buffer)+128) - end -end - -function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::AbstractVector{UInt8}) - out = IOBuffer(buffer, write=true) - readuntil_vector!(s, delim, true, out) - return out.size -end - -function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::AbstractString) - # small-string delim optimizations - x = Iterators.peel(delim) - isnothing(x) && return 0 - c, rest = x - isempty(rest) && return readuntil!(s, buffer, c) - codeunit(delim) === UInt8 || throw(ArgumentError("readuntil! 
delimiter must have UInt8 codeunits")) - return readuntil!(s, buffer, codeunits(delim)) -end - -function readuntil!(s::IO, buffer::AbstractVector{UInt8}, delim::AbstractChar) - delim ≤ '\x7f' && return readuntil!(s, buffer, delim % UInt8) - out = IOBuffer(buffer, write=true) - for c in readeach(s, Char) - write(out, c) - c == delim && break - end - return out.size -end - -readuntil!(io::AbstractPipe, buffer::AbstractVector{UInt8}, delim::UInt8) = readuntil!(pipe_reader(io)::IO, buffer, delim) -readuntil!(io::AbstractPipe, buffer::AbstractVector{UInt8}, delim::AbstractChar) = readuntil!(pipe_reader(io)::IO, buffer, ardelimg) -readuntil!(io::AbstractPipe, buffer::AbstractVector{UInt8}, delim::AbstractString) = readuntil!(pipe_reader(io)::IO, buffer, delim) -readuntil!(io::AbstractPipe, buffer::AbstractVector{UInt8}, delim::AbstractVector{UInt8}) = readuntil!(pipe_reader(io)::IO, buffer, delim) +readuntil(s::IO, delim::T; keep::Bool=false) where T = + _readuntil(Vector{T}(), s, delim, keep) +readuntil(s::IO, delim::UInt8; keep::Bool=false) = + _readuntil(StringVector(0), s, delim, keep) +readuntil(out::IO, s::IO, delim::T; keep::Bool=false) where T = + _readuntil(out, s, delim, keep) # requires that indices for target are the integer unit range from firstindex to lastindex # returns whether the delimiter was matched @@ -1010,20 +946,20 @@ function readuntil_vector!(io::IO, target::AbstractVector{T}, keep::Bool, out) w return false end -function readuntil(io::IO, target::AbstractString; keep::Bool=false) +function readuntil(out::IO, io::IO, target::AbstractString; keep::Bool=false) # small-string target optimizations x = Iterators.peel(target) - isnothing(x) && return "" + isnothing(x) && return out c, rest = x if isempty(rest) && c <= '\x7f' - return readuntil_string(io, c % UInt8, keep) + return readuntil(out, io, c % UInt8; keep) end # convert String to a utf8-byte-iterator if !(target isa String) && !(target isa SubString{String}) target = String(target) end target 
= codeunits(target)::AbstractVector - return String(readuntil(io, target, keep=keep)) + return readuntil(out, io, target, keep=keep) end function readuntil(io::IO, target::AbstractVector{T}; keep::Bool=false) where T @@ -1031,6 +967,8 @@ function readuntil(io::IO, target::AbstractVector{T}; keep::Bool=false) where T readuntil_vector!(io, target, keep, out) return out end +readuntil(out::IO, io::IO, target::AbstractVector; keep::Bool=false) = + (readuntil_vector!(io, target, keep, out); out) """ readchomp(x) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index 870c87f048f18..cd07569bc982e 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -516,26 +516,14 @@ function occursin(delim::UInt8, buf::GenericIOBuffer) return false end -function readuntil!(io::GenericIOBuffer, buffer::AbstractVector{UInt8}, delim::UInt8) +function readuntil(out::IO, io::GenericIOBuffer, buffer::AbstractVector{UInt8}, delim::UInt8) data = view(io.data, io.ptr:io.size) # note: findfirst + copyto! is much faster than a single loop # except for nout ≲ 20. A single loop is 2x faster for nout=5. 
nout = something(findfirst(==(delim), data), length(data)) - nout > length(buffer) && resize!(buffer, nout) - copyto!(buffer, firstindex(buffer), data, 1, nout) + write(out, view(io.data, io.ptr:io.ptr+nout-1)) io.ptr += nout - return nout -end - -function readuntil(io::GenericIOBuffer, delim::UInt8; keep::Bool=false) - lb = 70 - A = StringVector(lb) - nout = readuntil!(io, A, delim) - @inbounds nout -= !keep & (nout > 0 && A[nout] == delim) - if length(A) != nout - resize!(A, nout) - end - return A + return out end # copy-free crc32c of IOBuffer: diff --git a/base/iostream.jl b/base/iostream.jl index f47da4899da16..73c99dac9dda6 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -443,16 +443,43 @@ end function readuntil_string(s::IOStream, delim::UInt8, keep::Bool) @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, delim, 1, !keep) end +readuntil(s::IOStream, delim::AbstractChar; keep::Bool=false) = + delim ≤ '\x7f' ? readuntil_string(s, delim, keep) : + String(take!(readuntil(IOBuffer(StringVector(70),write=true), s, delim; keep))) function readline(s::IOStream; keep::Bool=false) @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, '\n', 1, keep ? 0 : 2) end -function _readuntil!(s::IOStream, - buffer::Union{Vector{UInt8},FastContiguousSubArray{UInt8,1,<:Vector{UInt8}}}, - delim::UInt8) - @_lock_ios s return Int(ccall(:jl_readuntil_buf, Csize_t, (Ptr{Cvoid}, UInt8, Ptr{UInt8}, Csize_t), - s.ios, delim, buffer, length(buffer) % Csize_t)) +function readuntil(out::IOBuffer, s::IOStream, delim::UInt8; keep::Bool=false) + d = out.data + ptr = (out.append ? 
out.size+1 : out.ptr) + len = length(d) + while true + GC.@preserve data @_lock_ios s n= + Int(ccall(:jl_readuntil_buf, Csize_t, (Ptr{Cvoid}, UInt8, Ptr{UInt8}, Csize_t), + s.ios, delim, pointer(d, ptr), (len - ptr + 1) % Csize_t)) + iszero(n) && break + ptr += n + if d[ptr-1] == delim + keep || (ptr -= 1) + break; + end + (eof(s) || len == out.maxsize) && break + len = min(2len + 128, out.maxsize) + resize!(d, len) + end + out.size = max(out.size, ptr - 1) + if !out.append + out.ptr = ptr + end + return out +end + +function readuntil(out::IOStream, s::IOStream, delim::UInt8; keep::Bool=false) + @_lock_ios out @_lock_ios s ccall(:ios_copyuntil, Csize_t, + (Ptr{Cvoid}, Ptr{Cvoid}, UInt8, Cint), out.ios, s.ios, delim, keep) + return out end function readbytes_all!(s::IOStream, diff --git a/doc/src/base/io-network.md b/doc/src/base/io-network.md index 67c99c5e82bdf..4e371039f1a9b 100644 --- a/doc/src/base/io-network.md +++ b/doc/src/base/io-network.md @@ -69,7 +69,6 @@ Base.dump Meta.@dump Base.readline Base.readuntil -Base.readuntil! 
Base.readlines Base.eachline Base.displaysize diff --git a/src/flisp/iostream.c b/src/flisp/iostream.c index b2b2477bb43c6..c1c6d965d2917 100644 --- a/src/flisp/iostream.c +++ b/src/flisp/iostream.c @@ -354,7 +354,7 @@ value_t fl_ioreaduntil(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) ios_setbuf(&dest, data, 80, 0); char delim = get_delim_arg(fl_ctx, args[1], "io.readuntil"); ios_t *src = toiostream(fl_ctx, args[0], "io.readuntil"); - size_t n = ios_copyuntil(&dest, src, delim); + size_t n = ios_copyuntil(&dest, src, delim, 1); cv->len = n; if (dest.buf != data) { // outgrew initial space @@ -376,7 +376,7 @@ value_t fl_iocopyuntil(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) ios_t *dest = toiostream(fl_ctx, args[0], "io.copyuntil"); ios_t *src = toiostream(fl_ctx, args[1], "io.copyuntil"); char delim = get_delim_arg(fl_ctx, args[2], "io.copyuntil"); - return size_wrap(fl_ctx, ios_copyuntil(dest, src, delim)); + return size_wrap(fl_ctx, ios_copyuntil(dest, src, delim, 1)); } value_t fl_iocopy(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) diff --git a/src/support/ios.c b/src/support/ios.c index b5a168f705603..c98c529991642 100644 --- a/src/support/ios.c +++ b/src/support/ios.c @@ -832,7 +832,7 @@ size_t ios_copyall(ios_t *to, ios_t *from) #define LINE_CHUNK_SIZE 160 -size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) +size_t ios_copyuntil(ios_t *to, ios_t *from, char delim, int keep) { size_t total = 0, avail = (size_t)(from->size - from->bpos); while (!ios_eof(from)) { @@ -850,9 +850,9 @@ size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) avail = 0; } else { - size_t ntowrite = pd - (from->buf+from->bpos) + 1; + size_t ntowrite = pd - (from->buf+from->bpos) + (keep != 0); written = ios_write(to, from->buf+from->bpos, ntowrite); - from->bpos += ntowrite; + from->bpos += ntowrite + (keep == 0); total += written; return total; } @@ -1217,7 +1217,7 @@ char *ios_readline(ios_t *s) { ios_t dest; ios_mem(&dest, 0); - ios_copyuntil(&dest, 
s, '\n'); + ios_copyuntil(&dest, s, '\n', 1); size_t n; return ios_take_buffer(&dest, &n); } diff --git a/src/support/ios.h b/src/support/ios.h index 2547555b5585d..6eab9e21c45b6 100644 --- a/src/support/ios.h +++ b/src/support/ios.h @@ -108,7 +108,7 @@ JL_DLLEXPORT int ios_get_writable(ios_t *s); JL_DLLEXPORT void ios_set_readonly(ios_t *s); JL_DLLEXPORT size_t ios_copy(ios_t *to, ios_t *from, size_t nbytes); JL_DLLEXPORT size_t ios_copyall(ios_t *to, ios_t *from); -JL_DLLEXPORT size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) JL_NOTSAFEPOINT; +JL_DLLEXPORT size_t ios_copyuntil(ios_t *to, ios_t *from, char delim, int keep) JL_NOTSAFEPOINT; JL_DLLEXPORT size_t ios_nchomp(ios_t *from, size_t ntowrite); // ensure at least n bytes are buffered if possible. returns # available. JL_DLLEXPORT size_t ios_readprep(ios_t *from, size_t n); diff --git a/src/sys.c b/src/sys.c index e4de6b351b58c..d55c5df7ab066 100644 --- a/src/sys.c +++ b/src/sys.c @@ -288,7 +288,7 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim, uint8_t str, uint ios_t dest; ios_mem(&dest, 0); ios_setbuf(&dest, (char*)a->data, 80, 0); - size_t n = ios_copyuntil(&dest, s, delim); + size_t n = ios_copyuntil(&dest, s, delim, 1); if (chomp && n > 0 && dest.buf[n - 1] == delim) { n--; if (chomp == 2 && n > 0 && dest.buf[n - 1] == '\r') { diff --git a/test/read.jl b/test/read.jl index 1a18bc852af23..22f263b22a404 100644 --- a/test/read.jl +++ b/test/read.jl @@ -179,11 +179,17 @@ for (name, f) in l @test readuntil(io(t), collect(s)::Vector{Char}) == Vector{Char}(m) @test readuntil(io(t), collect(s)::Vector{Char}, keep=true) == Vector{Char}(kept) - for blen in (0,1,100) - b = Vector{UInt8}(undef, blen) - n = readuntil!(io(t), b, s) - @test b[1:n] == codeunits(kept) + buf = IOBuffer() + @test String(take!(readuntil(buf, io(t), s))) == m + @test String(take!(readuntil(buf, io(t), s, keep=true))) == kept + file = tempname() + for (k,m) in ((false, m), (true, kept)) + open(file, "w") do f + @test 
f == readuntil(f, io(t), s, keep=k) + end + @test read(file, String) == m end + rm(file) end cleanup() From b4b3e5858001ff7b2f6e997a5e46fed629e28e53 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sun, 12 Feb 2023 16:10:11 -0500 Subject: [PATCH 15/24] bugfixes --- base/io.jl | 10 +++++----- base/iostream.jl | 7 ++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/base/io.jl b/base/io.jl index c4d7943a01399..ad66f63b7ef55 100644 --- a/base/io.jl +++ b/base/io.jl @@ -523,7 +523,7 @@ julia> readuntil("my_file.txt", '.', keep = true) julia> rm("my_file.txt") ``` """ -readuntil(stream::IO, delim; kw...) = take!(readuntil(IOBuffer(), stream, delim; kw...)) +readuntil(stream::IO, delim; kw...) = take!(readuntil(IOBuffer(sizehint=70), stream, delim; kw...)) readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = String(take!(readuntil(IOBuffer(StringVector(70),write=true), stream, delim; kw...))) readuntil(out::IO, filename::AbstractString, delim; kw...) = open(io->readuntil(out, io, delim; kw...), convert(String, filename)::String) readuntil(filename::AbstractString, delim; kw...) = open(io->readuntil(io, delim; kw...), convert(String, filename)::String) @@ -846,17 +846,17 @@ function _readuntil(out, s::IO, delim::T, keep::Bool) where T output! = isa(out, IO) ? write : push! 
for c in readeach(s, T) if c == delim - keep && output(out, c) + keep && output!(out, c) break end - output(out, c) + output!(out, c) end return out end readuntil(s::IO, delim::T; keep::Bool=false) where T = _readuntil(Vector{T}(), s, delim, keep) readuntil(s::IO, delim::UInt8; keep::Bool=false) = - _readuntil(StringVector(0), s, delim, keep) + _readuntil(resize!(StringVector(70), 0), s, delim, keep) readuntil(out::IO, s::IO, delim::T; keep::Bool=false) where T = _readuntil(out, s, delim, keep) @@ -963,7 +963,7 @@ function readuntil(out::IO, io::IO, target::AbstractString; keep::Bool=false) end function readuntil(io::IO, target::AbstractVector{T}; keep::Bool=false) where T - out = (T === UInt8 ? StringVector(0) : Vector{T}()) + out = (T === UInt8 ? resize!(StringVector(70), 0) : Vector{T}()) readuntil_vector!(io, target, keep, out) return out end diff --git a/base/iostream.jl b/base/iostream.jl index 73c99dac9dda6..b074b2d153d3e 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -444,7 +444,7 @@ function readuntil_string(s::IOStream, delim::UInt8, keep::Bool) @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, delim, 1, !keep) end readuntil(s::IOStream, delim::AbstractChar; keep::Bool=false) = - delim ≤ '\x7f' ? readuntil_string(s, delim, keep) : + delim ≤ '\x7f' ? readuntil_string(s, delim % UInt8, keep) : String(take!(readuntil(IOBuffer(StringVector(70),write=true), s, delim; keep))) function readline(s::IOStream; keep::Bool=false) @@ -454,9 +454,10 @@ end function readuntil(out::IOBuffer, s::IOStream, delim::UInt8; keep::Bool=false) d = out.data ptr = (out.append ? 
out.size+1 : out.ptr) + isempty(d) && resize!(d, min(70, out.maxsize)) len = length(d) while true - GC.@preserve data @_lock_ios s n= + GC.@preserve d @_lock_ios s n= Int(ccall(:jl_readuntil_buf, Csize_t, (Ptr{Cvoid}, UInt8, Ptr{UInt8}, Csize_t), s.ios, delim, pointer(d, ptr), (len - ptr + 1) % Csize_t)) iszero(n) && break @@ -466,7 +467,7 @@ function readuntil(out::IOBuffer, s::IOStream, delim::UInt8; keep::Bool=false) break; end (eof(s) || len == out.maxsize) && break - len = min(2len + 128, out.maxsize) + len = min(2len + 64, out.maxsize) resize!(d, len) end out.size = max(out.size, ptr - 1) From 20644c823a1a30c0dbb8190e56bc11fc6742c691 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 13 Feb 2023 20:48:49 -0500 Subject: [PATCH 16/24] rm stray semicolon --- base/iostream.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/iostream.jl b/base/iostream.jl index b074b2d153d3e..eeeedb600abfe 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -464,7 +464,7 @@ function readuntil(out::IOBuffer, s::IOStream, delim::UInt8; keep::Bool=false) ptr += n if d[ptr-1] == delim keep || (ptr -= 1) - break; + break end (eof(s) || len == out.maxsize) && break len = min(2len + 64, out.maxsize) From b17097131fa44716eada7231baac1925e0369616 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Mon, 13 Feb 2023 22:09:25 -0500 Subject: [PATCH 17/24] readline(out, ...) --- base/io.jl | 43 ++++++++++++++++++++++++++++--------------- base/iobuffer.jl | 44 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 69 insertions(+), 18 deletions(-) diff --git a/base/io.jl b/base/io.jl index ad66f63b7ef55..54165d1c6fba3 100644 --- a/base/io.jl +++ b/base/io.jl @@ -530,7 +530,7 @@ readuntil(filename::AbstractString, delim; kw...) 
= open(io->readuntil(io, delim """ readline([out::IO], io::IO=stdin; keep::Bool=false) - readline(filename::AbstractString; keep::Bool=false) + readline([out::IO], filename::AbstractString; keep::Bool=false) Read a single line of text from the given I/O stream or file (defaults to `stdin`). When reading from a file, the text is assumed to be encoded in UTF-8. Lines in the @@ -539,6 +539,10 @@ false (as it is by default), these trailing newline characters are removed from line before it is returned. When `keep` is true, they are returned as part of the line. +By default, returns a `String`. If the optional `out` argument is supplied, then +the line is instead written to the `out` stream, returning `out`. +(This can be used, for example, to read data into a pre-allocated [`IOBuffer`](@ref).) + See also [`readuntil`](@ref) for reading until more general delimiters. # Examples @@ -562,21 +566,30 @@ Logan "Logan" ``` """ -function readline(filename::AbstractString; keep::Bool=false) - open(filename) do f - readline(f, keep=keep) - end -end - -function readline(s::IO=stdin; keep::Bool=false) - line = readuntil(s, 0x0a, keep=true)::Vector{UInt8} - i = length(line) - if keep || i == 0 || line[i] != 0x0a - return String(line) - elseif i < 2 || line[i-1] != 0x0d - return String(resize!(line,i-1)) +readline(filename::AbstractString; keep::Bool=false) = + open(io -> readline(io; keep), filename) +readline(out::IO, filename::AbstractString; keep::Bool=false) = + open(io -> readline(out, io; keep), filename) +readline(s::IO=stdin; keep::Bool=false) = + String(take!(readline(IOBuffer(StringVector(70),write=true), s; keep))) + +# fallback to optimized methods for IOBuffer in iobuffer.jl +function readline(out::IO, s::IO; keep::Bool=false) + if keep + return readuntil(out, s, 0x0a, keep=true) else - return String(resize!(line,i-2)) + # more complicated to deal with CRLF logic + while !eof(s) + b = read(s, UInt8) + b == 0x0a && break + if b == 0x0d && !eof(s) + b = read(s, UInt8) + 
b == 0x0a && break + write(out, 0x0d) + end + write(out, b) + end + return out end end diff --git a/base/iobuffer.jl b/base/iobuffer.jl index cd07569bc982e..b07eae4d93ed1 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -516,15 +516,53 @@ function occursin(delim::UInt8, buf::GenericIOBuffer) return false end -function readuntil(out::IO, io::GenericIOBuffer, buffer::AbstractVector{UInt8}, delim::UInt8) +function readuntil(out::IO, io::GenericIOBuffer, delim::UInt8; keep::Bool=false) data = view(io.data, io.ptr:io.size) # note: findfirst + copyto! is much faster than a single loop # except for nout ≲ 20. A single loop is 2x faster for nout=5. - nout = something(findfirst(==(delim), data), length(data)) + nout = nread = something(findfirst(==(delim), data), length(data)) + if !keep && nout > 0 && data[nout] == delim + nout -= 1 + end + write(out, view(io.data, io.ptr:io.ptr+nout-1)) + io.ptr += nread + return out +end + +function readline(out::GenericIOBuffer, s::IO; keep::Bool=false) + readuntil(out, s, 0x0a, keep=true) + line = out.data + i = out.size + if keep || i == 0 || line[i] != 0x0a + return out + elseif i < 2 || line[i-1] != 0x0d + i -= 1 + else + i -= 2 + end + out.size = i + if !out.append + out.ptr = i+1 + end + return out +end + +function _readline(out::IO, io::GenericIOBuffer; keep::Bool=false) + data = view(io.data, io.ptr:io.size) + # note: findfirst + copyto! is much faster than a single loop + # except for nout ≲ 20. A single loop is 2x faster for nout=5. 
+ nout = nread = something(findfirst(==(0x0a), data), length(data)) + if !keep && nout > 0 && data[nout] == 0x0a + nout -= 1 + nout > 0 && data[nout] == 0x0d && (nout -= 1) + end write(out, view(io.data, io.ptr:io.ptr+nout-1)) - io.ptr += nout + io.ptr += nread return out end +readline(out::IO, io::GenericIOBuffer; keep::Bool=false) = _readline(out, io; keep) +readline(out::GenericIOBuffer, io::GenericIOBuffer; keep::Bool=false) = _readline(out, io; keep) + # copy-free crc32c of IOBuffer: function _crc32c(io::IOBuffer, nb::Integer, crc::UInt32=0x00000000) From 06703c58a29a5face2f254ec5bf9c071fe69ec68 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 16 Feb 2023 22:08:28 -0500 Subject: [PATCH 18/24] readuntil -> copyuntil --- NEWS.md | 2 +- base/exports.jl | 2 + base/io.jl | 131 ++++++++++++++++++++++++++++++++++------------- base/iobuffer.jl | 12 ++--- base/iostream.jl | 6 +-- test/read.jl | 41 +++++++++++++-- 6 files changed, 143 insertions(+), 51 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3c4e95ff158ac..52180a852b0e7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -21,7 +21,7 @@ Build system changes New library functions --------------------- -* `copyuntil(out, io, delim)` copies data into an `out` stream` until `delim` ([#48273]). +* `copyuntil(out, io, delim)` and `copyline(out, io)` copy data into an `out::IO` stream ([#48273]). 
New library features -------------------- diff --git a/base/exports.jl b/base/exports.jl index 10f43825e12df..0959fa1c391e2 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -857,6 +857,8 @@ export readline, readlines, readuntil, + copyuntil, + copyline, redirect_stdio, redirect_stderr, redirect_stdin, diff --git a/base/io.jl b/base/io.jl index 54165d1c6fba3..6097f3099c451 100644 --- a/base/io.jl +++ b/base/io.jl @@ -440,10 +440,10 @@ for f in ( end read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io)::IO, byte)::UInt8 unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io)::IO, p, nb) -readuntil(out::IO, io::AbstractPipe, arg::UInt8; kw...) = readuntil(out, pipe_reader(io)::IO, arg; kw...) -readuntil(out::IO, io::AbstractPipe, arg::AbstractChar; kw...) = readuntil(out, pipe_reader(io)::IO, arg; kw...) -readuntil(out::IO, io::AbstractPipe, arg::AbstractString; kw...) = readuntil(out, pipe_reader(io)::IO, arg; kw...) -readuntil(out::IO, io::AbstractPipe, arg::AbstractVector; kw...) = readuntil(out, pipe_reader(io)::IO, arg; kw...) +copyuntil(out::IO, io::AbstractPipe, arg::UInt8; kw...) = copyuntil(out, pipe_reader(io)::IO, arg; kw...) +copyuntil(out::IO, io::AbstractPipe, arg::AbstractChar; kw...) = copyuntil(out, pipe_reader(io)::IO, arg; kw...) +copyuntil(out::IO, io::AbstractPipe, arg::AbstractString; kw...) = copyuntil(out, pipe_reader(io)::IO, arg; kw...) +copyuntil(out::IO, io::AbstractPipe, arg::AbstractVector; kw...) = copyuntil(out, pipe_reader(io)::IO, arg; kw...) readuntil_vector!(io::AbstractPipe, target::AbstractVector, keep::Bool, out) = readuntil_vector!(pipe_reader(io)::IO, target, keep, out) readbytes!(io::AbstractPipe, target::AbstractVector{UInt8}, n=length(target)) = readbytes!(pipe_reader(io)::IO, target, n) peek(io::AbstractPipe, ::Type{T}) where {T} = peek(pipe_reader(io)::IO, T)::T @@ -496,19 +496,17 @@ function read! 
end read!(filename::AbstractString, a) = open(io->read!(io, a), convert(String, filename)::String) """ - readuntil([out::IO], stream::IO, delim; keep::Bool = false) - readuntil([out::IO], filename::AbstractString, delim; keep::Bool = false) + readuntil(stream::IO, delim; keep::Bool = false) + readuntil(filename::AbstractString, delim; keep::Bool = false) Read a string from an I/O `stream` or a file, up to the given delimiter. The delimiter can be a `UInt8`, `AbstractChar`, string, or vector. Keyword argument `keep` controls whether the delimiter is included in the result. The text is assumed to be encoded in UTF-8. -By default, returns a `String` if `delim` is an `AbstractChar` or a string -or otherwise returns a `Vector{typeof(delim)}`. -If the optional `out` argument is supplied, then the data is instead written to the `out` -stream, returning `out`. (This can be used, for example, to read data into -a pre-allocated [`IOBuffer`](@ref).) +Returns a `String` if `delim` is an `AbstractChar` or a string +or otherwise returns a `Vector{typeof(delim)}`. See also [`copyuntil`](@ref) +to instead write in-place to another stream (which can be a preallocated [`IOBuffer`](@ref)). # Examples ```jldoctest @@ -523,14 +521,43 @@ julia> readuntil("my_file.txt", '.', keep = true) julia> rm("my_file.txt") ``` """ -readuntil(stream::IO, delim; kw...) = take!(readuntil(IOBuffer(sizehint=70), stream, delim; kw...)) -readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = String(take!(readuntil(IOBuffer(StringVector(70),write=true), stream, delim; kw...))) -readuntil(out::IO, filename::AbstractString, delim; kw...) = open(io->readuntil(out, io, delim; kw...), convert(String, filename)::String) +readuntil(stream::IO, delim; kw...) = take!(copyuntil(IOBuffer(sizehint=70), stream, delim; kw...)) +readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) 
= String(take!(copyuntil(IOBuffer(StringVector(70),write=true), stream, delim; kw...))) readuntil(filename::AbstractString, delim; kw...) = open(io->readuntil(io, delim; kw...), convert(String, filename)::String) + """ - readline([out::IO], io::IO=stdin; keep::Bool=false) - readline([out::IO], filename::AbstractString; keep::Bool=false) + copyuntil(out::IO, stream::IO, delim; keep::Bool = false) + copyuntil(out::IO, filename::AbstractString, delim; keep::Bool = false) + +Copy a string from an I/O `stream` or a file, up to the given delimiter, to +the `out` stream, returning `out`. +The delimiter can be a `UInt8`, `AbstractChar`, string, or vector. +Keyword argument `keep` controls whether the delimiter is included in the result. +The text is assumed to be encoded in UTF-8. + +Similar to [`readuntil`](@ref), which returns a `String`; in contrast, +`copyuntil` writes directly to `out`, without allocating a string. +(This can be used, for example, to read data into a pre-allocated [`IOBuffer`](@ref).) + +# Examples +```jldoctest +julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); + +julia> String(take!(copyuntil(IOBuffer(), "my_file.txt", 'L'))) +"Julia" + +julia> String(take!(copyuntil(IOBuffer(), "my_file.txt", '.', keep = true))) +"JuliaLang is a GitHub organization." + +julia> rm("my_file.txt") +``` +""" +copyuntil(out::IO, filename::AbstractString, delim; kw...) = open(io->copyuntil(out, io, delim; kw...), convert(String, filename)::String) + +""" + readline(io::IO=stdin; keep::Bool=false) + readline(filename::AbstractString; keep::Bool=false) Read a single line of text from the given I/O stream or file (defaults to `stdin`). When reading from a file, the text is assumed to be encoded in UTF-8. Lines in the @@ -539,9 +566,8 @@ false (as it is by default), these trailing newline characters are removed from line before it is returned. When `keep` is true, they are returned as part of the line. 
-By default, returns a `String`. If the optional `out` argument is supplied, then -the line is instead written to the `out` stream, returning `out`. -(This can be used, for example, to read data into a pre-allocated [`IOBuffer`](@ref).) +Returns a `String`. See also [`copyline`](@ref) to instead write in-place +to another stream (which can be a preallocated [`IOBuffer`](@ref)). See also [`readuntil`](@ref) for reading until more general delimiters. @@ -568,15 +594,48 @@ Logan """ readline(filename::AbstractString; keep::Bool=false) = open(io -> readline(io; keep), filename) -readline(out::IO, filename::AbstractString; keep::Bool=false) = - open(io -> readline(out, io; keep), filename) readline(s::IO=stdin; keep::Bool=false) = - String(take!(readline(IOBuffer(StringVector(70),write=true), s; keep))) + String(take!(copyline(IOBuffer(StringVector(70),write=true), s; keep))) + +""" + copyline(out::IO, io::IO=stdin; keep::Bool=false) + copyline(out::IO, filename::AbstractString; keep::Bool=false) + +Copy a single line of text from an I/O `stream` or a file to the `out` stream, +returning `out`. + +When reading from a file, the text is assumed to be encoded in UTF-8. Lines in the +input end with `'\\n'` or `"\\r\\n"` or the end of an input stream. When `keep` is +false (as it is by default), these trailing newline characters are removed from the +line before it is returned. When `keep` is true, they are returned as part of the +line. + +Similar to [`readline`](@ref), which returns a `String`; in contrast, +`copyline` writes directly to `out`, without allocating a string. +(This can be used, for example, to read data into a pre-allocated [`IOBuffer`](@ref).) + +See also [`copyuntil`](@ref) for reading until more general delimiters. + +# Examples +```jldoctest +julia> write("my_file.txt", "JuliaLang is a GitHub organization.\\nIt has many members.\\n"); + +julia> String(take!(copyline(IOBuffer(), "my_file.txt"))) +"JuliaLang is a GitHub organization." 
+ +julia> String(take!(copyline(IOBuffer(), "my_file.txt", keep=true))) +"JuliaLang is a GitHub organization.\\n" + +julia> rm("my_file.txt") +``` +""" +copyline(out::IO, filename::AbstractString; keep::Bool=false) = + open(io -> copyline(out, io; keep), filename) # fallback to optimized methods for IOBuffer in iobuffer.jl -function readline(out::IO, s::IO; keep::Bool=false) +function copyline(out::IO, s::IO; keep::Bool=false) if keep - return readuntil(out, s, 0x0a, keep=true) + return copyuntil(out, s, 0x0a, keep=true) else # more complicated to deal with CRLF logic while !eof(s) @@ -840,9 +899,9 @@ end # read(io, T) is not defined for other AbstractChar: implementations # must provide their own encoding-specific method. -function readuntil(out::IO, s::IO, delim::AbstractChar; keep::Bool=false) +function copyuntil(out::IO, s::IO, delim::AbstractChar; keep::Bool=false) if delim ≤ '\x7f' - return readuntil(out, s, delim % UInt8; keep) + return copyuntil(out, s, delim % UInt8; keep) end for c in readeach(s, Char) if c == delim @@ -854,8 +913,8 @@ function readuntil(out::IO, s::IO, delim::AbstractChar; keep::Bool=false) return out end -# note: optimized methods of _readuntil for IOStreams and delim::UInt8 in iostream.jl -function _readuntil(out, s::IO, delim::T, keep::Bool) where T +# note: optimized methods of copyuntil for IOStreams and delim::UInt8 in iostream.jl +function _copyuntil(out, s::IO, delim::T, keep::Bool) where T output! = isa(out, IO) ? write : push! 
for c in readeach(s, T) if c == delim @@ -867,11 +926,11 @@ function _readuntil(out, s::IO, delim::T, keep::Bool) where T return out end readuntil(s::IO, delim::T; keep::Bool=false) where T = - _readuntil(Vector{T}(), s, delim, keep) + _copyuntil(Vector{T}(), s, delim, keep) readuntil(s::IO, delim::UInt8; keep::Bool=false) = - _readuntil(resize!(StringVector(70), 0), s, delim, keep) -readuntil(out::IO, s::IO, delim::T; keep::Bool=false) where T = - _readuntil(out, s, delim, keep) + _copyuntil(resize!(StringVector(70), 0), s, delim, keep) +copyuntil(out::IO, s::IO, delim::T; keep::Bool=false) where T = + _copyuntil(out, s, delim, keep) # requires that indices for target are the integer unit range from firstindex to lastindex # returns whether the delimiter was matched @@ -959,20 +1018,20 @@ function readuntil_vector!(io::IO, target::AbstractVector{T}, keep::Bool, out) w return false end -function readuntil(out::IO, io::IO, target::AbstractString; keep::Bool=false) +function copyuntil(out::IO, io::IO, target::AbstractString; keep::Bool=false) # small-string target optimizations x = Iterators.peel(target) isnothing(x) && return out c, rest = x if isempty(rest) && c <= '\x7f' - return readuntil(out, io, c % UInt8; keep) + return copyuntil(out, io, c % UInt8; keep) end # convert String to a utf8-byte-iterator if !(target isa String) && !(target isa SubString{String}) target = String(target) end target = codeunits(target)::AbstractVector - return readuntil(out, io, target, keep=keep) + return copyuntil(out, io, target, keep=keep) end function readuntil(io::IO, target::AbstractVector{T}; keep::Bool=false) where T @@ -980,7 +1039,7 @@ function readuntil(io::IO, target::AbstractVector{T}; keep::Bool=false) where T readuntil_vector!(io, target, keep, out) return out end -readuntil(out::IO, io::IO, target::AbstractVector; keep::Bool=false) = +copyuntil(out::IO, io::IO, target::AbstractVector; keep::Bool=false) = (readuntil_vector!(io, target, keep, out); out) """ diff --git 
a/base/iobuffer.jl b/base/iobuffer.jl index b07eae4d93ed1..deb86e774f4e4 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -516,7 +516,7 @@ function occursin(delim::UInt8, buf::GenericIOBuffer) return false end -function readuntil(out::IO, io::GenericIOBuffer, delim::UInt8; keep::Bool=false) +function copyuntil(out::IO, io::GenericIOBuffer, delim::UInt8; keep::Bool=false) data = view(io.data, io.ptr:io.size) # note: findfirst + copyto! is much faster than a single loop # except for nout ≲ 20. A single loop is 2x faster for nout=5. @@ -529,8 +529,8 @@ function readuntil(out::IO, io::GenericIOBuffer, delim::UInt8; keep::Bool=false) return out end -function readline(out::GenericIOBuffer, s::IO; keep::Bool=false) - readuntil(out, s, 0x0a, keep=true) +function copyline(out::GenericIOBuffer, s::IO; keep::Bool=false) + copyuntil(out, s, 0x0a, keep=true) line = out.data i = out.size if keep || i == 0 || line[i] != 0x0a @@ -547,7 +547,7 @@ function readline(out::GenericIOBuffer, s::IO; keep::Bool=false) return out end -function _readline(out::IO, io::GenericIOBuffer; keep::Bool=false) +function _copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) data = view(io.data, io.ptr:io.size) # note: findfirst + copyto! is much faster than a single loop # except for nout ≲ 20. A single loop is 2x faster for nout=5. 
@@ -560,8 +560,8 @@ function _readline(out::IO, io::GenericIOBuffer; keep::Bool=false) io.ptr += nread return out end -readline(out::IO, io::GenericIOBuffer; keep::Bool=false) = _readline(out, io; keep) -readline(out::GenericIOBuffer, io::GenericIOBuffer; keep::Bool=false) = _readline(out, io; keep) +copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) = _copyline(out, io; keep) +copyline(out::GenericIOBuffer, io::GenericIOBuffer; keep::Bool=false) = _copyline(out, io; keep) # copy-free crc32c of IOBuffer: diff --git a/base/iostream.jl b/base/iostream.jl index eeeedb600abfe..7a075e94f1ce1 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -445,13 +445,13 @@ function readuntil_string(s::IOStream, delim::UInt8, keep::Bool) end readuntil(s::IOStream, delim::AbstractChar; keep::Bool=false) = delim ≤ '\x7f' ? readuntil_string(s, delim % UInt8, keep) : - String(take!(readuntil(IOBuffer(StringVector(70),write=true), s, delim; keep))) + String(take!(copyuntil(IOBuffer(StringVector(70),write=true), s, delim; keep))) function readline(s::IOStream; keep::Bool=false) @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, '\n', 1, keep ? 0 : 2) end -function readuntil(out::IOBuffer, s::IOStream, delim::UInt8; keep::Bool=false) +function copyuntil(out::IOBuffer, s::IOStream, delim::UInt8; keep::Bool=false) d = out.data ptr = (out.append ? 
out.size+1 : out.ptr) isempty(d) && resize!(d, min(70, out.maxsize)) @@ -477,7 +477,7 @@ function readuntil(out::IOBuffer, s::IOStream, delim::UInt8; keep::Bool=false) return out end -function readuntil(out::IOStream, s::IOStream, delim::UInt8; keep::Bool=false) +function copyuntil(out::IOStream, s::IOStream, delim::UInt8; keep::Bool=false) @_lock_ios out @_lock_ios s ccall(:ios_copyuntil, Csize_t, (Ptr{Cvoid}, Ptr{Cvoid}, UInt8, Cint), out.ios, s.ios, delim, keep) return out diff --git a/test/read.jl b/test/read.jl index 22f263b22a404..dfcae6795792f 100644 --- a/test/read.jl +++ b/test/read.jl @@ -180,12 +180,12 @@ for (name, f) in l @test readuntil(io(t), collect(s)::Vector{Char}, keep=true) == Vector{Char}(kept) buf = IOBuffer() - @test String(take!(readuntil(buf, io(t), s))) == m - @test String(take!(readuntil(buf, io(t), s, keep=true))) == kept + @test String(take!(copyuntil(buf, io(t), s))) == m + @test String(take!(copyuntil(buf, io(t), s, keep=true))) == kept file = tempname() for (k,m) in ((false, m), (true, kept)) open(file, "w") do f - @test f == readuntil(f, io(t), s, keep=k) + @test f == copyuntil(f, io(t), s, keep=k) end @test read(file, String) == m end @@ -297,8 +297,39 @@ for (name, f) in l cleanup() verbose && println("$name readline...") - @test readline(io(), keep=true) == readline(IOBuffer(text), keep=true) - @test readline(io(), keep=true) == readline(filename, keep=true) + file = tempname() + for lineending in ("\n", "\r\n", "") + kept = "foo bar" * lineending + t = isempty(lineending) ? 
"foo bar" : kept * "baz\n" + write(file, t) + @test readline(io(t)) == readline(file) == "foo bar" + @test readline(io(t), keep=true) == readline(file, keep=true) == kept + + @test String(take!(copyline(IOBuffer(), file))) == "foo bar" + @test String(take!(copyline(IOBuffer(), file, keep=true))) == kept + + buf = IOBuffer() + @test buf === copyline(buf, io(t)) + @test String(take!(buf)) == "foo bar" + @test String(take!(copyline(buf, file, keep=true))) == kept + for keep in (true, false) + open(file, "w") do f + @test f === copyline(f, io(t), keep=keep) + end + @test read(file, String) == (keep ? kept : "foo bar") + end + + write(file, lineending) + @test readline(IOBuffer(lineending)) == "" + @test readline(IOBuffer(lineending), keep=true) == lineending + @test String(take!(copyline(IOBuffer(), IOBuffer(lineending)))) == "" + @test String(take!(copyline(IOBuffer(), IOBuffer(lineending), keep=true))) == lineending + @test readline(file) == "" + @test readline(file, keep=true) == lineending + @test String(take!(copyline(IOBuffer(), file))) == "" + @test String(take!(copyline(IOBuffer(), file, keep=true))) == lineending + end + rm(file) verbose && println("$name readlines...") @test readlines(io(), keep=true) == readlines(IOBuffer(text), keep=true) From 6580502e10b16a55b30c797b30d81de51913adb8 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 16 Feb 2023 22:38:58 -0500 Subject: [PATCH 19/24] add manual entries for copyuntil and copyline --- doc/src/base/io-network.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/src/base/io-network.md b/doc/src/base/io-network.md index 4e371039f1a9b..68f144427a892 100644 --- a/doc/src/base/io-network.md +++ b/doc/src/base/io-network.md @@ -71,6 +71,8 @@ Base.readline Base.readuntil Base.readlines Base.eachline +Base.copyline +Base.copyuntil Base.displaysize ``` From 6eb891704774ebc3da7259a4f27ec1c8beeb9a19 Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Fri, 17 Feb 2023 08:01:25 -0500 Subject: [PATCH 20/24] try calling cleanup() more often in test for Windows --- test/read.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/read.jl b/test/read.jl index dfcae6795792f..ff0952b1495da 100644 --- a/test/read.jl +++ b/test/read.jl @@ -308,6 +308,8 @@ for (name, f) in l @test String(take!(copyline(IOBuffer(), file))) == "foo bar" @test String(take!(copyline(IOBuffer(), file, keep=true))) == kept + cleanup() + buf = IOBuffer() @test buf === copyline(buf, io(t)) @test String(take!(buf)) == "foo bar" @@ -319,6 +321,8 @@ for (name, f) in l @test read(file, String) == (keep ? kept : "foo bar") end + cleanup() + write(file, lineending) @test readline(IOBuffer(lineending)) == "" @test readline(IOBuffer(lineending), keep=true) == lineending @@ -328,6 +332,8 @@ for (name, f) in l @test readline(file, keep=true) == lineending @test String(take!(copyline(IOBuffer(), file))) == "" @test String(take!(copyline(IOBuffer(), file, keep=true))) == lineending + + cleanup() end rm(file) From 243a15bfb6889b4593a4adf7ae35c96c3b9064c0 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Fri, 17 Feb 2023 20:31:07 -0500 Subject: [PATCH 21/24] use _unsafe_take and a few other tweaks --- base/io.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/base/io.jl b/base/io.jl index 6097f3099c451..77190c421d49b 100644 --- a/base/io.jl +++ b/base/io.jl @@ -521,9 +521,9 @@ julia> readuntil("my_file.txt", '.', keep = true) julia> rm("my_file.txt") ``` """ -readuntil(stream::IO, delim; kw...) = take!(copyuntil(IOBuffer(sizehint=70), stream, delim; kw...)) -readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = String(take!(copyuntil(IOBuffer(StringVector(70),write=true), stream, delim; kw...))) readuntil(filename::AbstractString, delim; kw...) = open(io->readuntil(io, delim; kw...), convert(String, filename)::String) +readuntil(stream::IO, delim::UInt8; kw...) 
= _unsafe_take!(copyuntil(IOBuffer(sizehint=70), stream, delim; kw...)) +readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = String(_unsafe_take!(copyuntil(IOBuffer(sizehint=70), stream, delim; kw...))) """ @@ -595,7 +595,7 @@ Logan readline(filename::AbstractString; keep::Bool=false) = open(io -> readline(io; keep), filename) readline(s::IO=stdin; keep::Bool=false) = - String(take!(copyline(IOBuffer(StringVector(70),write=true), s; keep))) + String(_unsafe_take!(copyline(IOBuffer(sizehint=70), s; keep))) """ copyline(out::IO, io::IO=stdin; keep::Bool=false) @@ -1215,7 +1215,7 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state) buf.size = _stripnewline(r.itr.keep, buf.size, buf.data) empty!(chunks) # will cause next iteration to terminate seekend(r.itr.stream) # reposition to end of stream for isdone - s = String(take!(buf)) + s = String(_unsafe_take!(buf)) else # extract the string from chunks[ichunk][inewline+1] to chunks[jchunk][jnewline] if ichunk == jchunk # common case: current and previous newline in same chunk @@ -1232,7 +1232,7 @@ function iterate(r::Iterators.Reverse{<:EachLine}, state) end write(buf, view(chunks[jchunk], 1:jnewline)) buf.size = _stripnewline(r.itr.keep, buf.size, buf.data) - s = String(take!(buf)) + s = String(_unsafe_take!(buf)) # overwrite obsolete chunks (ichunk+1:jchunk) i = jchunk From 1f4ef8aefda893f6835affa4a439185fe83f9b4a Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 18 Feb 2023 13:55:30 -0500 Subject: [PATCH 22/24] bugfix: missing ensureroom --- base/iostream.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/iostream.jl b/base/iostream.jl index 7a075e94f1ce1..f5a8c0a8dffc8 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -445,16 +445,16 @@ function readuntil_string(s::IOStream, delim::UInt8, keep::Bool) end readuntil(s::IOStream, delim::AbstractChar; keep::Bool=false) = delim ≤ '\x7f' ? 
readuntil_string(s, delim % UInt8, keep) : - String(take!(copyuntil(IOBuffer(StringVector(70),write=true), s, delim; keep))) + String(_unsafe_take!(copyuntil(IOBuffer(sizehint=70), s, delim; keep))) function readline(s::IOStream; keep::Bool=false) @_lock_ios s ccall(:jl_readuntil, Ref{String}, (Ptr{Cvoid}, UInt8, UInt8, UInt8), s.ios, '\n', 1, keep ? 0 : 2) end function copyuntil(out::IOBuffer, s::IOStream, delim::UInt8; keep::Bool=false) - d = out.data + ensureroom(out, 16) ptr = (out.append ? out.size+1 : out.ptr) - isempty(d) && resize!(d, min(70, out.maxsize)) + d = out.data len = length(d) while true GC.@preserve d @_lock_ios s n= From 7061e7f40bc2a1b03407bbcce20d901f8407cbfa Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Tue, 27 Jun 2023 08:29:29 -0400 Subject: [PATCH 23/24] Update base/io.jl Co-authored-by: Rafael Fourquet --- base/io.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/io.jl b/base/io.jl index 77190c421d49b..565afc3a84644 100644 --- a/base/io.jl +++ b/base/io.jl @@ -504,8 +504,8 @@ The delimiter can be a `UInt8`, `AbstractChar`, string, or vector. Keyword argument `keep` controls whether the delimiter is included in the result. The text is assumed to be encoded in UTF-8. -Returns a `String` if `delim` is an `AbstractChar` or a string -or otherwise returns a `Vector{typeof(delim)}`. See also [`copyuntil`](@ref) +Return a `String` if `delim` is an `AbstractChar` or a string +or otherwise return a `Vector{typeof(delim)}`. See also [`copyuntil`](@ref) to instead write in-place to another stream (which can be a preallocated [`IOBuffer`](@ref)). # Examples From 13bbd4850ab2c01263463e14ca58d16fdb437234 Mon Sep 17 00:00:00 2001 From: "Steven G.
Johnson" Date: Tue, 27 Jun 2023 08:29:41 -0400 Subject: [PATCH 24/24] Update base/io.jl Co-authored-by: Rafael Fourquet --- base/io.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/io.jl b/base/io.jl index 565afc3a84644..c62d6393d12ec 100644 --- a/base/io.jl +++ b/base/io.jl @@ -566,7 +566,7 @@ false (as it is by default), these trailing newline characters are removed from line before it is returned. When `keep` is true, they are returned as part of the line. -Returns a `String`. See also [`copyline`](@ref) to instead write in-place +Return a `String`. See also [`copyline`](@ref) to instead write in-place to another stream (which can be a preallocated [`IOBuffer`](@ref)). See also [`readuntil`](@ref) for reading until more general delimiters.