diff --git a/spec/std/adler32_spec.cr b/spec/std/adler32_spec.cr new file mode 100644 index 000000000000..450ee398d3d8 --- /dev/null +++ b/spec/std/adler32_spec.cr @@ -0,0 +1,16 @@ +require "spec" +require "adler32" + +describe Adler32 do + it "should be able to calculate adler32" do + adler = Adler32.checksum("foo").to_s(16) + adler.should eq("2820145") + end + + it "should be able to calculate adler32 combined" do + adler1 = Adler32.checksum("hello") + adler2 = Adler32.checksum(" world!") + combined = Adler32.combine(adler1, adler2, " world!".size) + Adler32.checksum("hello world!").should eq(combined) + end +end diff --git a/spec/std/crc32_spec.cr b/spec/std/crc32_spec.cr new file mode 100644 index 000000000000..cfc5ef1b0853 --- /dev/null +++ b/spec/std/crc32_spec.cr @@ -0,0 +1,16 @@ +require "spec" +require "crc32" + +describe CRC32 do + it "should be able to calculate crc32" do + crc = CRC32.checksum("foo").to_s(16) + crc.should eq("8c736521") + end + + it "should be able to calculate crc32 combined" do + crc1 = CRC32.checksum("hello") + crc2 = CRC32.checksum(" world!") + combined = CRC32.combine(crc1, crc2, " world!".size) + CRC32.checksum("hello world!").should eq(combined) + end +end diff --git a/spec/std/flate/flate_spec.cr b/spec/std/flate/flate_spec.cr new file mode 100644 index 000000000000..43984e8a0232 --- /dev/null +++ b/spec/std/flate/flate_spec.cr @@ -0,0 +1,47 @@ +require "spec" +require "flate" + +module Flate + describe Writer do + it "should be able to write" do + message = "this is a test string !!!!\n" + io = IO::Memory.new + writer = Writer.new(io) + writer.print message + writer.close + + io.rewind + reader = Reader.new(io) + reader.gets_to_end.should eq(message) + end + + it "can be closed without sync" do + io = IO::Memory.new + writer = Writer.new(io) + writer.close + writer.closed?.should be_true + io.closed?.should be_false + + expect_raises IO::Error, "closed stream" do + writer.print "a" + end + end + + it "can be closed with sync 
(1)" do + io = IO::Memory.new + writer = Writer.new(io, sync_close: true) + writer.close + writer.closed?.should be_true + io.closed?.should be_true + end + + it "can be closed with sync (2)" do + io = IO::Memory.new + writer = Writer.new(io) + writer.sync_close = true + writer.close + writer.closed?.should be_true + io.closed?.should be_true + end + end +end diff --git a/spec/std/gzip/gzip_spec.cr b/spec/std/gzip/gzip_spec.cr new file mode 100644 index 000000000000..750fd91a6c0f --- /dev/null +++ b/spec/std/gzip/gzip_spec.cr @@ -0,0 +1,46 @@ +require "spec" +require "gzip" + +describe Gzip do + it "writes and reads to memory" do + io = IO::Memory.new + + time = Time.new(2016, 1, 2) + os = 4_u8 + extra = Bytes[1, 2, 3] + name = "foo.txt" + comment = "some comment" + contents = "hello world" + + Gzip::Writer.open(io) do |gzip| + header = gzip.header + header.modification_time = time + header.os = os + header.extra = extra + header.name = name + header.comment = comment + + io.bytesize.should eq(0) + gzip.flush + io.bytesize.should_not eq(0) + + gzip.print contents + end + + io.rewind + + Gzip::Reader.open(io) do |gzip| + header = gzip.header.not_nil! 
+ header.modification_time.should eq(time) + header.os.should eq(os) + header.extra.should eq(extra) + header.name.should eq(name) + header.comment.should eq(comment) + + # Reading zero bytes is OK + gzip.read(Bytes.empty).should eq(0) + + gzip.gets_to_end.should eq(contents) + end + end +end diff --git a/spec/std/http/server/handlers/deflate_handler_spec.cr b/spec/std/http/server/handlers/compress_handler_spec.cr similarity index 89% rename from spec/std/http/server/handlers/deflate_handler_spec.cr rename to spec/std/http/server/handlers/compress_handler_spec.cr index 60e2765d045d..a676708c7181 100644 --- a/spec/std/http/server/handlers/deflate_handler_spec.cr +++ b/spec/std/http/server/handlers/compress_handler_spec.cr @@ -1,14 +1,14 @@ require "spec" require "http/server" -describe HTTP::DeflateHandler do +describe HTTP::CompressHandler do it "doesn't deflates if doesn't have 'deflate' in Accept-Encoding header" do io = IO::Memory.new request = HTTP::Request.new("GET", "/") response = HTTP::Server::Response.new(io) context = HTTP::Server::Context.new(request, response) - handler = HTTP::DeflateHandler.new + handler = HTTP::CompressHandler.new handler.next = HTTP::Handler::Proc.new do |ctx| ctx.response.print "Hello" end @@ -27,7 +27,7 @@ describe HTTP::DeflateHandler do response = HTTP::Server::Response.new(io) context = HTTP::Server::Context.new(request, response) - handler = HTTP::DeflateHandler.new + handler = HTTP::CompressHandler.new handler.next = HTTP::Handler::Proc.new do |ctx| ctx.response.print "Hello" end @@ -39,7 +39,7 @@ describe HTTP::DeflateHandler do body = response2.body io2 = IO::Memory.new - deflate = Zlib::Deflate.new(io2) + deflate = Flate::Writer.new(io2) deflate.print "Hello" deflate.close io2.rewind @@ -55,7 +55,7 @@ describe HTTP::DeflateHandler do response = HTTP::Server::Response.new(io) context = HTTP::Server::Context.new(request, response) - handler = HTTP::DeflateHandler.new + handler = HTTP::CompressHandler.new handler.next = 
HTTP::Handler::Proc.new do |ctx| ctx.response.print "Hello" end @@ -67,7 +67,7 @@ describe HTTP::DeflateHandler do body = response2.body io2 = IO::Memory.new - deflate = Zlib::Deflate.gzip(io2) + deflate = Gzip::Writer.new(io2) deflate.print "Hello" deflate.close io2.rewind diff --git a/spec/std/http/server/server_spec.cr b/spec/std/http/server/server_spec.cr index 682467c1fb80..236ce34ad21c 100644 --- a/spec/std/http/server/server_spec.cr +++ b/spec/std/http/server/server_spec.cr @@ -310,7 +310,7 @@ module HTTP server = Server.new("0.0.0.0", 0, [ ErrorHandler.new, LogHandler.new, - DeflateHandler.new, + CompressHandler.new, StaticFileHandler.new("."), ] ) @@ -329,7 +329,7 @@ module HTTP server = Server.new(0, [ ErrorHandler.new, LogHandler.new, - DeflateHandler.new, + CompressHandler.new, StaticFileHandler.new("."), ] ) diff --git a/spec/std/zip/zip_spec.cr b/spec/std/zip/zip_spec.cr index e9af41929b20..051c5cb12743 100644 --- a/spec/std/zip/zip_spec.cr +++ b/spec/std/zip/zip_spec.cr @@ -59,19 +59,19 @@ describe Zip do io = IO::Memory.new text = "contents of foo" - crc32 = Zlib.crc32(text) + crc32 = CRC32.checksum(text) Zip::Writer.open(io) do |zip| entry = Zip::Writer::Entry.new("foo.txt") entry.compression_method = Zip::CompressionMethod::STORED - entry.crc32 = crc32.to_u32 + entry.crc32 = crc32 entry.compressed_size = text.bytesize.to_u32 entry.uncompressed_size = text.bytesize.to_u32 zip.add entry, &.print(text) entry = Zip::Writer::Entry.new("bar.txt") entry.compression_method = Zip::CompressionMethod::STORED - entry.crc32 = crc32.to_u32 + entry.crc32 = crc32 entry.compressed_size = text.bytesize.to_u32 entry.uncompressed_size = text.bytesize.to_u32 zip.add entry, &.print(text) diff --git a/spec/std/zlib/deflate_spec.cr b/spec/std/zlib/deflate_spec.cr deleted file mode 100644 index 809cce556839..000000000000 --- a/spec/std/zlib/deflate_spec.cr +++ /dev/null @@ -1,65 +0,0 @@ -require "spec" -require "zlib" - -module Zlib - describe Deflate do - it "should be 
able to deflate" do - message = "this is a test string !!!!\n" - io = IO::Memory.new - deflate = Deflate.new(io) - deflate.print message - deflate.close - - io.rewind - inflate = Inflate.new(io) - inflate.gets_to_end.should eq(message) - end - - it "can be closed without sync" do - io = IO::Memory.new - deflate = Deflate.new(io) - deflate.close - deflate.closed?.should be_true - io.closed?.should be_false - - expect_raises IO::Error, "closed stream" do - deflate.print "a" - end - end - - it "can be closed with sync (1)" do - io = IO::Memory.new - deflate = Deflate.new(io, sync_close: true) - deflate.close - deflate.closed?.should be_true - io.closed?.should be_true - end - - it "can be closed with sync (2)" do - io = IO::Memory.new - deflate = Deflate.new(io) - deflate.sync_close = true - deflate.close - deflate.closed?.should be_true - io.closed?.should be_true - end - - it "can be flushed" do - io = IO::Memory.new - deflate = Deflate.new(io) - - deflate.print "this" - io.to_slice.hexstring.should eq("789c") - - deflate.flush - (io.to_slice.hexstring.size > 4).should be_true - - deflate.print " is a test string !!!!\n" - deflate.close - - io.rewind - inflate = Inflate.new(io) - inflate.gets_to_end.should eq("this is a test string !!!!\n") - end - end -end diff --git a/spec/std/zlib/inflate_spec.cr b/spec/std/zlib/inflate_spec.cr deleted file mode 100644 index b5e7d9153e6a..000000000000 --- a/spec/std/zlib/inflate_spec.cr +++ /dev/null @@ -1,69 +0,0 @@ -require "spec" -require "zlib" - -module Zlib - describe Inflate do - it "should be able to inflate" do - io = IO::Memory.new - "789c2bc9c82c5600a2448592d4e21285e292a2ccbc74054520e00200854f087b".scan(/../).each do |match| - io.write_byte match[0].to_u8(16) - end - io.rewind - - inflate = Inflate.new(io) - - str = String::Builder.build do |builder| - IO.copy(inflate, builder) - end - - str.should eq("this is a test string !!!!\n") - inflate.read(Bytes.new(10)).should eq(0) - end - - it "can be closed without sync" do 
- io = IO::Memory.new("") - inflate = Inflate.new(io) - inflate.close - inflate.closed?.should be_true - io.closed?.should be_false - - expect_raises IO::Error, "closed stream" do - inflate.gets - end - end - - it "can be closed with sync (1)" do - io = IO::Memory.new("") - inflate = Inflate.new(io, sync_close: true) - inflate.close - inflate.closed?.should be_true - io.closed?.should be_true - end - - it "can be closed with sync (2)" do - io = IO::Memory.new("") - inflate = Inflate.new(io) - inflate.sync_close = true - inflate.close - inflate.closed?.should be_true - io.closed?.should be_true - end - - it "should not inflate from empty stream" do - io = IO::Memory.new("") - inflate = Inflate.new(io) - inflate.read_byte.should be_nil - end - - it "should not freeze when reading empty slice" do - io = IO::Memory.new - "789c2bc9c82c5600a2448592d4e21285e292a2ccbc74054520e00200854f087b".scan(/../).each do |match| - io.write_byte match[0].to_u8(16) - end - io.rewind - inflate = Inflate.new(io) - slice = Bytes.new(0) - inflate.read(slice).should eq(0) - end - end -end diff --git a/spec/std/zlib/reader_spec.cr b/spec/std/zlib/reader_spec.cr new file mode 100644 index 000000000000..e5633616c830 --- /dev/null +++ b/spec/std/zlib/reader_spec.cr @@ -0,0 +1,69 @@ +require "spec" +require "zlib" + +module Zlib + describe Reader do + it "should be able to read" do + io = IO::Memory.new + "789c2bc9c82c5600a2448592d4e21285e292a2ccbc74054520e00200854f087b".scan(/../).each do |match| + io.write_byte match[0].to_u8(16) + end + io.rewind + + reader = Reader.new(io) + + str = String::Builder.build do |builder| + IO.copy(reader, builder) + end + + str.should eq("this is a test string !!!!\n") + reader.read(Bytes.new(10)).should eq(0) + end + + it "can be closed without sync" do + io = IO::Memory.new(Bytes[120, 156, 3, 0, 0, 0, 0, 1]) + reader = Reader.new(io) + reader.close + reader.closed?.should be_true + io.closed?.should be_false + + expect_raises IO::Error, "closed stream" do + 
reader.gets + end + end + + it "can be closed with sync (1)" do + io = IO::Memory.new(Bytes[120, 156, 3, 0, 0, 0, 0, 1]) + reader = Reader.new(io, sync_close: true) + reader.close + reader.closed?.should be_true + io.closed?.should be_true + end + + it "can be closed with sync (2)" do + io = IO::Memory.new(Bytes[120, 156, 3, 0, 0, 0, 0, 1]) + reader = Reader.new(io) + reader.sync_close = true + reader.close + reader.closed?.should be_true + io.closed?.should be_true + end + + it "should not read from empty stream" do + io = IO::Memory.new(Bytes[120, 156, 3, 0, 0, 0, 0, 1]) + reader = Reader.new(io) + reader.read_byte.should be_nil + end + + it "should not freeze when reading empty slice" do + io = IO::Memory.new + "789c2bc9c82c5600a2448592d4e21285e292a2ccbc74054520e00200854f087b".scan(/../).each do |match| + io.write_byte match[0].to_u8(16) + end + io.rewind + reader = Reader.new(io) + slice = Bytes.empty + reader.read(slice).should eq(0) + end + end +end diff --git a/spec/std/zlib/stress_spec.cr b/spec/std/zlib/stress_spec.cr index 9401d8191138..dd0005c8b55e 100644 --- a/spec/std/zlib/stress_spec.cr +++ b/spec/std/zlib/stress_spec.cr @@ -3,34 +3,34 @@ require "zlib" module Zlib describe Zlib do - it "inflate deflate should be inverse with random string" do + it "write read should be inverse with random string" do expected = String.build do |io| 1_000_000.times { rand(2000).to_i.to_s(32, io) } end io = IO::Memory.new - deflate = Deflate.new(io) - deflate.print expected - deflate.close + writer = Writer.new(io) + writer.print expected + writer.close io.rewind - inflate = Inflate.new(io) - inflate.gets_to_end.should eq(expected) + reader = Reader.new(io) + reader.gets_to_end.should eq(expected) end - it "inflate deflate should be inverse (utf-8)" do + it "write read should be inverse (utf-8)" do expected = "日本さん語日本さん語" io = IO::Memory.new - deflate = Deflate.new(io) - deflate.print expected - deflate.close + writer = Writer.new(io) + writer.print expected + 
writer.close io.rewind - inflate = Inflate.new(io) - inflate.gets_to_end.should eq(expected) + reader = Reader.new(io) + reader.gets_to_end.should eq(expected) end end end diff --git a/spec/std/zlib/writer_spec.cr b/spec/std/zlib/writer_spec.cr new file mode 100644 index 000000000000..ec3be134d662 --- /dev/null +++ b/spec/std/zlib/writer_spec.cr @@ -0,0 +1,71 @@ +require "spec" +require "zlib" + +module Zlib + describe Writer do + it "should be able to write" do + message = "this is a test string !!!!\n" + io = IO::Memory.new + + writer = Writer.new(io) + + io.bytesize.should eq(0) + writer.flush + io.bytesize.should_not eq(0) + + writer.print message + writer.close + + io.rewind + reader = Reader.new(io) + reader.gets_to_end.should eq(message) + end + + it "can be closed without sync" do + io = IO::Memory.new + writer = Writer.new(io) + writer.close + writer.closed?.should be_true + io.closed?.should be_false + + expect_raises IO::Error, "closed stream" do + writer.print "a" + end + end + + it "can be closed with sync (1)" do + io = IO::Memory.new + writer = Writer.new(io, sync_close: true) + writer.close + writer.closed?.should be_true + io.closed?.should be_true + end + + it "can be closed with sync (2)" do + io = IO::Memory.new + writer = Writer.new(io) + writer.sync_close = true + writer.close + writer.closed?.should be_true + io.closed?.should be_true + end + + it "can be flushed" do + io = IO::Memory.new + writer = Writer.new(io) + + writer.print "this" + io.to_slice.hexstring.should eq("789c") + + writer.flush + (io.to_slice.hexstring.size > 4).should be_true + + writer.print " is a test string !!!!\n" + writer.close + + io.rewind + reader = Reader.new(io) + reader.gets_to_end.should eq("this is a test string !!!!\n") + end + end +end diff --git a/spec/std/zlib/zlib_spec.cr b/spec/std/zlib/zlib_spec.cr deleted file mode 100644 index 8de3faec1d09..000000000000 --- a/spec/std/zlib/zlib_spec.cr +++ /dev/null @@ -1,28 +0,0 @@ -require "spec" -require "zlib" - 
-describe Zlib do - it "should be able to calculate adler32" do - adler = Zlib.adler32("foo").to_s(16) - adler.should eq("2820145") - end - - it "should be able to calculate adler32 combined" do - adler1 = Zlib.adler32("hello") - adler2 = Zlib.adler32(" world!") - combined = Zlib.adler32_combine(adler1, adler2, " world!".size) - Zlib.adler32("hello world!").should eq(combined) - end - - it "should be able to calculate crc32" do - crc = Zlib.crc32("foo").to_s(16) - crc.should eq("8c736521") - end - - it "should be able to calculate crc32 combined" do - crc1 = Zlib.crc32("hello") - crc2 = Zlib.crc32(" world!") - combined = Zlib.crc32_combine(crc1, crc2, " world!".size) - Zlib.crc32("hello world!").should eq(combined) - end -end diff --git a/src/adler32/adler32.cr b/src/adler32/adler32.cr new file mode 100644 index 000000000000..7bca2d16ecdd --- /dev/null +++ b/src/adler32/adler32.cr @@ -0,0 +1,27 @@ +require "lib_z" + +module Adler32 + def self.initial : UInt32 + LibZ.adler32(0, nil, 0).to_u32 + end + + def self.checksum(slice : Bytes) : UInt32 + update(slice, initial) + end + + def self.checksum(string : String) : UInt32 + checksum(string.to_slice) + end + + def self.update(slice : Bytes, adler32 : UInt32) : UInt32 + LibZ.adler32(adler32, slice, slice.size).to_u32 + end + + def self.update(string : String, adler32 : UInt32) : UInt32 + update(string.to_slice, adler32) + end + + def self.combine(adler1 : UInt32, adler2 : UInt32, len) : UInt32 + LibZ.adler32_combine(adler1, adler2, len).to_u32 + end +end diff --git a/src/crc32/crc32.cr b/src/crc32/crc32.cr new file mode 100644 index 000000000000..89b1b4f49cbc --- /dev/null +++ b/src/crc32/crc32.cr @@ -0,0 +1,27 @@ +require "lib_z" + +module CRC32 + def self.initial : UInt32 + LibZ.crc32(0, nil, 0).to_u32 + end + + def self.checksum(slice : Bytes) : UInt32 + update(slice, initial) + end + + def self.checksum(string : String) : UInt32 + checksum(string.to_slice) + end + + def self.update(slice : Bytes, crc32 : UInt32) : 
UInt32 + LibZ.crc32(crc32, slice, slice.size).to_u32 + end + + def self.update(string : String, crc32 : UInt32) : UInt32 + update(string.to_slice, crc32) + end + + def self.combine(crc1 : UInt32, crc2 : UInt32, len) : UInt32 + LibZ.crc32_combine(crc1, crc2, len).to_u32 + end +end diff --git a/src/docs_main.cr b/src/docs_main.cr index e537cfb75d67..e1a42fa3abee 100644 --- a/src/docs_main.cr +++ b/src/docs_main.cr @@ -32,14 +32,18 @@ require "./thread" require "./xml" require "./yaml" require "./benchmark" +require "./adler32" require "./array" require "./bit_array" require "./box" require "./colorize" require "./complex" +require "./crc32" require "./deque" require "./dl" require "./file_utils" +require "./flate" +require "./gzip" require "./ini" require "./levenshtein" require "./option_parser" @@ -52,3 +56,4 @@ require "./string_scanner" require "./tempfile" require "./uri" require "./zip" +require "./zlib" diff --git a/src/flate/flate.cr b/src/flate/flate.cr new file mode 100644 index 000000000000..27550952cbf0 --- /dev/null +++ b/src/flate/flate.cr @@ -0,0 +1,33 @@ +require "lib_z" +require "./*" + +# The Flate module contains readers and writers of DEFLATE format compressed +# data, as specified in [RFC 1951](https://www.ietf.org/rfc/rfc1951.txt). +# +# See `Gzip`, `Zip` and `Zlib` for modules that provide access +# to DEFLATE-based file formats. 
+module Flate + NO_COMPRESSION = 0 + BEST_SPEED = 1 + BEST_COMPRESSION = 9 + DEFAULT_COMPRESSION = -1 + + enum Strategy + FILTERED = 1 + HUFFMAN_ONLY = 2 + RLE = 3 + FIXED = 4 + DEFAULT = 0 + end + + class Error < Exception + def initialize(ret, stream) + if msg = stream.msg + error_msg = String.new(msg) + super("flate: #{error_msg} #{ret}") + else + super("flate: #{ret}") + end + end + end +end diff --git a/src/flate/reader.cr b/src/flate/reader.cr new file mode 100644 index 000000000000..d3e44e6fe711 --- /dev/null +++ b/src/flate/reader.cr @@ -0,0 +1,138 @@ +# A read-only `IO` object to decompress data in the DEFLATE format. +# +# Instances of this class wrap another IO object. When you read from this +# instance, it reads data from the underlying IO, decompresses it, and returns +# it to the caller. +class Flate::Reader + include IO + + # If `#sync_close?` is `true`, closing this IO will close the underlying IO. + property? sync_close : Bool + + # Returns `true` if this reader is closed. + getter? closed = false + + # Peeked bytes from the underlying IO + @peek : Bytes? + + # Creates an instance of Flate::Reader. + def initialize(@io : IO, @sync_close : Bool = false, @dict : Bytes? = nil) + @buf = uninitialized UInt8[1] # input buffer used by zlib + @stream = LibZ::ZStream.new + @stream.zalloc = LibZ::AllocFunc.new { |opaque, items, size| GC.malloc(items * size) } + @stream.zfree = LibZ::FreeFunc.new { |opaque, address| GC.free(address) } + ret = LibZ.inflateInit2(pointerof(@stream), -LibZ::MAX_BITS, LibZ.zlibVersion, sizeof(LibZ::ZStream)) + if ret != LibZ::Error::OK + raise Flate::Error.new(ret, @stream) + end + + @end = false + end + + # Creates an instance of Flate::Reader, yields it to the given block, and closes + # it at its end. + def self.new(input : IO, sync_close : Bool = false, dict : Bytes? 
= nil) + reader = new input, sync_close: sync_close, dict: dict + yield reader ensure reader.close + end + + # Creates an instance of Flate::Reader for the gzip format. + # has written. + def self.gzip(input, sync_close : Bool = false) : self + new input, wbits: GZIP, sync_close: sync_close + end + + # Creates an instance of Flate::Reader for the gzip format, yields it to the given block, and closes + # it at its end. + def self.gzip(input, sync_close : Bool = false) + reader = gzip input, sync_close: sync_close + yield reader ensure reader.close + end + + # Always raises `IO::Error` because this is a read-only `IO`. + def write(slice : Bytes) + raise IO::Error.new "can't write to Flate::Reader" + end + + # See `IO#read`. + def read(slice : Bytes) + check_open + + return 0 if slice.empty? + return 0 if @end + + while true + if @stream.avail_in == 0 + # Try to peek into the underlying IO, so we can feed more + # data into zlib + @peek = @io.peek + if peek = @peek + @stream.next_in = peek + @stream.avail_in = peek.size + else + # If peeking is not possible, we are cautious and + # read byte per byte to avoid reading more data beyond + # the compressed data (for example, if the compressed stream + # is part of a zip/gzip file). 
+ @stream.next_in = @buf.to_unsafe + @stream.avail_in = @io.read(@buf.to_slice).to_u32 + end + return 0 if @stream.avail_in == 0 + end + + old_avail_in = @stream.avail_in + + @stream.avail_out = slice.size.to_u32 + @stream.next_out = slice.to_unsafe + + ret = LibZ.inflate(pointerof(@stream), LibZ::Flush::NO_FLUSH) + read_bytes = slice.size - @stream.avail_out + + # If we were able to peek, skip the used bytes in the underlying IO + avail_in_diff = old_avail_in - @stream.avail_in + if @peek && avail_in_diff > 0 + @io.skip(avail_in_diff) + end + + case ret + when LibZ::Error::NEED_DICT + if dict = @dict + ret = LibZ.inflateSetDictionary(pointerof(@stream), dict, dict.size) + next if ret == LibZ::Error::OK + end + + raise Flate::Error.new(ret, @stream) + when LibZ::Error::DATA_ERROR, + LibZ::Error::MEM_ERROR + raise Flate::Error.new(ret, @stream) + when LibZ::Error::STREAM_END + @end = true + return read_bytes + else + # LibZ.inflate might not write any data to the output slice because + # it might need more input. We can know this happened because *ret* + # is not STREAM_END. + if read_bytes == 0 + next + else + return read_bytes + end + end + end + end + + # Closes this reader. + def close + return if @closed + @closed = true + + LibZ.inflateEnd(pointerof(@stream)) + + @io.close if @sync_close + end + + # :nodoc: + def inspect(io) + to_s(io) + end +end diff --git a/src/flate/writer.cr b/src/flate/writer.cr new file mode 100644 index 000000000000..7e9d8b76f844 --- /dev/null +++ b/src/flate/writer.cr @@ -0,0 +1,97 @@ +# A write-only `IO` object to compress data in the DEFLATE format. +# +# Instances of this class wrap another IO object. When you write to this +# instance, it compresses the data and writes it to the underlying IO. +# +# **Note**: unless created with a block, `close` must be invoked after all +# data has been written to a Flate::Writer instance. 
+class Flate::Writer + include IO + + # If `#sync_close?` is `true`, closing this IO will close the underlying IO. + property? sync_close : Bool + + # Creates an instance of Flate::Writer. `close` must be invoked after all data + # has been written. + def initialize(@output : IO, level : Int32 = Flate::DEFAULT_COMPRESSION, + strategy : Flate::Strategy = Flate::Strategy::DEFAULT, + @sync_close : Bool = false, @dict : Bytes? = nil) + unless -1 <= level <= 9 + raise ArgumentError.new("invalid Flate level: #{level} (must be in -1..9)") + end + + @buf = uninitialized UInt8[8192] # output buffer used by zlib + @stream = LibZ::ZStream.new + @stream.zalloc = LibZ::AllocFunc.new { |opaque, items, size| GC.malloc(items * size) } + @stream.zfree = LibZ::FreeFunc.new { |opaque, address| GC.free(address) } + @closed = false + ret = LibZ.deflateInit2(pointerof(@stream), level, LibZ::Z_DEFLATED, -LibZ::MAX_BITS, LibZ::DEF_MEM_LEVEL, + strategy.value, LibZ.zlibVersion, sizeof(LibZ::ZStream)) + if ret != LibZ::Error::OK + raise Flate::Error.new(ret, @stream) + end + end + + # Creates an instance of Flate::Writer, yields it to the given block, and closes + # it at its end. + def self.new(output : IO, level : Int32 = Flate::DEFAULT_COMPRESSION, + strategy : Flate::Strategy = Flate::Strategy::DEFAULT, + sync_close : Bool = false, dict : Bytes? = nil) + writer = new(output, level: level, strategy: strategy, sync_close: sync_close, dict: dict) + yield writer ensure writer.close + end + + # Always raises `IO::Error` because this is a write-only `IO`. + def read(slice : Bytes) + raise "can't read from Flate::Writer" + end + + # See `IO#write`. + def write(slice : Bytes) + check_open + + @stream.avail_in = slice.size + @stream.next_in = slice + consume_output LibZ::Flush::NO_FLUSH + end + + # See `IO#flush`. + def flush + return if @closed + + consume_output LibZ::Flush::SYNC_FLUSH + end + + # Closes this writer. Must be invoked after all data has been written. 
+ def close + return if @closed + @closed = true + + @stream.avail_in = 0 + @stream.next_in = Pointer(UInt8).null + consume_output LibZ::Flush::FINISH + LibZ.deflateEnd(pointerof(@stream)) + + @output.close if @sync_close + end + + # Returns `true` if this IO is closed. + def closed? + @closed + end + + # :nodoc: + def inspect(io) + to_s(io) + end + + private def consume_output(flush) + loop do + @stream.next_out = @buf.to_unsafe + @stream.avail_out = @buf.size.to_u32 + LibZ.deflate(pointerof(@stream), flush) # no bad return value + @output.write(@buf.to_slice[0, @buf.size - @stream.avail_out]) + break if @stream.avail_out != 0 + end + end +end diff --git a/src/gzip/gzip.cr b/src/gzip/gzip.cr new file mode 100644 index 000000000000..ff492d3080c7 --- /dev/null +++ b/src/gzip/gzip.cr @@ -0,0 +1,20 @@ +require "flate" +require "crc32" + +# The Gzip module contains readers and writers of gzip format compressed +# data, as specified in [RFC 1952](https://www.ietf.org/rfc/rfc1952.txt). +module Gzip + NO_COMPRESSION = Flate::NO_COMPRESSION + BEST_SPEED = Flate::BEST_SPEED + BEST_COMPRESSION = Flate::BEST_COMPRESSION + DEFAULT_COMPRESSION = Flate::DEFAULT_COMPRESSION + + private ID1 = 0x1f_u8 + private ID2 = 0x8b_u8 + private DEFLATE = 8_u8 + + class Error < Exception + end +end + +require "./*" diff --git a/src/gzip/header.cr b/src/gzip/header.cr new file mode 100644 index 000000000000..25c399e707ec --- /dev/null +++ b/src/gzip/header.cr @@ -0,0 +1,103 @@ +# A header in a gzip stream. +class Gzip::Header + property modification_time : Time + property os : UInt8 + property extra = Bytes.empty + property name : String? + property comment : String? 
+ + # :nodoc: + @[Flags] + enum Flg : UInt8 + TEXT + HCRC + EXTRA + NAME + COMMENT + end + + # :nodoc: + def initialize + @modification_time = Time.new + @os = 255_u8 # Unknown + end + + # :nodoc: + def initialize(first_byte : UInt8, io : IO) + header = uninitialized UInt8[10] + header[0] = first_byte + io.read_fully(header.to_slice + 1) + + if header[0] != ID1 || header[1] != ID2 || header[2] != DEFLATE + raise Error.new("invalid gzip header") + end + + flg = Flg.new(header[3]) + + seconds = IO::ByteFormat::LittleEndian.decode(Int32, header.to_slice[4, 4]) + @modification_time = Time.epoch(seconds).to_local + + xfl = header[8] + @os = header[9] + + if flg.extra? + xlen = io.read_byte.not_nil! + @extra = Bytes.new(xlen) + io.read_fully(@extra) + end + + if flg.name? + @name = io.gets('\0', chomp: true) + end + + if flg.comment? + @comment = io.gets('\0', chomp: true) + end + + if flg.hcrc? + crc16 = io.read_bytes(UInt16, IO::ByteFormat::LittleEndian) + # TODO check crc16 + end + end + + # :nodoc: + def to_io(io) + # header + io.write_byte ID1 + io.write_byte ID2 + + # compression method + io.write_byte DEFLATE + + # flg + flg = Flg::None + flg |= Flg::EXTRA if @extra + flg |= Flg::NAME if @name + flg |= Flg::COMMENT if @comment + io.write_byte flg.value + + # time + io.write_bytes(modification_time.epoch.to_u32, IO::ByteFormat::LittleEndian) + + # xfl + io.write_byte 0_u8 + + # os + io.write_byte os + + if extra = @extra + io.write_byte extra.size.to_u8 + io.write(extra) + end + + if name = @name + io << name + io.write_byte 0_u8 + end + + if comment = @comment + io << comment + io.write_byte 0_u8 + end + end +end diff --git a/src/gzip/reader.cr b/src/gzip/reader.cr new file mode 100644 index 000000000000..80982431bfc0 --- /dev/null +++ b/src/gzip/reader.cr @@ -0,0 +1,138 @@ +# A read-only `IO` object to decompress data in the gzip format. +# +# Instances of this class wrap another IO object. 
When you read from this +# instance, it reads data from the underlying IO, decompresses it, and returns +# it to the caller. +# +# NOTE: A gzip stream can contain zero or more members. If it contains +# no members, `header` will be `nil`. If it contains one or more +# members, only the first header will be recorded here. This is +# because gzipping multiple members is not common as one usually +# combines gzip with tar. If, however, multiple members are present +# then reading from this reader will return the concatenation of +# all the members. +# +# ### Example: decompress a gzip file +# +# ``` +# require "gzip" +# +# File.write("file.gzip", Bytes[31, 139, 8, 0, 0, 0, 0, 0, 0, 3, 75, 76, 74, 6, 0, 194, 65, 36, 53, 3, 0, 0, 0]) +# +# string = File.open("file.gzip") do |file| +# Gzip::Reader.open(file) do |gzip| +# gzip.gets_to_end +# end +# end +# string # => "abc" +# ``` +class Gzip::Reader + include IO + + # Whether to close the enclosed `IO` when closing this reader. + property? sync_close = false + + # Returns `true` if this reader is closed. + getter? closed = false + + # Returns the first header in the gzip stream, if any. + getter header : Header? + + @flate_io : Flate::Reader? + + # Creates a new reader from the given *io*. + def initialize(@io : IO, @sync_close = false) + @crc32 = CRC32.initial # CRC32 of read data + @isize = 0_u32 # Total size of read data + + first_byte = @io.read_byte + + # A gzip file could be empty (have no members), so + # we account for that case + return unless first_byte + + @header = Header.new(first_byte, @io) + @flate_io = Flate::Reader.new(@io) + end + + # Creates a new reader from the given *filename*. + def self.new(filename : String) + new(::File.new(filename), sync_close: true) + end + + # Creates a new reader from the given *io*, yields it to the given block, + # and closes it at the end. 
+ def self.open(io : IO, sync_close = false) + reader = new(io, sync_close: sync_close) + yield reader ensure reader.close + end + + # Creates a new reader from the given *filename*, yields it to the given block, + # and closes it at the end. + def self.open(filename : String) + reader = new(filename) + yield reader ensure reader.close + end + + # See `IO#read`. + def read(slice : Bytes) + check_open + + return 0 if slice.empty? + + while true + flate_io = @flate_io + return 0 unless flate_io + + read_bytes = flate_io.read(slice) + if read_bytes == 0 + crc32 = @io.read_bytes(UInt32, IO::ByteFormat::LittleEndian) + isize = @io.read_bytes(UInt32, IO::ByteFormat::LittleEndian) + + if crc32 != @crc32 + raise Gzip::Error.new("CRC32 checksum mismatch") + end + + if isize != @isize + raise Gzip::Error.new("isize mismatch") + end + + # Reset checksum and total size for next entry + @crc32 = CRC32.initial + @isize = 0_u32 + + # Check if another header with data comes + first_byte = @io.read_byte + if first_byte + Header.new(first_byte, @io) + @flate_io = Flate::Reader.new(@io) + else + @flate_io = nil + break + end + else + # Update CRC32 and total data size + @crc32 = CRC32.update(slice[0, read_bytes], @crc32) + @isize += read_bytes + + break + end + end + + read_bytes + end + + # Always raises `IO::Error` because this is a read-only `IO`. + def write(slice : Bytes) : Nil + raise IO::Error.new("can't write to Gzip::Reader") + end + + # Closes this reader. + def close + return if @closed + @closed = true + + @flate_io.try &.close + @io.close if @sync_close + end +end diff --git a/src/gzip/writer.cr b/src/gzip/writer.cr new file mode 100644 index 000000000000..3f2ea3bea3ca --- /dev/null +++ b/src/gzip/writer.cr @@ -0,0 +1,114 @@ +# A write-only `IO` object to compress data in the gzip format. +# +# Instances of this class wrap another IO object. When you write to this +# instance, it compresses the data and writes it to the underlying IO. 
+# +# **Note**: unless created with a block, `close` must be invoked after all +# data has been written to a Gzip::Writer instance. +# +# ### Example: compress a file +# +# ``` +# require "gzip" +# +# File.write("file.txt", "abc") +# +# File.open("./file.txt", "r") do |input_file| +# File.open("./file.gzip", "w") do |output_file| +# Gzip::Writer.open(output_file) do |gzip| +# IO.copy(input_file, gzip) +# end +# end +# end +# ``` +class Gzip::Writer + include IO + + # Whether to close the enclosed `IO` when closing this writer. + property? sync_close = false + + # Returns `true` if this writer is closed. + getter? closed = false + + # The header to write to the gzip stream. It will be + # written just before the first write to this writer. + # Changes to the header after the first write are + # ignored. + getter header = Header.new + + # Creates a new writer to the given *io*. + def initialize(@io : IO, @level = Gzip::DEFAULT_COMPRESSION, @sync_close = false) + @crc32 = CRC32.initial # CRC32 of written data + @isize = 0 # Total size of written data + end + + # Creates a new writer to the given *filename*. + def self.new(filename : String, level = Gzip::DEFAULT_COMPRESSION) + new(::File.new(filename, "w"), level: level, sync_close: true) + end + + # Creates a new writer to the given *io*, yields it to the given block, + # and closes it at the end. + def self.open(io : IO, level = Gzip::DEFAULT_COMPRESSION, sync_close = false) + writer = new(io, level: level, sync_close: sync_close) + yield writer ensure writer.close + end + + # Creates a new writer to the given *filename*, yields it to the given block, + # and closes it at the end. + def self.open(filename : String, level = Gzip::DEFAULT_COMPRESSION) + writer = new(filename, level: level) + yield writer ensure writer.close + end + + # Always raises `IO::Error` because this is a write-only `IO`. + def read(slice : Bytes) + raise IO::Error.new("can't read from Gzip::Writer") + end + + # See `IO#write`. 
+ def write(slice : Bytes) : Nil + check_open + + flate_io = write_header + flate_io.write(slice) + + # Update CRC32 and total data size + @crc32 = CRC32.update(slice, @crc32) + @isize += slice.size + end + + # Flushes data, forcing writing the gzip header if no + # data has been written yet. + # + # See `IO#flush`. + def flush + check_open + + flate_io = write_header + flate_io.flush + end + + # Closes this writer. Must be invoked after all data has been written. + def close + return if @closed + @closed = true + + flate_io = write_header + flate_io.close + + @io.write_bytes @crc32, IO::ByteFormat::LittleEndian + @io.write_bytes @isize, IO::ByteFormat::LittleEndian + + @io.close if @sync_close + end + + private def write_header + flate_io = @flate_io + unless flate_io + flate_io = @flate_io = Flate::Writer.new(@io, level: @level) + header.to_io(@io) + end + flate_io + end +end diff --git a/src/http/common.cr b/src/http/common.cr index 43c1ac58a2d7..2aa5e4a03acf 100644 --- a/src/http/common.cr +++ b/src/http/common.cr @@ -1,5 +1,6 @@ {% if !flag?(:without_zlib) %} - require "zlib" + require "flate" + require "gzip" {% end %} module HTTP @@ -40,9 +41,9 @@ module HTTP encoding = headers["Content-Encoding"]? 
case encoding when "gzip" - body = Zlib::Inflate.gzip(body, sync_close: true) + body = Gzip::Reader.new(body, sync_close: true) when "deflate" - body = Zlib::Inflate.new(body, sync_close: true) + body = Flate::Reader.new(body, sync_close: true) end {% end %} end diff --git a/src/http/server.cr b/src/http/server.cr index 1936ce93c822..d5ea3553aa12 100644 --- a/src/http/server.cr +++ b/src/http/server.cr @@ -66,7 +66,7 @@ require "./common" # HTTP::Server.new("127.0.0.1", 8080, [ # HTTP::ErrorHandler.new, # HTTP::LogHandler.new, -# HTTP::DeflateHandler.new, +# HTTP::CompressHandler.new, # HTTP::StaticFileHandler.new("."), # ]).listen # ``` diff --git a/src/http/server/handlers/deflate_handler.cr b/src/http/server/handlers/compress_handler.cr similarity index 77% rename from src/http/server/handlers/deflate_handler.cr rename to src/http/server/handlers/compress_handler.cr index 83912d9ad369..b140785dd8e3 100644 --- a/src/http/server/handlers/deflate_handler.cr +++ b/src/http/server/handlers/compress_handler.cr @@ -1,10 +1,11 @@ {% if !flag?(:without_zlib) %} - require "zlib" + require "flate" + require "gzip" {% end %} # A handler that configures an `HTTP::Server::Response` to compress the response # output, either using gzip or deflate, depending on the `Accept-Encoding` request header. 
-class HTTP::DeflateHandler +class HTTP::CompressHandler include HTTP::Handler def call(context) @@ -15,10 +16,10 @@ class HTTP::DeflateHandler if request_headers.includes_word?("Accept-Encoding", "gzip") context.response.headers["Content-Encoding"] = "gzip" - context.response.output = Zlib::Deflate.gzip(context.response.output, sync_close: true) + context.response.output = Gzip::Writer.new(context.response.output, sync_close: true) elsif request_headers.includes_word?("Accept-Encoding", "deflate") context.response.headers["Content-Encoding"] = "deflate" - context.response.output = Zlib::Deflate.new(context.response.output, sync_close: true) + context.response.output = Flate::Writer.new(context.response.output, sync_close: true) end call_next(context) diff --git a/src/zlib/lib_zlib.cr b/src/lib_z/lib_z.cr similarity index 55% rename from src/zlib/lib_zlib.cr rename to src/lib_z/lib_z.cr index d6b51c47f8f8..1f3f51945cfd 100644 --- a/src/zlib/lib_zlib.cr +++ b/src/lib_z/lib_z.cr @@ -6,8 +6,6 @@ lib LibZ alias Long = LibC::Long alias ULong = LibC::ULong alias SizeT = LibC::SizeT - alias Double = LibC::Double - alias BitcntT = ULong alias Bytef = UInt8 @@ -37,36 +35,6 @@ lib LibZ reserved : Long end - struct GZHeader - text : Int32 - time : UInt64 - xflags : Int32 - os : Int32 - extra : UInt8* - extra_len : UInt32 - extra_max : UInt32 - name : UInt8* - name_max : UInt32 - comment : UInt8* - comm_max : UInt32 - hcrc : Int32 - done : Int32 - end - - enum Strategy - FILTERED = 1 - HUFFMAN_ONLY = 2 - RLE = 3 - FIXED = 4 - DEFAULT_STRATEGY = 0 - end - - # compression level - NO_COMPRESSION = 0 - BEST_SPEED = 1 - BEST_COMPRESSION = 9 - DEFAULT_COMPRESSION = -1 - # error codes enum Error OK = 0 @@ -95,37 +63,22 @@ lib LibZ Z_DEFLATED = 8 fun deflateInit2 = deflateInit2_(stream : ZStream*, level : Int32, method : Int32, - window_bits : Int32, mem_level : Int32, strategy : Strategy, + window_bits : Int32, mem_level : Int32, strategy : Int32, version : UInt8*, stream_size : 
Int32) : Error fun deflate(stream : ZStream*, flush : Flush) : Error fun deflateEnd(stream : ZStream*) : Int32 - fun deflateReset(stream : ZStream*) : Int32 - fun deflateParams(stream : ZStream*, level : Int32, strategy : Strategy) : Int32 - fun deflateSetDictionary(stream : ZStream*, dictionary : UInt8*, len : UInt32) : Int32 + fun deflateReset(stream : ZStream*) : Error + fun deflateSetDictionary(stream : ZStream*, dictionary : UInt8*, len : UInt) : Int fun inflateInit2 = inflateInit2_(stream : ZStream*, window_bits : Int32, version : UInt8*, stream_size : Int32) : Error fun inflate(stream : ZStream*, flush : Flush) : Error fun inflateEnd(stream : ZStream*) : Int32 fun inflateReset(stream : ZStream*) : Int32 - fun inflateSetDictionary(stream : ZStream*, dictionary : UInt8*, len : UInt32) : Int32 + fun inflateSetDictionary(stream : ZStream*, dictionary : UInt8*, len : UInt) : Int - alias GZFile = Void* + alias InFunc = Void*, UInt8** -> UInt + alias OutFunc = Void*, UInt8*, UInt -> Int - fun gzdopen(fd : Int32, mode : UInt8*) : GZFile - fun gzbuffer(file : GZFile, size : UInt32) : Int32 - fun gzsetparams(file : GZFile, level : Int32, strategy : Strategy) : Int32 - fun gzread(file : GZFile, buf : UInt8*, len : UInt32) : Int32 - fun gzwrite(file : GZFile, buf : UInt8*, len : UInt32) : Int32 - fun gzflush(file : GZFile, flush : Flush) : Int32 - fun gzseek(file : GZFile, offset : LibC::SizeT, whence : Int32) : Int32 - fun gzrewind(file : GZFile) : Int32 - fun gztell(file : GZFile) : LibC::SizeT - fun gzoffset(file : GZFile) : LibC::SizeT - fun gzeof(file : GZFile) : Int32 - fun gzdirect(file : GZFile) : Int32 - fun gzclose(file : GZFile) : Int32 - fun gzclose_r(file : GZFile) : Int32 - fun gzclose_w(file : GZFile) : Int32 - fun gzerror(file : GZFile, errnum : Int32*) : UInt8* - fun gzclearerr(file : GZFile) + fun inflateBackInit = inflateBackInit_(stream : ZStream*, window_bits : Int, window : UInt8*, version : UInt8*, stream_size : Int) : Int + fun inflateBack(stream 
: ZStream*, in : InFunc, in_desc : Void*, out : OutFunc, out_desc : Void*) : Int end diff --git a/src/zip/checksum_reader.cr b/src/zip/checksum_reader.cr index af5389ba4146..c8e0e3891f4c 100644 --- a/src/zip/checksum_reader.cr +++ b/src/zip/checksum_reader.cr @@ -5,12 +5,9 @@ module Zip private class ChecksumReader include IO - def initialize(@io : IO, @filename : String, verify @expected_crc32 : UInt32? = nil) - @crc32 = LibC::ULong.new(0) - end + getter crc32 = CRC32.initial - def crc32 - @crc32.to_u32 + def initialize(@io : IO, @filename : String, verify @expected_crc32 : UInt32? = nil) end def read(slice : Bytes) @@ -20,7 +17,7 @@ module Zip raise Zip::Error.new("checksum failed for entry #{@filename} (expected #{expected_crc32}, got #{crc32}") end else - @crc32 = Zlib.crc32(slice[0, read_bytes], @crc32) + @crc32 = CRC32.update(slice[0, read_bytes], @crc32) end read_bytes end diff --git a/src/zip/checksum_writer.cr b/src/zip/checksum_writer.cr index 4b810b45bac0..626a4f049be6 100644 --- a/src/zip/checksum_writer.cr +++ b/src/zip/checksum_writer.cr @@ -5,7 +5,7 @@ module Zip include IO getter count = 0_u32 - getter crc32 = LibC::ULong.new(0) + getter crc32 = CRC32.initial getter! io : IO def initialize(@compute_crc32 = false) @@ -17,13 +17,13 @@ module Zip def write(slice : Bytes) @count += slice.size - @crc32 = Zlib.crc32(slice, @crc32) if @compute_crc32 + @crc32 = CRC32.update(slice, @crc32) if @compute_crc32 io.write(slice) end def io=(@io) @count = 0_u32 - @crc32 = LibC::ULong.new(0) + @crc32 = CRC32.initial end end end diff --git a/src/zip/file_info.cr b/src/zip/file_info.cr index 35129f44c627..cee14d4b5de9 100644 --- a/src/zip/file_info.cr +++ b/src/zip/file_info.cr @@ -115,12 +115,13 @@ module Zip::FileInfo io = IO::Sized.new(io, compressed_size) unless is_sized when .deflated? if compressed_size == 0 && bit_3_set? 
- io = IO::Delimited.new(io, DEFLATE_END_SIGNATURE) + # Read until we end decompressing the deflate data, + # which has an unknown size else io = IO::Sized.new(io, compressed_size) unless is_sized end - io = Zlib::Inflate.new(io, wbits: Zlib::ZIP) + io = Flate::Reader.new(io) else raise "Unsupported compression method: #{compression_method}" end diff --git a/src/zip/reader.cr b/src/zip/reader.cr index 515d47099740..003c797cfbef 100644 --- a/src/zip/reader.cr +++ b/src/zip/reader.cr @@ -58,7 +58,7 @@ class Zip::Reader # are no more entries. # # After reading a next entry, previous entries can no - # longer be read (their IO will be closed.) + # longer be read (their `IO` will be closed.) def next_entry : Entry? return nil if @reached_end @@ -109,7 +109,15 @@ class Zip::Reader private def skip_data_descriptor(entry) if entry.compression_method.deflated? && entry.bit_3_set? - read_data_descriptor(entry) + # The data descriptor signature is optional: if we + # find it, we read the data descriptor info normally; + # otherwise, the first four bytes are the crc32 value. + signature = read UInt32 + if signature == FileInfo::DATA_DESCRIPTOR_SIGNATURE + read_data_descriptor(entry) + else + read_data_descriptor(entry, crc32: signature) + end @read_data_descriptor = true else @read_data_descriptor = false @@ -117,8 +125,8 @@ class Zip::Reader end end - private def read_data_descriptor(entry) - entry.crc32 = read UInt32 + private def read_data_descriptor(entry, crc32 = nil) + entry.crc32 = crc32 || (read UInt32) entry.compressed_size = read UInt32 entry.uncompressed_size = read UInt32 verify_checksum(entry) diff --git a/src/zip/writer.cr b/src/zip/writer.cr index 425d888d0d91..be1724fc6b70 100644 --- a/src/zip/writer.cr +++ b/src/zip/writer.cr @@ -102,7 +102,7 @@ class Zip::Writer yield @uncompressed_size_counter when .deflated? 
@compressed_size_counter.io = @io - io = Zlib::Deflate.new(@compressed_size_counter, wbits: Zlib::ZIP) + io = Flate::Writer.new(@compressed_size_counter) @uncompressed_size_counter.io = io yield @uncompressed_size_counter io.close diff --git a/src/zip/zip.cr b/src/zip/zip.cr index 75876cce8c83..31e666bba536 100644 --- a/src/zip/zip.cr +++ b/src/zip/zip.cr @@ -1,4 +1,5 @@ -require "zlib" +require "flate" +require "crc32" require "./*" # The Zip module contains readers and writers of the zip diff --git a/src/zlib.cr b/src/zlib.cr deleted file mode 100644 index c97acbb27771..000000000000 --- a/src/zlib.cr +++ /dev/null @@ -1,55 +0,0 @@ -require "./zlib/*" - -# The Zlib module provides access to the [zlib library](http://zlib.net/) for -# lossless data compression and decompression in zlib and gzip format: -# -# * `Zlib::Deflate` for compression -# * `Zlib::Inflate` for decompression -module Zlib - GZIP = LibZ::MAX_BITS + 16 - ZIP = -LibZ::MAX_BITS - - # Returns the linked zlib version. - def self.version : String - String.new LibZ.zlibVersion - end - - def self.adler32(data, adler) - slice = data.to_slice - LibZ.adler32(adler, slice, slice.size) - end - - def self.adler32(data) - adler = LibZ.adler32(0, nil, 0) - adler32(data, adler) - end - - def self.adler32_combine(adler1, adler2, len) - LibZ.adler32_combine(adler1, adler2, len) - end - - def self.crc32(data, crc) - slice = data.to_slice - LibZ.crc32(crc, slice, slice.size) - end - - def self.crc32(data) - crc = LibZ.crc32(0, nil, 0) - crc32(data, crc) - end - - def self.crc32_combine(crc1, crc2, len) - LibZ.crc32_combine(crc1, crc2, len) - end - - class Error < Exception - def initialize(ret, stream) - if msg = stream.msg - error_msg = String.new(msg) - super("inflate: #{error_msg} #{ret}") - else - super("inflate: #{ret}") - end - end - end -end diff --git a/src/zlib/deflate.cr b/src/zlib/deflate.cr deleted file mode 100644 index cfc52372e4e3..000000000000 --- a/src/zlib/deflate.cr +++ /dev/null @@ -1,132 +0,0 @@ 
-# A write-only `IO` object to compress data in zlib or gzip format. -# -# Instances of this class wrap another `IO` object. When you write to this -# instance, it compresses the data and writes it to the underlying `IO`. -# -# NOTE: unless created with a block, `close` must be invoked after all -# data has been written to a `Zlib::Deflate` instance. -# -# ### Example: compress a file -# -# ``` -# require "zlib" -# -# File.write("file.txt", "abc") -# -# File.open("./file.txt", "r") do |input_file| -# File.open("./file.gzip", "w") do |output_file| -# Zlib::Deflate.gzip(output_file) do |deflate| -# IO.copy(input_file, deflate) -# end -# end -# end -# ``` -# -# See also: `Zlib::Inflate` for decompressing data. -class Zlib::Deflate - include IO - - # If `#sync_close?` is `true`, closing this `IO` will close the underlying `IO`. - property? sync_close : Bool - - # Creates an instance of `Zlib::Deflate`. `close` must be invoked - # after all data has been written. - def initialize(@output : IO, level = LibZ::DEFAULT_COMPRESSION, wbits = LibZ::MAX_BITS, - mem_level = LibZ::DEF_MEM_LEVEL, strategy = LibZ::Strategy::DEFAULT_STRATEGY, - @sync_close : Bool = false) - @buf = uninitialized UInt8[8192] # output buffer used by zlib - @stream = LibZ::ZStream.new - @stream.zalloc = LibZ::AllocFunc.new { |opaque, items, size| GC.malloc(items * size) } - @stream.zfree = LibZ::FreeFunc.new { |opaque, address| GC.free(address) } - @closed = false - ret = LibZ.deflateInit2(pointerof(@stream), level, LibZ::Z_DEFLATED, wbits, mem_level, - strategy, LibZ.zlibVersion, sizeof(LibZ::ZStream)) - if ret != LibZ::Error::OK - raise Zlib::Error.new(ret, @stream) - end - end - - # Creates an instance of `Zlib::Deflate`, yields it to the given block, - # and closes it at its end. 
- def self.new(output : IO, level = LibZ::DEFAULT_COMPRESSION, wbits = LibZ::MAX_BITS, - mem_level = LibZ::DEF_MEM_LEVEL, strategy = LibZ::Strategy::DEFAULT_STRATEGY, - sync_close : Bool = false) - deflate = new(output, level: level, wbits: wbits, mem_level: mem_level, strategy: strategy, sync_close: sync_close) - begin - yield deflate - ensure - deflate.close - end - end - - # Creates an instance of `Zlib::Deflate` for the gzip format. `close` - # must be invoked after all data has written. - def self.gzip(output, sync_close : Bool = false) : self - new output, wbits: GZIP, sync_close: sync_close - end - - # Creates an instance of `Zlib::Deflate` for the gzip format, yields it - # to the given block, and closes it at its end. - def self.gzip(output, sync_close : Bool = false) - deflate = gzip(output, sync_close: sync_close) - begin - yield deflate - ensure - deflate.close - end - end - - # Always raises: this is a write-only `IO`. - def read(slice : Bytes) - raise "can't read from Zlib::Deflate" - end - - # See `IO#write`. - def write(slice : Bytes) - check_open - - @stream.avail_in = slice.size - @stream.next_in = slice - consume_output LibZ::Flush::NO_FLUSH - end - - # See `IO#flush`. - def flush - return if @closed - - consume_output LibZ::Flush::SYNC_FLUSH - end - - # Closes this `IO`. Must be invoked after all data has been written. - def close - return if @closed - @closed = true - - @stream.avail_in = 0 - @stream.next_in = Pointer(UInt8).null - consume_output LibZ::Flush::FINISH - LibZ.deflateEnd(pointerof(@stream)) - - @output.close if @sync_close - end - - # Returns `true` if this `IO` is closed. - def closed? 
- @closed - end - - # :nodoc: - def inspect(io) - to_s(io) - end - - private def consume_output(flush) - loop do - @stream.next_out = @buf.to_unsafe - @stream.avail_out = @buf.size.to_u32 - LibZ.deflate(pointerof(@stream), flush) # no bad return value - @output.write(@buf.to_slice[0, @buf.size - @stream.avail_out]) - break if @stream.avail_out != 0 - end - end -end diff --git a/src/zlib/inflate.cr b/src/zlib/inflate.cr deleted file mode 100644 index edaaa92a8271..000000000000 --- a/src/zlib/inflate.cr +++ /dev/null @@ -1,132 +0,0 @@ -# A read-only `IO` object to decompress data in zlib or gzip format. -# -# Instances of this class wrap another IO object. When you read from this instance -# instance, it reads data from the underlying IO, decompresses it, and returns -# it to the caller. -# -# ### Example: decompress text a file -# -# ``` -# require "zlib" -# -# File.write("file.gzip", Bytes[31, 139, 8, 0, 0, 0, 0, 0, 0, 3, 75, 76, 74, 6, 0, 194, 65, 36, 53, 3, 0, 0, 0]) -# -# string = File.open("file.gzip", "r") do |file| -# Zlib::Inflate.gzip(file) do |inflate| -# inflate.gets_to_end -# end -# end -# string # => "abc" -# ``` -# -# See also: `Zlib::Deflate` for compressing data. -class Zlib::Inflate - include IO - - # If `#sync_close?` is `true`, closing this `IO` will close the underlying `IO`. - property? sync_close : Bool - - # Creates an instance of `Zlib::Inflate`. 
- def initialize(@input : IO, wbits = LibZ::MAX_BITS, @sync_close : Bool = false) - @buf = uninitialized UInt8[8192] # input buffer used by zlib - @stream = LibZ::ZStream.new - @stream.zalloc = LibZ::AllocFunc.new { |opaque, items, size| GC.malloc(items * size) } - @stream.zfree = LibZ::FreeFunc.new { |opaque, address| GC.free(address) } - ret = LibZ.inflateInit2(pointerof(@stream), wbits, LibZ.zlibVersion, sizeof(LibZ::ZStream)) - if ret != LibZ::Error::OK - raise Zlib::Error.new(ret, @stream) - end - @closed = false - end - - # Creates an instance of `Zlib::Inflate`, yields it to the given block, - # and closes it at its end. - def self.new(input : IO, wbits = LibZ::MAX_BITS, sync_close : Bool = false) - inflate = new input, wbits: wbits, sync_close: sync_close - begin - yield inflate - ensure - inflate.close - end - end - - # Creates an instance of `Zlib::Inflate` for the gzip format. `close` - # must be invoked after all data has written. - def self.gzip(input, sync_close : Bool = false) : self - new input, wbits: GZIP, sync_close: sync_close - end - - # Creates an instance of `Zlib::Inflate` for the gzip format, yields it - # to the given block, and closes it at its end. - def self.gzip(input, sync_close : Bool = false) - inflate = gzip input, sync_close: sync_close - begin - yield inflate - ensure - inflate.close - end - end - - # Always raises: this is a read-only `IO`. - def write(slice : Bytes) - raise IO::Error.new "Can't write to InflateIO" - end - - # See `IO#read`. - def read(slice : Bytes) - check_open - - return 0 if slice.empty? 
- +- while true +- if @stream.avail_in == 0 +- @stream.next_in = @buf.to_unsafe +- @stream.avail_in = @input.read(@buf.to_slice).to_u32 +- return 0 if @stream.avail_in == 0 +- end +- +- @stream.avail_out = slice.size.to_u32 +- @stream.next_out = slice.to_unsafe +- +- ret = LibZ.inflate(pointerof(@stream), LibZ::Flush::NO_FLUSH) +- read_bytes = slice.size - @stream.avail_out +- case ret +- when LibZ::Error::NEED_DICT, +- LibZ::Error::DATA_ERROR, +- LibZ::Error::MEM_ERROR +- raise Zlib::Error.new(ret, @stream) +- when LibZ::Error::STREAM_END +- return read_bytes +- else +- # LibZ.inflate might not write any data to the output slice because +- # it might need more input. We can know this happened because *ret* +- # is not STREAM_END. +- if read_bytes == 0 +- next +- else +- return read_bytes +- end +- end +- end +- end +- +- # Closes this `IO`. +- def close +- return if @closed +- @closed = true +- +- LibZ.inflateEnd(pointerof(@stream)) +- +- @input.close if @sync_close +- end +- +- # Returns `true` if this `IO` is closed. +- def closed? +- @closed +- end +- +- # :nodoc: +- def inspect(io) +- to_s(io) +- end -end diff --git a/src/zlib/reader.cr b/src/zlib/reader.cr new file mode 100644 index 000000000000..b06122730f19 --- /dev/null +++ b/src/zlib/reader.cr @@ -0,0 +1,99 @@ +# A read-only `IO` object to decompress data in the zlib format. +# +# Instances of this class wrap another IO object. When you read from this +# instance, it reads data from the underlying IO, decompresses it, and returns +# it to the caller. +class Zlib::Reader + include IO + + # Whether to close the enclosed `IO` when closing this reader. + property? sync_close = false + + # Returns `true` if this reader is closed. + getter? closed = false + + # Creates a new reader from the given *io*. + def initialize(@io : IO, @sync_close = false, dict : Bytes? 
= nil) + Zlib::Reader.read_header(io, dict) + @flate_io = Flate::Reader.new(@io, dict: dict) + @adler32 = Adler32.initial + @end = false + end + + # Creates a new reader from the given *io*, yields it to the given block, + # and closes it at the end. + def self.open(io : IO, sync_close = false, dict : Bytes? = nil) + reader = new(io, sync_close: sync_close, dict: dict) + yield reader ensure reader.close + end + + protected def self.read_header(io, dict) + cmf = io.read_byte || invalid_header + + cm = cmf & 0xF + if cm != 8 # the compression method must be 8 + invalid_header + end + + flg = io.read_byte || invalid_header + + # CMF and FLG, when viewed as a 16-bit unsigned integer stored + # in MSB order (CMF*256 + FLG), must be a multiple of 31 + unless (cmf.to_u16*256 + flg.to_u16).divisible_by?(31) + invalid_header + end + + fdict = flg.bit(5) == 1 + if fdict + unless dict + raise Zlib::Error.new("missing dictionary") + end + + checksum = io.read_bytes(UInt32, IO::ByteFormat::BigEndian) + dict_checksum = Adler32.checksum(dict) + if checksum != dict_checksum + raise Zlib::Error.new("dictionary ADLER-32 checksum mismatch") + end + end + end + + # See `IO#read`. + def read(slice : Bytes) + check_open + + return 0 if slice.empty? + return 0 if @end + + read_bytes = @flate_io.read(slice) + if read_bytes == 0 + # Check ADLER-32 + @end = true + @flate_io.close + adler32 = @io.read_bytes(UInt32, IO::ByteFormat::BigEndian) + if adler32 != @adler32 + raise Zlib::Error.new("ADLER-32 checksum mismatch") + end + else + # Update ADLER-32 checksum + @adler32 = Adler32.update(slice[0, read_bytes], @adler32) + end + read_bytes + end + + # Always raises `IO::Error` because this is a read-only `IO`. 
+ def write(slice : Bytes) + raise IO::Error.new "can't write to Zlib::Reader" + end + + def close + return if @closed + @closed = true + + @flate_io.close + @io.close if @sync_close + end + + protected def self.invalid_header + raise Zlib::Error.new("invalid header") + end +end diff --git a/src/zlib/writer.cr b/src/zlib/writer.cr new file mode 100644 index 000000000000..5b80a4709ed2 --- /dev/null +++ b/src/zlib/writer.cr @@ -0,0 +1,120 @@ +# A write-only `IO` object to compress data in the zlib format. +# +# Instances of this class wrap another IO object. When you write to this +# instance, it compresses the data and writes it to the underlying IO. +# +# **Note**: unless created with a block, `close` must be invoked after all +# data has been written to a Zlib::Writer instance. +class Zlib::Writer + include IO + + # Whether to close the enclosed `IO` when closing this writer. + property? sync_close = false + + # Returns `true` if this writer is closed. + getter? closed = false + + # Creates a new writer to the given *io*. + def initialize(@io : IO, @level = Zlib::DEFAULT_COMPRESSION, @sync_close = false, @dict : Bytes? = nil) + @wrote_header = false + @adler32 = Adler32.initial + @flate_io = Flate::Writer.new(@io, level: level, dict: @dict) + end + + # Creates a new writer to the given *filename*. + def self.new(filename : String, level = Zlib::DEFAULT_COMPRESSION, dict : Bytes? = nil) + new(::File.new(filename, "w"), level: level, sync_close: true, dict: dict) + end + + # Creates a new writer to the given *io*, yields it to the given block, + # and closes it at the end. + def self.open(io : IO, level = Zlib::DEFAULT_COMPRESSION, sync_close = false, dict : Bytes? = nil) + writer = new(io, level: level, sync_close: sync_close, dict: dict) + yield writer ensure writer.close + end + + # Creates a new writer to the given *filename*, yields it to the given block, + # and closes it at the end. 
+ def self.open(filename : String, level = Zlib::DEFAULT_COMPRESSION, dict : Bytes? = nil) + writer = new(filename, level: level, dict: dict) + yield writer ensure writer.close + end + + # Always raises `IO::Error` because this is a write-only `IO`. + def read(slice : Bytes) + raise IO::Error.new("can't read from Zlib::Writer") + end + + # See `IO#write`. + def write(slice : Bytes) : Nil + check_open + + write_header unless @wrote_header + + @flate_io.write(slice) + @adler32 = Adler32.update(slice, @adler32) + end + + # Flushes data, forcing writing the zlib header if no + # data has been written yet. + # + # See `IO#flush`. + def flush + check_open + + write_header unless @wrote_header + @flate_io.flush + end + + # Closes this writer. Must be invoked after all data has been written. + def close + return if @closed + @closed = true + + write_header unless @wrote_header + + @flate_io.close + + @io.write_bytes(@adler32, IO::ByteFormat::BigEndian) + + @io.close if @sync_close + end + + private def write_header + @wrote_header = true + + # CMF byte: 7 for window size, 8 for compression method (deflate) + cmf = 0x78_u8 + @io.write_byte cmf + + dict = @dict + + flg = 0_u8 + + if dict + flg |= 1 << 5 + end + + case @level + when 0..1 + flg |= 0 << 6 + when 2..5 + flg |= 1 << 6 + when 6, -1 + flg |= 2 << 6 + else + flg |= 3 << 6 + end + + # CMF and FLG, when viewed as a 16-bit unsigned integer stored + # in MSB order (CMF*256 + FLG), must be a multiple of 31 + flg += 31 - (cmf.to_u16*256 + flg.to_u16).remainder(31) + + @io.write_byte flg + + if dict + dict_checksum = Adler32.checksum(dict) + @io.write_bytes(dict_checksum, IO::ByteFormat::BigEndian) + end + end +end diff --git a/src/zlib/zlib.cr b/src/zlib/zlib.cr new file mode 100644 index 000000000000..7aecd62fa12f --- /dev/null +++ b/src/zlib/zlib.cr @@ -0,0 +1,15 @@ +require "flate" +require "adler32" +require "./*" + +# The Zlib module contains readers and writers of zlib format compressed +# data, as specified in [RFC 
1950](https://www.ietf.org/rfc/rfc1950.txt). +module Zlib + NO_COMPRESSION = Flate::NO_COMPRESSION + BEST_SPEED = Flate::BEST_SPEED + BEST_COMPRESSION = Flate::BEST_COMPRESSION + DEFAULT_COMPRESSION = Flate::DEFAULT_COMPRESSION + + class Error < Exception + end +end