From 0d053563ab79a342ec7367a9a7be51e4354ca42d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janko=20Marohni=C4=87?= Date: Wed, 3 May 2017 03:04:39 +1000 Subject: [PATCH] Make all components into IO objects By changing all components to use an IO object as a base, we can implement a common IO interface for all components, which delegates to the underlying IO object. This enables streaming multipart data into the request body, avoiding loading the whole multipart data into memory when File parts are backed by File objects. See httprb/http#409 for the new streaming API. --- lib/http/form_data/composite_io.rb | 57 +++++++++++++++ lib/http/form_data/file.rb | 14 ---- lib/http/form_data/multipart.rb | 37 ++++------ lib/http/form_data/multipart/param.rb | 59 ++++++++------- lib/http/form_data/part.rb | 22 ++---- lib/http/form_data/readable.rb | 38 ++++++++++ spec/lib/http/form_data/composite_io_spec.rb | 75 ++++++++++++++++++++ spec/lib/http/form_data/file_spec.rb | 70 ++++++++++++++++++ spec/lib/http/form_data/multipart_spec.rb | 67 +++++++++++++---- spec/lib/http/form_data/part_spec.rb | 34 +++++++-- spec/lib/http/form_data_spec.rb | 8 +-- 11 files changed, 373 insertions(+), 108 deletions(-) create mode 100644 lib/http/form_data/composite_io.rb create mode 100644 lib/http/form_data/readable.rb create mode 100644 spec/lib/http/form_data/composite_io_spec.rb diff --git a/lib/http/form_data/composite_io.rb b/lib/http/form_data/composite_io.rb new file mode 100644 index 0000000..f3444f1 --- /dev/null +++ b/lib/http/form_data/composite_io.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +module HTTP + module FormData + # Provides IO interface across multiple IO files. + class CompositeIO + # @param [Array] ios Array of IO objects + def initialize(*ios) + @ios = ios.flatten + @index = 0 + end + + # Reads and returns list of + # + # @param [Integer] length Number of bytes to retrieve + # @param [String] outbuf String to be replaced with retrieved data + # + # @return [String] + def read(length = nil, outbuf = nil) + outbuf = outbuf.to_s.replace("") + + while current_io + data = current_io.read(length) + outbuf << data.to_s + length -= data.to_s.length if length + + break if length == 0 + + advance_io + end + + return nil if length && outbuf.empty? + + outbuf + end + + def rewind + @ios.each(&:rewind) + @index = 0 + end + + def size + @size ||= @ios.map(&:size).inject(0, :+) + end + + private + + def current_io + @ios[@index] + end + + def advance_io + @index += 1 + end + end + end +end diff --git a/lib/http/form_data/file.rb b/lib/http/form_data/file.rb index 1f80052..6ac97de 100644 --- a/lib/http/form_data/file.rb +++ b/lib/http/form_data/file.rb @@ -57,20 +57,6 @@ def initialize(path_or_io, opts = {}) end end end - - # Returns content size. - # - # @return [Integer] - def size - @io.size - end - - # Returns content of the IO. - # - # @return [String] - def to_s - @io.read - end end end end diff --git a/lib/http/form_data/multipart.rb b/lib/http/form_data/multipart.rb index 27d862d..54b2a47 100644 --- a/lib/http/form_data/multipart.rb +++ b/lib/http/form_data/multipart.rb @@ -1,25 +1,27 @@ # frozen_string_literal: true require "securerandom" +require "stringio" require "http/form_data/multipart/param" +require "http/form_data/readable" +require "http/form_data/composite_io" module HTTP module FormData # `multipart/form-data` form data. class Multipart + include Readable + # @param [#to_h, Hash] data form data key-value Hash def initialize(data) - @parts = Param.coerce FormData.ensure_hash data - @boundary = (Array.new(21, "-") << SecureRandom.hex(21)).join("") - @content_length = nil - end + parts = Param.coerce FormData.ensure_hash data - # Returns content to be used for HTTP request body. - # - # @return [String] - def to_s - head + @parts.map(&:to_s).join(glue) + tail + @boundary = ("-" * 21) << SecureRandom.hex(21) + @io = CompositeIO.new( + *parts.flat_map { |part| [StringIO.new(glue), part] }, + StringIO.new(tail), + ) end # Returns MIME type to be used for HTTP request `Content-Type` header. @@ -34,30 +36,19 @@ def content_type # # @return [Integer] def content_length - unless @content_length - @content_length = head.bytesize + tail.bytesize - @content_length += @parts.map(&:size).reduce(:+) - @content_length += (glue.bytesize * (@parts.count - 1)) - end - - @content_length + size end private - # @return [String] - def head - @head ||= "--#{@boundary}#{CRLF}" - end - # @return [String] def glue - @glue ||= "#{CRLF}--#{@boundary}#{CRLF}" + @glue ||= "--#{@boundary}#{CRLF}" end # @return [String] def tail - @tail ||= "#{CRLF}--#{@boundary}--" + @tail ||= "--#{@boundary}--" end end end diff --git a/lib/http/form_data/multipart/param.rb b/lib/http/form_data/multipart/param.rb index d49d13f..6e0d73b 100644 --- a/lib/http/form_data/multipart/param.rb +++ b/lib/http/form_data/multipart/param.rb @@ -1,34 +1,18 @@ # frozen_string_literal: true +require "stringio" + +require "http/form_data/readable" +require "http/form_data/composite_io" + module HTTP module FormData class Multipart # Utility class to represent multi-part chunks class Param - # @param [#to_s] name - # @param [FormData::File, FormData::Part, #to_s] value - def initialize(name, value) - @name = name.to_s - - @part = - if value.is_a?(FormData::Part) - value - else - FormData::Part.new(value) - end - - parameters = { :name => @name } - parameters[:filename] = @part.filename if @part.filename - parameters = parameters.map { |k, v| "#{k}=#{v.inspect}" }.join("; ") - - @header = "Content-Disposition: form-data; #{parameters}" + include Readable - return unless @part.content_type - - @header += "#{CRLF}Content-Type: #{@part.content_type}" - end - - # Returns body part with headers and data. + # Initializes body part with headers and data. # # @example With {FormData::File} value # @@ -44,15 +28,28 @@ def initialize(name, value) # ixti # # @return [String] - def to_s - "#{@header}#{CRLF * 2}#{@part}" - end + # @param [#to_s] name + # @param [FormData::File, FormData::Part, #to_s] value + def initialize(name, value) + part = + if value.is_a?(FormData::Part) + value + else + FormData::Part.new(value) + end - # Calculates size of a part (headers + body). - # - # @return [Integer] - def size - @header.bytesize + (CRLF.bytesize * 2) + @part.size + parameters = { :name => name.to_s } + parameters[:filename] = part.filename if part.filename + parameters = parameters.map { |k, v| "#{k}=#{v.inspect}" }.join("; ") + + header = String.new # rubocop:disable String/EmptyLiteral + header << "Content-Disposition: form-data; #{parameters}#{CRLF}" + header << "Content-Type: #{part.content_type}#{CRLF}" if part.content_type + header << CRLF + + footer = CRLF.dup + + @io = CompositeIO.new(StringIO.new(header), part, StringIO.new(footer)) end # Flattens given `data` Hash into an array of `Param`'s. diff --git a/lib/http/form_data/part.rb b/lib/http/form_data/part.rb index 9b4a5e9..a1bede8 100644 --- a/lib/http/form_data/part.rb +++ b/lib/http/form_data/part.rb @@ -1,5 +1,9 @@ # frozen_string_literal: true +require "stringio" + +require "http/form_data/readable" + module HTTP module FormData # Represents a body part of multipart/form-data request. @@ -9,30 +13,18 @@ module FormData # body = "Message" # FormData::Part.new body, :content_type => 'foobar.txt; charset="UTF-8"' class Part + include Readable + attr_reader :content_type, :filename # @param [#to_s] body # @param [String] content_type Value of Content-Type header # @param [String] filename Value of filename parameter def initialize(body, content_type: nil, filename: nil) - @body = body.to_s + @io = StringIO.new(body.to_s) @content_type = content_type @filename = filename end - - # Returns content size. - # - # @return [Integer] - def size - @body.bytesize - end - - # Returns content of a file of IO. - # - # @return [String] - def to_s - @body - end end end end diff --git a/lib/http/form_data/readable.rb b/lib/http/form_data/readable.rb new file mode 100644 index 0000000..f85e77d --- /dev/null +++ b/lib/http/form_data/readable.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module HTTP + module FormData + # Common behaviour for objects defined by an IO object. + module Readable + # Returns IO content. + # + # @return [String] + def to_s + rewind + read + end + + # Reads and returns part of IO content. + # + # @param [Integer] length Number of bytes to retrieve + # @param [String] outbuf String to be replaced with retrieved data + # + # @return [String, nil] + def read(length = nil, outbuf = nil) + @io.read(length, outbuf) + end + + # Returns IO size. + # + # @return [Integer] + def size + @io.size + end + + # Rewinds the IO. + def rewind + @io.rewind + end + end + end +end diff --git a/spec/lib/http/form_data/composite_io_spec.rb b/spec/lib/http/form_data/composite_io_spec.rb new file mode 100644 index 0000000..46cfb7c --- /dev/null +++ b/spec/lib/http/form_data/composite_io_spec.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +RSpec.describe HTTP::FormData::CompositeIO do + let(:ios) { ["Hello", " ", "", "world", "!"].map { |string| StringIO.new(string) } } + subject(:composite_io) { HTTP::FormData::CompositeIO.new *ios } + + describe "#read" do + it "reads all data" do + expect(composite_io.read).to eq "Hello world!" + end + + it "reads partial data" do + expect(composite_io.read(3)).to eq "Hel" + expect(composite_io.read(2)).to eq "lo" + expect(composite_io.read(1)).to eq " " + expect(composite_io.read(6)).to eq "world!" + end + + it "returns empty string when no data was retrieved" do + composite_io.read + expect(composite_io.read).to eq "" + end + + it "returns nil when no partial data was retrieved" do + composite_io.read + expect(composite_io.read(3)).to eq nil + end + + it "reads partial data with a buffer" do + outbuf = String.new + expect(composite_io.read(3, outbuf)).to eq "Hel" + expect(composite_io.read(2, outbuf)).to eq "lo" + expect(composite_io.read(1, outbuf)).to eq " " + expect(composite_io.read(6, outbuf)).to eq "world!" + end + + it "fills the buffer with retrieved content" do + outbuf = String.new + composite_io.read(3, outbuf) + expect(outbuf).to eq "Hel" + composite_io.read(2, outbuf) + expect(outbuf).to eq "lo" + composite_io.read(1, outbuf) + expect(outbuf).to eq " " + composite_io.read(6, outbuf) + expect(outbuf).to eq "world!" + end + + it "returns nil when no partial data was retrieved with a buffer" do + outbuf = String.new("content") + composite_io.read + expect(composite_io.read(3, outbuf)).to eq nil + expect(outbuf).to eq "" + end + end + + describe "#rewind" do + it "rewinds all IOs" do + composite_io.read + composite_io.rewind + expect(composite_io.read).to eq "Hello world!" + end + end + + describe "#size" do + it "returns sum of all IO sizes" do + expect(composite_io.size).to eq 12 + end + + it "returns 0 when there are no IOs" do + empty_composite_io = HTTP::FormData::CompositeIO.new + expect(empty_composite_io.size).to eq 0 + end + end +end diff --git a/spec/lib/http/form_data/file_spec.rb b/spec/lib/http/form_data/file_spec.rb index 19280c4..59ecf1d 100644 --- a/spec/lib/http/form_data/file_spec.rb +++ b/spec/lib/http/form_data/file_spec.rb @@ -54,6 +54,76 @@ end end + describe "#read" do + subject { described_class.new(file, opts).read } + + context "when file given as a String" do + let(:file) { fixture("the-http-gem.info").to_s } + it { is_expected.to eq fixture("the-http-gem.info").read(:mode => "rb") } + end + + context "when file given as a Pathname" do + let(:file) { fixture("the-http-gem.info") } + it { is_expected.to eq fixture("the-http-gem.info").read(:mode => "rb") } + end + + context "when file given as File" do + let(:file) { fixture("the-http-gem.info").open("rb") } + after { file.close } + it { is_expected.to eq fixture("the-http-gem.info").read(:mode => "rb") } + end + + context "when file given as IO" do + let(:file) { StringIO.new "привет мир!" } + it { is_expected.to eq "привет мир!" } + end + end + + describe "#rewind" do + subject { described_class.new(file, opts) } + + context "when file given as a String" do + let(:file) { fixture("the-http-gem.info").to_s } + + it "rewinds the underlying IO object" do + subject.read + subject.rewind + expect(subject.read).to eq fixture("the-http-gem.info").read(:mode => "rb") + end + end + + context "when file given as a Pathname" do + let(:file) { fixture("the-http-gem.info") } + + it "rewinds the underlying IO object" do + subject.read + subject.rewind + expect(subject.read).to eq fixture("the-http-gem.info").read(:mode => "rb") + end + end + + context "when file given as File" do + let(:file) { fixture("the-http-gem.info").open("rb") } + after { file.close } + + it "rewinds the underlying IO object" do + subject.read + subject.rewind + expect(subject.read).to eq fixture("the-http-gem.info").read(:mode => "rb") + end + end + + context "when file given as IO" do + let(:file) { StringIO.new "привет мир!" } + + it "rewinds the underlying IO object" do + subject.read + subject.rewind + expect(subject.read).to eq "привет мир!" + end + end + end + describe "#filename" do subject { described_class.new(file, opts).filename } diff --git a/spec/lib/http/form_data/multipart_spec.rb b/spec/lib/http/form_data/multipart_spec.rb index bd95614..6647779 100644 --- a/spec/lib/http/form_data/multipart_spec.rb +++ b/spec/lib/http/form_data/multipart_spec.rb @@ -6,19 +6,6 @@ let(:boundary) { /-{21}[a-f0-9]{42}/ } subject(:form_data) { HTTP::FormData::Multipart.new params } - describe "#content_type" do - subject { form_data.content_type } - - let(:content_type) { %r{^multipart\/form-data; boundary=#{boundary}$} } - - it { is_expected.to match(content_type) } - end - - describe "#content_length" do - subject { form_data.content_length } - it { is_expected.to eq form_data.to_s.bytesize } - end - describe "#to_s" do def disposition(params) params = params.map { |k, v| "#{k}=#{v.inspect}" }.join("; ") @@ -36,14 +23,14 @@ def disposition(params) "#{crlf}bar#{crlf}", "--#{boundary_value}#{crlf}", "#{disposition 'name' => 'baz', 'filename' => file.filename}#{crlf}", - "Content-Type: #{file.mime_type}#{crlf}", + "Content-Type: #{file.content_type}#{crlf}", "#{crlf}#{file}#{crlf}", "--#{boundary_value}--" ].join("") end context "with filename set to nil" do - let(:part) { HTTP::FormData::Part.new("s", :filename => nil) } + let(:part) { HTTP::FormData::Part.new("s", :content_type => "text/plain") } let(:form_data) { HTTP::FormData::Multipart.new(:foo => part) } it "doesn't include a filename" do @@ -52,10 +39,60 @@ def disposition(params) expect(form_data.to_s).to eq [ "--#{boundary_value}#{crlf}", "#{disposition 'name' => 'foo'}#{crlf}", + "Content-Type: #{part.content_type}#{crlf}", "#{crlf}s#{crlf}", "--#{boundary_value}--" ].join("") end end + + context "with content type set to nil" do + let(:part) { HTTP::FormData::Part.new("s") } + let(:form_data) { HTTP::FormData::Multipart.new(:foo => part) } + + it "doesn't include a filename" do + boundary_value = form_data.content_type[/(#{boundary})$/, 1] + + expect(form_data.to_s).to eq [ + "--#{boundary_value}#{crlf}", + "#{disposition 'name' => 'foo'}#{crlf}", + "#{crlf}s#{crlf}", + "--#{boundary_value}--" + ].join("") + end + end + end + + describe "#size" do + it "returns bytesize of multipart data" do + expect(form_data.size).to eq form_data.to_s.bytesize + end + end + + describe "#read" do + it "returns multipart data" do + expect(form_data.read).to eq form_data.to_s + end + end + + describe "#rewind" do + it "rewinds the multipart data IO" do + form_data.read + form_data.rewind + expect(form_data.read).to eq form_data.to_s + end + end + + describe "#content_type" do + subject { form_data.content_type } + + let(:content_type) { %r{^multipart\/form-data; boundary=#{boundary}$} } + + it { is_expected.to match(content_type) } + end + + describe "#content_length" do + subject { form_data.content_length } + it { is_expected.to eq form_data.to_s.bytesize } end end diff --git a/spec/lib/http/form_data/part_spec.rb b/spec/lib/http/form_data/part_spec.rb index 32a5ce0..8c92b56 100644 --- a/spec/lib/http/form_data/part_spec.rb +++ b/spec/lib/http/form_data/part_spec.rb @@ -1,11 +1,12 @@ # frozen_string_literal: true RSpec.describe HTTP::FormData::Part do - let(:body) { "" } - let(:opts) { {} } + let(:body) { "" } + let(:opts) { {} } + subject(:part) { HTTP::FormData::Part.new(body, opts) } describe "#size" do - subject { described_class.new(body, opts).size } + subject { part.size } context "when body given as a String" do let(:body) { "привет мир!" } @@ -14,7 +15,7 @@ end describe "#to_s" do - subject { described_class.new(body, opts).to_s } + subject! { part.to_s } context "when body given as String" do let(:body) { "привет мир!" } @@ -22,8 +23,29 @@ end end + describe "#read" do + subject { part.read } + + context "when body given as String" do + let(:body) { "привет мир!" } + it { is_expected.to eq "привет мир!" } + end + end + + describe "#rewind" do + context "when body given as String" do + let(:body) { "привет мир!" } + + it "rewinds the underlying IO object" do + part.read + part.rewind + expect(part.read).to eq "привет мир!" + end + end + end + describe "#filename" do - subject { described_class.new(body, opts).filename } + subject { part.filename } it { is_expected.to eq nil } @@ -34,7 +56,7 @@ end describe "#content_type" do - subject { described_class.new(body, opts).content_type } + subject { part.content_type } it { is_expected.to eq nil } diff --git a/spec/lib/http/form_data_spec.rb b/spec/lib/http/form_data_spec.rb index 9798ab8..4d7d556 100644 --- a/spec/lib/http/form_data_spec.rb +++ b/spec/lib/http/form_data_spec.rb @@ -10,14 +10,14 @@ end context "when form has at least one file param" do - let(:gemspec) { HTTP::FormData::File.new "gemspec" } - let(:params) { { :foo => :bar, :baz => gemspec } } + let(:file) { HTTP::FormData::File.new(fixture("the-http-gem.info").to_s) } + let(:params) { { :foo => :bar, :baz => file } } it { is_expected.to be_a HTTP::FormData::Multipart } end context "when form has file in an array param" do - let(:gemspec) { HTTP::FormData::File.new "gemspec" } - let(:params) { { :foo => :bar, :baz => [gemspec] } } + let(:file) { HTTP::FormData::File.new(fixture("the-http-gem.info").to_s) } + let(:params) { { :foo => :bar, :baz => [file] } } it { is_expected.to be_a HTTP::FormData::Multipart } end end