Skip to content

Commit

Permalink
Avoid creating multiple streams
Browse files Browse the repository at this point in the history
  • Loading branch information
ollietulloch committed Apr 8, 2024
1 parent 5f25d20 commit 2fa0380
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 27 deletions.
8 changes: 4 additions & 4 deletions lib/ndr_import/file/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ def files
yield @filename
end

# This method iterates over the tables in the given file and yields with two arguments:
# a tablename and a row enumerator (for that table). For a spreadsheet it may yield for
# every worksheet in the file and for a CSV file it will only yield once (the entire
# file is one table).
# This method iterates over the tables in the given file and yields with three arguments:
# a tablename, a row enumerator (for that table) and any file metdata.
# For a spreadsheet it may yield for every worksheet in the file and for a CSV file it
# will only yield once (the entire file is one table).
#
# As single table files are in the majority, the Base implementation is defined for
# single table handlers and you will only need to implement the rows iterator. If your
Expand Down
43 changes: 24 additions & 19 deletions lib/ndr_import/file/xml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,27 +35,32 @@ def slurp_metadata_values
def stream_metadata_values
return unless @xml_file_metadata.is_a?(Hash)

self.file_metadata = @xml_file_metadata.transform_values { |xpath| metadata_at(xpath) }
with_encoding_check(@filename) do |stream, encoding|
self.file_metadata = @xml_file_metadata.transform_values.with_index do |xpath, index|
# Ensure we're at the start of the stream each time
stream.rewind unless index.zero?

streamed_metadata(xpath, stream, encoding)
end
end
end

def metadata_at(xpath)
with_encoding_check(@filename) do |stream, encoding|
cursor = Cursor.new(xpath, false)

# If markup isn't well-formed, try to work around it:
options = Nokogiri::XML::ParseOptions::RECOVER
reader = Nokogiri::XML::Reader(stream, nil, encoding, options)

reader.each do |node|
case node.node_type
when Nokogiri::XML::Reader::TYPE_ELEMENT # "opening tag"
raise NestingError, node if cursor.in?(node)

cursor.enter(node)
return cursor.inner_text if cursor.send(:current_stack_match?)
when Nokogiri::XML::Reader::TYPE_END_ELEMENT # "closing tag"
cursor.leave(node)
end
def streamed_metadata(xpath, stream, encoding)
cursor = Cursor.new(xpath, false)

# If markup isn't well-formed, try to work around it:
options = Nokogiri::XML::ParseOptions::RECOVER
reader = Nokogiri::XML::Reader(stream, nil, encoding, options)

reader.each do |node|
case node.node_type
when Nokogiri::XML::Reader::TYPE_ELEMENT # "opening tag"
raise NestingError, node if cursor.in?(node)

cursor.enter(node)
return cursor.inner_text if cursor.send(:current_stack_match?)
when Nokogiri::XML::Reader::TYPE_END_ELEMENT # "closing tag"
cursor.leave(node)
end
end
end
Expand Down
6 changes: 2 additions & 4 deletions test/file/xml_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,7 @@ def setup
'submitting_providercode' => '//OrganisationIdentifierCodeOfSubmittingOrganisation/@extension'
}
}
handler = NdrImport::File::Xml.new(file_path, nil, options)
handler.expects(:each_node).never
handler = NdrImport::File::Xml.new(file_path, nil, options)
expected_metadata = { 'submitting_providercode' => 'LT4' }
assert_equal expected_metadata, handler.file_metadata

Expand All @@ -114,8 +113,7 @@ def setup
'record_count' => '//COSD:RecordCount/@value'
}
}
handler = NdrImport::File::Xml.new(file_path, nil, options)
handler.expects(:read_xml_file).never
handler = NdrImport::File::Xml.new(file_path, nil, options)
expected_metadata = { 'submitting_providercode' => 'LT4', 'record_count' => '6349923' }
assert_equal expected_metadata, handler.file_metadata

Expand Down

0 comments on commit 2fa0380

Please sign in to comment.