From 4583450e0c304c085f006fc57c66439714736ade Mon Sep 17 00:00:00 2001
From: Syphax Bouazzouni
Date: Fri, 1 Jul 2022 11:24:34 +0200
Subject: [PATCH 1/2] extract execute_append_request function

---
NOTE(review): the line layout of this patch series was restored from a
flattened copy. Ruby indentation and the positions of blank context lines
are inferred (hunk line counts were used as a cross-check) -- confirm
against the upstream repository before applying.

 lib/goo/sparql/client.rb | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/lib/goo/sparql/client.rb b/lib/goo/sparql/client.rb
index 35b60b5f..b9b33e2c 100644
--- a/lib/goo/sparql/client.rb
+++ b/lib/goo/sparql/client.rb
@@ -196,6 +196,35 @@ def status
         resp[:outstanding] = outstanding
         resp
       end
+
+      private
+
+      def execute_append_request(graph, data_file, mime_type_in)
+        mime_type = "text/turtle"
+
+        if mime_type_in == "text/x-nquads"
+          mime_type = "text/x-nquads"
+          graph = "http://data.bogus.graph/uri"
+        end
+
+        params = {method: :post, url: "#{url.to_s}", headers: {"content-type" => mime_type, "mime-type" => mime_type}, timeout: nil}
+        backend_name = Goo.sparql_backend_name
+
+        if backend_name == BACKEND_4STORE
+          params[:payload] = {
+            graph: graph.to_s,
+            data: data_file,
+            'mime-type' => mime_type
+          }
+          #for some reason \\\\ breaks parsing
+          params[:payload][:data] = params[:payload][:data].split("\n").map { |x| x.sub("\\\\","") }.join("\n")
+        else
+          params[:url] << "?context=#{CGI.escape("<#{graph.to_s}>")}"
+          params[:payload] = data_file
+        end
+
+        RestClient::Request.execute(params)
+      end
     end
   end
 end

From 225c14411b43761fc91e935dfe88b6930cf3e8bd Mon Sep 17 00:00:00 2001
From: Syphax Bouazzouni
Date: Fri, 1 Jul 2022 11:26:17 +0200
Subject: [PATCH 2/2] change the append to triple store to be done by chunk of 500 000 lines

---
 lib/goo/sparql/client.rb | 41 ++++++++++++++--------------------------
 1 file changed, 14 insertions(+), 27 deletions(-)

diff --git a/lib/goo/sparql/client.rb b/lib/goo/sparql/client.rb
index b9b33e2c..8f7ad9e1 100644
--- a/lib/goo/sparql/client.rb
+++ b/lib/goo/sparql/client.rb
@@ -83,41 +83,28 @@ def delete_data_graph(graph)
       def append_triples_no_bnodes(graph,file_path,mime_type_in)
         bnodes_filter = nil
         dir = nil
-
-        if file_path.end_with?("ttl")
+        response = nil
+        if file_path.end_with?('ttl')
           bnodes_filter = file_path
         else
-          bnodes_filter,dir = bnodes_filter_file(file_path,mime_type_in)
+          bnodes_filter, dir = bnodes_filter_file(file_path, mime_type_in)
+        end
+        chunk_lines = 500_000 # number of line
+        file = File.foreach(bnodes_filter)
+        lines = []
+        file.each_entry do |line|
+          lines << line
+          if lines.size == chunk_lines
+            response = execute_append_request graph, lines.join, mime_type_in
+            lines.clear
+          end
         end
-        mime_type = "text/turtle"
 
-        if mime_type_in == "text/x-nquads"
-          mime_type = "text/x-nquads"
-          graph = "http://data.bogus.graph/uri"
-        end
+        response = execute_append_request graph, lines.join, mime_type_in unless lines.empty?
 
-        data_file = File.read(bnodes_filter)
-        params = {method: :post, url: "#{url.to_s}", headers: {"content-type" => mime_type, "mime-type" => mime_type}, timeout: nil}
-        backend_name = Goo.sparql_backend_name
-
-        if backend_name == BACKEND_4STORE
-          params[:payload] = {
-            graph: graph.to_s,
-            data: data_file,
-            "mime-type" => mime_type
-          }
-          #for some reason \\\\ breaks parsing
-          params[:payload][:data] = params[:payload][:data].split("\n").map { |x| x.sub("\\\\","") }.join("\n")
-        else
-          params[:url] << "?context=#{CGI.escape("<#{graph.to_s}>")}"
-          params[:payload] = data_file
-        end
-
-        response = RestClient::Request.execute(params)
 
         unless dir.nil?
           File.delete(bnodes_filter)
-
           begin
             FileUtils.rm_rf(dir)
           rescue => e