Skip to content

Commit

Permalink
Add a few writers
Browse files Browse the repository at this point in the history
  • Loading branch information
magbak committed Nov 24, 2024
1 parent b344b3a commit 30c0430
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 23 deletions.
2 changes: 1 addition & 1 deletion .cargo/config.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[build]
jobs = 2
jobs = 4
24 changes: 12 additions & 12 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions lib/maplib/src/mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,13 +212,14 @@ impl Mapping {
use_triplestore.insert_construct_result(dfs, transient)
}

pub fn write_n_triples(
pub fn write_triples(
&mut self,
buffer: &mut dyn Write,
graph: Option<NamedNode>,
rdf_format: RdfFormat,
) -> Result<(), MappingError> {
let triplestore = self.get_triplestore(&graph);
triplestore.write_ntriples(buffer).unwrap();
triplestore.write_triples(buffer, rdf_format).unwrap();
Ok(())
}

Expand Down
8 changes: 4 additions & 4 deletions lib/triplestore/src/triples_write.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
use super::Triplestore;
use crate::errors::TriplestoreError;
use oxttl::NTriplesSerializer;
use rayon::prelude::IntoParallelIterator;
use representation::polars_to_rdf::df_as_triples;
use std::io::Write;
use oxrdfio::{RdfFormat, RdfSerializer};

impl Triplestore {
pub fn write_ntriples(&self, buf: &mut dyn Write) -> Result<(), TriplestoreError> {
let mut writer = NTriplesSerializer::new().for_writer(buf);
pub fn write_triples(&self, buf:&mut dyn Write, format:RdfFormat) -> Result<(), TriplestoreError> {
let mut writer = RdfSerializer::from_format(format).for_writer(buf);

for (verb, df_map) in &self.df_map {
for ((subject_type, object_type), tt) in df_map {
Expand All @@ -20,6 +19,7 @@ impl Triplestore {
}
}
}
writer.finish().unwrap();
Ok(())
}
}
34 changes: 34 additions & 0 deletions py_maplib/maplib/maplib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,7 @@ class Mapping:

def write_ntriples(self, file_path: Union[str, Path], graph: str = None) -> None:
"""
DEPRECATED: use write_triples with format="ntriples"
Write the non-transient triples to the file path specified in the NTriples format.
Usage:
Expand All @@ -535,8 +536,27 @@ class Mapping:
:param graph: The IRI of the graph to write.
"""

def write_triples(self,
file_path: Union[str, Path],
format=LiteralType["ntriples", "turtle", "rdf/xml"],
graph: str = None,
) -> None:
"""
Write the non-transient triples to the file path specified in the NTriples format.
Usage:
>>> m.write_triples("my_triples.nt", format="ntriples")
:param file_path: The path of the file containing triples
:param format: One of "ntriples", "turtle", "rdf/xml".
:param graph: The IRI of the graph to write.
"""


def write_ntriples_string(self, graph: str = None) -> str:
"""
DEPRECATED: use write_triples_string with format="ntriples"
Write the non-transient triples to a string in memory.
Usage:
Expand All @@ -547,6 +567,20 @@ class Mapping:
:return Triples in mapping in the NTriples format (potentially a large string)
"""

def write_triples_string(self, format=LiteralType["ntriples", "turtle", "rdf/xml"], graph: str = None) -> str:
"""
DEPRECATED: use write_triples_string with format="ntriples"
Write the non-transient triples to a string in memory.
Usage:
>>> s = m.write_ntriples_string(format="turtle")
:param format: One of "ntriples", "turtle", "rdf/xml".
:param graph: The IRI of the graph to write.
:return Triples in mapping in the NTriples format (potentially a large string)
"""

def write_native_parquet(
self, folder_path: Union[str, Path], graph: str = None
) -> None:
Expand Down
28 changes: 25 additions & 3 deletions py_maplib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -454,25 +454,47 @@ impl PyMapping {
.map_err(PyMaplibError::from)?;
Ok(())
}
fn write_ntriples(&mut self,
file_path: &Bound<'_, PyAny>,
graph: Option<String>) -> PyResult<()> {
warn!("use write_triples with format=\"ntriples\" instead");
self.write_triples(file_path, Some("ntriples".to_string()), graph)
}

fn write_ntriples(
fn write_triples(
&mut self,
file_path: &Bound<'_, PyAny>,
format: Option<String>,
graph: Option<String>,
) -> PyResult<()> {
let format = if let Some(format) = format {
resolve_format(&format)
} else {
RdfFormat::NTriples
};
let file_path = file_path.str()?.to_string();
let path_buf = PathBuf::from(file_path);
let mut actual_file = File::create(path_buf.as_path())
.map_err(|x| PyMaplibError::from(MappingError::FileCreateIOError(x)))?;
let graph = parse_optional_graph(graph)?;
self.inner.write_n_triples(&mut actual_file, graph).unwrap();
self.inner.write_triples(&mut actual_file, graph, format).unwrap();
Ok(())
}

fn write_ntriples_string(&mut self, graph: Option<String>) -> PyResult<String> {
warn!("use write_triples_string with format=\"ntriples\" instead");
self.write_triples_string(Some("ntriples".to_string()), graph)
}

fn write_triples_string(&mut self, format: Option<String>, graph: Option<String>) -> PyResult<String> {
let format = if let Some(format) = format {
resolve_format(&format)
} else {
RdfFormat::NTriples
};
let mut out = vec![];
let graph = parse_optional_graph(graph)?;
self.inner.write_n_triples(&mut out, graph).unwrap();
self.inner.write_triples(&mut out, graph, format).unwrap();
Ok(String::from_utf8(out).unwrap())
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def test_read_write_ntriples_string():
with open(TESTDATA_PATH / "read_ntriples.nt") as f:
ntstring = f.read()
m.read_triples_string(ntstring, format="ntriples")
out_str = m.write_ntriples_string()
out_str = m.write_triples_string(format="ntriples")
m2 = Mapping()
m2.read_triples_string(out_str, format="ntriples")
res = m2.query(
Expand All @@ -72,3 +72,51 @@ def test_read_write_ntriples_string():
# res.write_csv(str(filename))
expected_df = pl.scan_csv(filename).select(["v", "o"]).sort(["v", "o"]).collect()
pl.testing.assert_frame_equal(res, expected_df)


def test_read_write_turtle_string():
m = Mapping()
with open(TESTDATA_PATH / "read_ntriples.nt") as f:
ntstring = f.read()
m.read_triples_string(ntstring, format="ntriples")
out_str = m.write_triples_string(format="turtle")
m2 = Mapping()
m2.read_triples_string(out_str, format="turtle")
res = m2.query(
"""
PREFIX foaf:<http://xmlns.com/foaf/0.1/>
SELECT ?v ?o WHERE {
?s ?v ?o .
} ORDER BY ?v ?o
"""
).sort(["v", "o"])
# TODO: Fix multitype sorting
filename = TESTDATA_PATH / "read_ntriples2.csv"
# res.write_csv(str(filename))
expected_df = pl.scan_csv(filename).select(["v", "o"]).sort(["v", "o"]).collect()
pl.testing.assert_frame_equal(res, expected_df)


def test_read_write_xml_string():
m = Mapping()
with open(TESTDATA_PATH / "read_ntriples.nt") as f:
ntstring = f.read()
m.read_triples_string(ntstring, format="ntriples")
out_str = m.write_triples_string(format="rdf/xml")
m2 = Mapping()
m2.read_triples_string(out_str, format="rdf/xml")
res = m2.query(
"""
PREFIX foaf:<http://xmlns.com/foaf/0.1/>
SELECT ?v ?o WHERE {
?s ?v ?o .
} ORDER BY ?v ?o
"""
).sort(["v", "o"])
# TODO: Fix multitype sorting
filename = TESTDATA_PATH / "read_ntriples2.csv"
# res.write_csv(str(filename))
expected_df = pl.scan_csv(filename).select(["v", "o"]).sort(["v", "o"]).collect()
pl.testing.assert_frame_equal(res, expected_df)

0 comments on commit 30c0430

Please sign in to comment.