From 54dee4ef96c79275fff3392743399e32896be7f0 Mon Sep 17 00:00:00 2001 From: Katharina Brunner Date: Tue, 22 Mar 2022 10:16:32 +0100 Subject: [PATCH] ex2_1: add yarrrrml to get r2rml file --- dat/student_sport.r2rml.ttl | 157 ++++++++++++++++++++++-------------- dat/student_sport.yml | 31 +++++++ examples/ex2_1.ipynb | 76 ++++++++++++++--- 3 files changed, 189 insertions(+), 75 deletions(-) create mode 100644 dat/student_sport.yml diff --git a/dat/student_sport.r2rml.ttl b/dat/student_sport.r2rml.ttl index ecd2671..b5e0cd5 100644 --- a/dat/student_sport.r2rml.ttl +++ b/dat/student_sport.r2rml.ttl @@ -1,63 +1,96 @@ -@prefix rr: . -@prefix foaf: . -@prefix ex: . -@prefix xsd: . -@base . +@prefix rr: . +@prefix rdf: . +@prefix rdfs: . +@prefix fnml: . +@prefix fno: . +@prefix d2rq: . +@prefix void: . +@prefix dc: . +@prefix foaf: . +@prefix : . +@prefix ns1: . - - a rr:TriplesMap; - - rr:logicalTable [ rr:tableName "Student"; ] ; - - rr:subjectMap [ - rr:template "http://example.com/student/{ID}"; - rr:class ex:Student; - ]; - - rr:predicateObjectMap - [ - rr:predicate ex:firstName ; - rr:objectMap [ rr:column "FirstName" ] - ]; - - rr:predicateObjectMap - [ - rr:predicate ex:lastName ; - rr:objectMap [ rr:column "LastName" ] - ] - . - - - a rr:TriplesMap; - - rr:logicalTable [ rr:tableName "Sport"; ] ; - - rr:subjectMap [ - rr:template "http://example.com/sport/{ID}"; - rr:class ex:Sport; - ]; - - rr:predicateObjectMap - [ - rr:predicate ex:id ; - rr:objectMap [ rr:column "ID"; ] - ]; - - rr:predicateObjectMap - [ - rr:predicate ex:description ; - rr:objectMap [ rr:column "Description" ] - ] - . - - - a rr:TriplesMap; - - rr:logicalTable [ rr:tableName "Student_Sport" ]; - - rr:subjectMap [ rr:template "http://example.com/student/{ID_Student}" ]; - - rr:predicateObjectMap [ - rr:predicate ex:plays ; - rr:objectMap [ rr:template "http://example.com/sport/{ID_Sport}" ]; - ]. +:rules_000 a void:Dataset. 
+:source_000 a rr:LogicalTable; + rdfs:label "students"; + rr:sqlQuery "select * from Student left join Student_Sport on Student.ID = Student_Sport.ID_Student left join Sport on Sport.ID = Student_Sport.ID_Sport;"; + rr:sqlVersion rr:MySQL. +:rules_000 void:exampleResource :map_student_000. +:map_student_000 rr:logicalTable :source_000; + a rr:TriplesMap; + rdfs:label "student". +:s_000 a rr:SubjectMap. +:map_student_000 rr:subjectMap :s_000. +:s_000 rr:template "http://example.com/{ID_Student}". +:pom_000 a rr:PredicateObjectMap. +:map_student_000 rr:predicateObjectMap :pom_000. +:pm_000 a rr:PredicateMap. +:pom_000 rr:predicateMap :pm_000. +:pm_000 rr:constant rdf:type. +:pom_000 rr:objectMap :om_000. +:om_000 a rr:ObjectMap; + rr:constant "http://example.com/Student"; + rr:termType rr:IRI. +:pom_001 a rr:PredicateObjectMap. +:map_student_000 rr:predicateObjectMap :pom_001. +:pm_001 a rr:PredicateMap. +:pom_001 rr:predicateMap :pm_001. +:pm_001 rr:constant ns1:firstName. +:pom_001 rr:objectMap :om_001. +:om_001 a rr:ObjectMap; + rr:column "FirstName"; + rr:termType rr:Literal. +:pom_002 a rr:PredicateObjectMap. +:map_student_000 rr:predicateObjectMap :pom_002. +:pm_002 a rr:PredicateMap. +:pom_002 rr:predicateMap :pm_002. +:pm_002 rr:constant ns1:lastName. +:pom_002 rr:objectMap :om_002. +:om_002 a rr:ObjectMap; + rr:column "LastName"; + rr:termType rr:Literal. +:pom_003 a rr:PredicateObjectMap. +:map_student_000 rr:predicateObjectMap :pom_003. +:pm_003 a rr:PredicateMap. +:pom_003 rr:predicateMap :pm_003. +:pm_003 rr:constant ns1:plays. +:pom_003 rr:objectMap :om_003. +:rules_000 void:exampleResource :map_sport_000. +:map_sport_000 rr:logicalTable :source_000; + a rr:TriplesMap; + rdfs:label "sport". +:s_001 a rr:SubjectMap. +:map_sport_000 rr:subjectMap :s_001. +:s_001 rr:template "http://example.com/{ID_Sport}". +:pom_004 a rr:PredicateObjectMap. +:map_sport_000 rr:predicateObjectMap :pom_004. +:pm_004 a rr:PredicateMap. +:pom_004 rr:predicateMap :pm_004. 
+:pm_004 rr:constant rdf:type. +:pom_004 rr:objectMap :om_004. +:om_004 a rr:ObjectMap; + rr:constant "http://example.com/Sport"; + rr:termType rr:IRI. +:pom_005 a rr:PredicateObjectMap. +:map_sport_000 rr:predicateObjectMap :pom_005. +:pm_005 a rr:PredicateMap. +:pom_005 rr:predicateMap :pm_005. +:pm_005 rr:constant ns1:description. +:pom_005 rr:objectMap :om_005. +:om_005 a rr:ObjectMap; + rr:column "Description"; + rr:termType rr:Literal. +:pom_006 a rr:PredicateObjectMap. +:map_sport_000 rr:predicateObjectMap :pom_006. +:pm_006 a rr:PredicateMap. +:pom_006 rr:predicateMap :pm_006. +:pm_006 rr:constant ns1:id. +:pom_006 rr:objectMap :om_006. +:om_006 a rr:ObjectMap; + rr:column "ID_Sport"; + rr:termType rr:Literal. +:om_003 a rr:ObjectMap; + rr:parentTriplesMap :map_sport_000; + rr:joinCondition :jc_000. +:jc_000 rr:child "ID_Sport"; + rr:parent "ID_Sport". diff --git a/dat/student_sport.yml b/dat/student_sport.yml new file mode 100644 index 0000000..f2a3cbe --- /dev/null +++ b/dat/student_sport.yml @@ -0,0 +1,31 @@ +prefixes: + ns1: "http://example.com/" + +sources: + students: + queryFormulation: mysql + query: select * from Student left join Student_Sport on Student.ID = Student_Sport.ID_Student left join Sport on Sport.ID = Student_Sport.ID_Sport; + +mappings: + student: + sources: students + s: http://example.com/$(ID_Student) + po: + - [a, ns1:Student] + - [ns1:firstName, $(FirstName)] + - [ns1:lastName, $(LastName)] + - p: ns1:plays + o: + - mapping: sport + condition: + function: equal + parameters: + - [str1, $(ID_Sport)] + - [str2, $(ID_Sport)] + sport: + sources: students + s: http://example.com/$(ID_Sport) + po: + - [a, ns1:Sport] + - [ns1:description, $(Description)] + - [ns1:id, $(ID_Sport)] \ No newline at end of file diff --git a/examples/ex2_1.ipynb b/examples/ex2_1.ipynb index ecda3a9..e354589 100644 --- a/examples/ex2_1.ipynb +++ b/examples/ex2_1.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + 
"execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -78,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -103,6 +103,56 @@ "```" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This config references a `R2RML` mapping (`student_sport.r2rml.ttl`) that is applied to the input data. One way to create such a mapping is [YARRRML](https://rml.io/yarrrml/), \"a human-friendly text-based representation of RML rules\". You write the rules in YAML and then transform it with a yarrrml-parser to a `RML` or `R2RML` file. There is a [browser-based version](https://rml.io/yarrrml/matey/#) to get an idea how it works.\n", + "\n", + "The YARRRML file needed for this example can be found in `dat/` to use on the sample database:\n", + "\n", + "```YML\n", + "prefixes:\n", + " ns1: \"http://example.com/\"\n", + "\n", + "sources:\n", + " students:\n", + " queryFormulation: mysql\n", + " query: select * from Student left join Student_Sport on Student.ID = Student_Sport.ID_Student left join Sport on Sport.ID = Student_Sport.ID_Sport;\n", + "\n", + "mappings:\n", + " student:\n", + " sources: students\n", + " s: http://example.com/$(ID_Student)\n", + " po:\n", + " - [a, ns1:Student]\n", + " - [ns1:firstName, $(FirstName)]\n", + " - [ns1:lastName, $(LastName)]\n", + " - p: ns1:plays\n", + " o: \n", + " - mapping: sport\n", + " condition:\n", + " function: equal\n", + " parameters:\n", + " - [str1, $(ID_Sport)]\n", + " - [str2, $(ID_Sport)]\n", + " sport:\n", + " sources: students\n", + " s: http://example.com/$(ID_Sport)\n", + " po:\n", + " - [a, ns1:Sport]\n", + " - [ns1:description, $(Description)]\n", + " - [ns1:id, $(ID_Sport)]\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After using the yarrrml-parser with `yarrrml-parser -i student_sport.yml -o student_sport.r2rml.ttl -f R2RML` you get `students_sport.r2rml.ttl`. 
This is the file you refer to in the configuration for `Morph-KGC`." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -112,18 +162,18 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO | 2022-02-27 12:15:21,403 | 7 mapping rules retrieved.\n", - "INFO | 2022-02-27 12:15:21,418 | Mapping partition with 1 groups generated.\n", - "INFO | 2022-02-27 12:15:21,419 | Maximum number of rules within mapping group: 7.\n", - "INFO | 2022-02-27 12:15:21,420 | Mappings processed in 1.739 seconds.\n", - "INFO | 2022-02-27 12:15:21,523 | Number of triples generated in total: 22.\n" + "INFO | 2022-03-22 10:14:42,617 | 7 mapping rules retrieved.\n", + "INFO | 2022-03-22 10:14:42,622 | Mapping partition with 1 groups generated.\n", + "INFO | 2022-03-22 10:14:42,622 | Maximum number of rules within mapping group: 7.\n", + "INFO | 2022-03-22 10:14:42,623 | Mappings processed in 0.131 seconds.\n", + "INFO | 2022-03-22 10:14:42,709 | Number of triples generated in total: 22.\n" ] } ], @@ -159,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 18, "metadata": { "scrolled": true }, @@ -168,10 +218,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "ic| student_name: 'Venus', sport_desc: 'Tennis'\n", - "ic| student_name: 'David', sport_desc: 'Football'\n", + "ic| student_name: 'Fernando', sport_desc: 'Formula1'\n", "ic| student_name: 'Fernando', sport_desc: 'Football'\n", - "ic| student_name: 'Fernando', sport_desc: 'Formula1'\n" + "ic| student_name: 'David', sport_desc: 'Football'\n", + "ic| student_name: 'Venus', sport_desc: 'Tennis'\n" ] } ], @@ -211,7 +261,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.9.10" } }, "nbformat": 4,