From 1148c905ecac8defb51961b8760a276f6377711e Mon Sep 17 00:00:00 2001 From: Robert Casties Date: Thu, 10 Nov 2022 15:28:58 +0100 Subject: [PATCH 1/5] add ismi sample data (see #17) --- examples/ismi/README.md | 24 +++ examples/ismi/data/ismi-om4-date-samples.json | 186 ++++++++++++++++++ 2 files changed, 210 insertions(+) create mode 100644 examples/ismi/README.md create mode 100644 examples/ismi/data/ismi-om4-date-samples.json diff --git a/examples/ismi/README.md b/examples/ismi/README.md new file mode 100644 index 0000000..2340bc9 --- /dev/null +++ b/examples/ismi/README.md @@ -0,0 +1,24 @@ +# Sample data from the ISMI project database + +The [Islamic Scientific Manuscript Initiative project](https://ismi.mpwig-berlin.mpg.de) aims to collect information about all Islamic Manuscripts in the exact sciences from the 9th to the 19th centuries CE. + +The old [ISMI database](https://gitlab.gwdg.de/MPIWG/Department-II/ismi-project) database OpenMind (OM4) stores historical dates as JSON objects with the following structure: + +- `state` + - "unknown": no date + - "not checked": unparsed date in `date_in_text` + - "known": date or date range entered in specified calendar + - `calendar_type`: calendar the date was entered in + - "Julian", "Islamic", "Gregorian" + - `input_form`: date type + - "Year" + - `from`: first day, `until`: last day of year (dates in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) + - "Range" + - `from`: first day, `until`: last day of range (dates in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) + - "Date" + - `date`: given day (date in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) + - `additional_info`: textual note with additional information + +We plan to extract as much as possible of this data in the migration to the new RDF database with a CIDOC-CRM-based data model. + +A sample file with dates of each type can be found in `data/ismi-om4-date-samples.json` diff --git a/examples/ismi/data/ismi-om4-date-samples.json b/examples/ismi/data/ismi-om4-date-samples.json new file mode 100644 index 0000000..4300195 --- /dev/null +++ b/examples/ismi/data/ismi-om4-date-samples.json @@ -0,0 +1,186 @@ +[ + { + "state": "unknown" + }, + { + "date_in_text": "8th/14th century", + "state": "not checked" + }, + { + "additional_info": "year 1233 in Julian calendar", + "calendar_type": "Julian", + "from": { + "ambiguity": 0, + "century": 13, + "dayOfMonth": 8, + "dayOfYear": 8, + "month": 1, + "year": 1232 + }, + "input_form": "Year", + "state": "known", + "until": { + "ambiguity": 0, + "century": 13, + "dayOfMonth": 7, + "dayOfYear": 7, + "month": 1, + "year": 1233 + }, + "year": 1232 + }, + { + "additional_info": "year 1205 in Islamic calendar", + "calendar_type": "Islamic", + "from": { + "ambiguity": 2, + "century": 18, + "dayOfMonth": 10, + "dayOfYear": 253, + "month": 9, + "year": 1790 + }, + "input_form": "Year", + "state": "known", + "until": { + "ambiguity": 2, + "century": 18, + "dayOfMonth": 29, + "dayOfYear": 241, + "month": 8, + "year": 1791 + }, + "year": 1205 + }, + { + "additional_info": "year 1564 in Gregorian calendar", + "calendar_type": "Gregorian", + "from": { + "ambiguity": 0, + "century": 16, + "dayOfMonth": 1, + "dayOfYear": 1, + "month": 1, + "year": 1564 + }, + "input_form": "Year", + "state": "known", + "until": { + "ambiguity": 0, + "century": 16, + "dayOfMonth": 31, + "dayOfYear": 366, + "month": 12, + "year": 1564 + }, + "year": 1564 + }, + { + "additional_info": "3. Martius(3) 1481 (1481-03-03) in Julian calendar (1481-03-12 Gregorian)", + "calendar_type": "Julian", + "date": { + "ambiguity": 0, + "century": 15, + "dayOfMonth": 12, + "dayOfYear": 71, + "month": 3, + "year": 1481 + }, + "input_form": "Date", + "state": "known" + }, + { + "additional_info": "6. Muḥarram(1) 888 in Islamic calendar (1483-02-23[+-2] Gregorian)", + "calendar_type": "Islamic", + "date": { + "ambiguity": 2, + "century": 15, + "dayOfMonth": 23, + "dayOfYear": 54, + "month": 2, + "year": 1483 + }, + "input_form": "Date", + "state": "known" + }, + { + "additional_info": "1. September(9) 1621 in Gregorian calendar", + "calendar_type": "Gregorian", + "date": { + "ambiguity": 0, + "century": 17, + "dayOfMonth": 1, + "dayOfYear": 244, + "month": 9, + "year": 1621 + }, + "input_form": "Date", + "state": "known" + }, + { + "additional_info": "1. Ianuarius(1) 811 - 31. December(12) 811 in Julian calendar", + "calendar_type": "Julian", + "from": { + "ambiguity": 0, + "century": 9, + "dayOfMonth": 5, + "dayOfYear": 5, + "month": 1, + "year": 811 + }, + "input_form": "Range", + "state": "known", + "until": { + "ambiguity": 0, + "century": 9, + "dayOfMonth": 4, + "dayOfYear": 4, + "month": 1, + "year": 812 + } + }, + { + "additional_info": "1. Muḥarram(1) 1000 - 29. Ḏu al-Ḥijjaẗ(12) 1024 in Islamic calendar", + "calendar_type": "Islamic", + "from": { + "ambiguity": 2, + "century": 16, + "dayOfMonth": 19, + "dayOfYear": 292, + "month": 10, + "year": 1591 + }, + "input_form": "Range", + "state": "known", + "until": { + "ambiguity": 2, + "century": 17, + "dayOfMonth": 19, + "dayOfYear": 19, + "month": 1, + "year": 1616 + } + }, + { + "additional_info": "1650-01-01 - 1699-01-01 in Gregorian calendar", + "calendar_type": "Gregorian", + "from": { + "ambiguity": 0, + "century": 17, + "dayOfMonth": 1, + "dayOfYear": 1, + "month": 1, + "year": 1650 + }, + "input_form": "Range", + "state": "known", + "until": { + "ambiguity": 0, + "century": 17, + "dayOfMonth": 1, + "dayOfYear": 1, + "month": 1, + "year": 1699 + } + } +] From 24be8e835d2131b4b92e4239559bdf032fcb4e8c Mon Sep 17 00:00:00 2001 From: Robert Casties Date: Mon, 18 Nov 2024 19:07:38 +0100 Subject: [PATCH 2/5] update ISMI sample data and README with current CIDOC-CRM model. --- examples/ismi/README.md | 34 ++-- examples/ismi/data/ismi-crm-date-samples.ttl | 104 ++++++++++ examples/ismi/data/ismi-om4-date-samples.json | 186 ------------------ 3 files changed, 120 insertions(+), 204 deletions(-) create mode 100644 examples/ismi/data/ismi-crm-date-samples.ttl delete mode 100644 examples/ismi/data/ismi-om4-date-samples.json diff --git a/examples/ismi/README.md b/examples/ismi/README.md index 2340bc9..e18c797 100644 --- a/examples/ismi/README.md +++ b/examples/ismi/README.md @@ -2,23 +2,21 @@ The [Islamic Scientific Manuscript Initiative project](https://ismi.mpwig-berlin.mpg.de) aims to collect information about all Islamic Manuscripts in the exact sciences from the 9th to the 19th centuries CE. -The old [ISMI database](https://gitlab.gwdg.de/MPIWG/Department-II/ismi-project) database OpenMind (OM4) stores historical dates as JSON objects with the following structure: +The new [ISMI database](https://gitlab.gwdg.de/MPIWG/Department-II/ismi-project) stores historical dates as CIDOC-CRM RDF objects with the following structure: -- `state` - - "unknown": no date - - "not checked": unparsed date in `date_in_text` - - "known": date or date range entered in specified calendar - - `calendar_type`: calendar the date was entered in - - "Julian", "Islamic", "Gregorian" - - `input_form`: date type - - "Year" - - `from`: first day, `until`: last day of year (dates in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - "Range" - - `from`: first day, `until`: last day of range (dates in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - "Date" - - `date`: given day (date in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - `additional_info`: textual note with additional information +- `crm:E52_Time-Span` + - `crm:P2_has_type`: date type + - "datetype:day" + - `crm:P82_at_some_time_within`: given day (xsd:date) + - "datetype:year" + - `crm:P82a_begin_of_the_begin`: first day (xsd:date), `crm:P82b_end_of_the_end`: last day of year (xsd:date) + - "datetype:range" + - `crm:P82a_begin_of_the_begin`: first day (xsd:date), `crm:P82b_end_of_the_end`: last day of range (xsd:date) + - `crm:P1_is_identified_by` + - `crm:E41_Appellation` + - `rdfs:label`: textual representation of timespan (e.g. "901 Rabīʿ I 14 (islamic)") + - `crm:P2_has_type`: calendar type (calendar the date was entered in) + - "calendartype:julian", "calendartype:islamic", "calendartype:gregorian" + - `crm:P3_has_note`: textual note with additional information -We plan to extract as much as possible of this data in the migration to the new RDF database with a CIDOC-CRM-based data model. - -A sample file with dates of each type can be found in `data/ismi-om4-date-samples.json` +A sample file with dates of each type can be found in `data/ismi-crm-date-samples.ttl` diff --git a/examples/ismi/data/ismi-crm-date-samples.ttl b/examples/ismi/data/ismi-crm-date-samples.ttl new file mode 100644 index 0000000..4c5a115 --- /dev/null +++ b/examples/ismi/data/ismi-crm-date-samples.ttl @@ -0,0 +1,104 @@ +@prefix rdfs: . +@prefix crm: . +@prefix xsd: . +# prefix for date and calendar type URIs +@prefix datetype: . +@prefix calendartype: . +# prefix for sample data +@prefix : . + +# day-precision date in islamic calendar +:date1 a crm:E52_Time-Span ; + crm:P2_has_type datetype:day ; + crm:P82_at_some_time_within "1495-12-11"^^xsd:date ; + crm:P3_has_note "day-precision date in islamic calendar" ; + crm:P1_is_identified_by :date1-label . +:date1-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:islamic ; + rdfs:label "901 Rabīʿ I 14 (islamic)" . + +# year-precision date in islamic calendar +:date2 a crm:E52_Time-Span ; + crm:P2_has_type datetype:year ; + crm:P82a_begin_of_the_begin "1479-04-03"^^xsd:date ; + crm:P82b_end_of_the_end "1480-03-21"^^xsd:date ; + crm:P3_has_note "year-precision date in islamic calendar" ; + crm:P1_is_identified_by :date2-label . +:date2-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:islamic ; + rdfs:label "884 (islamic)" . + +# range-type (century in islamic calendar) date in islamic calendar +:date3 a crm:E52_Time-Span ; + crm:P2_has_type datetype:range ; + crm:P82a_begin_of_the_begin "1494-10-11"^^xsd:date ; + crm:P82b_end_of_the_end "1591-10-18"^^xsd:date ; + crm:P3_has_note "range-type (century in islamic calendar) date in islamic calendar" ; + crm:P1_is_identified_by :date3-label . +:date3-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:islamic ; + rdfs:label "900 Muḥarram 1 - 999 Ḏu al-Ḥijjaẗ 29 (islamic)" . + +# day-precision date in gregorian calendar +:date4 a crm:E52_Time-Span ; + crm:P2_has_type datetype:day ; + crm:P82_at_some_time_within "1830-02-08"^^xsd:date ; + crm:P3_has_note "day-precision date in gregorian calendar" ; + crm:P1_is_identified_by :date4-label . +:date4-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:gregorian ; + rdfs:label "1830 February 8 (gregorian)" . + +# year-precision date in gregorian calendar +:date5 a crm:E52_Time-Span ; + crm:P2_has_type datetype:year ; + crm:P82a_begin_of_the_begin "1796-01-01"^^xsd:date ; + crm:P82b_end_of_the_end "1796-12-31"^^xsd:date ; + crm:P3_has_note "year-precision date in gregorian calendar" ; + crm:P1_is_identified_by :date5-label . +:date5-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:gregorian ; + rdfs:label "1796 (gregorian)" . + +# range-type (century in gregorian calendar) date in gregorian calendar +:date6 a crm:E52_Time-Span ; + crm:P2_has_type datetype:range ; + crm:P82a_begin_of_the_begin "1600-01-01"^^xsd:date ; + crm:P82b_end_of_the_end "1699-12-31"^^xsd:date ; + crm:P3_has_note "range-type (century in gregorian calendar) date in gregorian calendar" ; + crm:P1_is_identified_by :date6-label . +:date6-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:gregorian ; + rdfs:label "1600 January 1 - 1699 December 31 (gregorian)" . + +# day-precision date in julian calendar +:date7 a crm:E52_Time-Span ; + crm:P2_has_type datetype:day ; + crm:P82_at_some_time_within "1035-06-04"^^xsd:date ; + crm:P3_has_note "day-precision date in julian calendar" ; + crm:P1_is_identified_by :date7-label . +:date7-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:julian ; + rdfs:label "1035 May 29 (julian)" . + +# year-precision date in julian calendar +:date8 a crm:E52_Time-Span ; + crm:P2_has_type datetype:year ; + crm:P82a_begin_of_the_begin "1013-01-07"^^xsd:date ; + crm:P82b_end_of_the_end "1014-01-06"^^xsd:date ; + crm:P3_has_note "year-precision date in julian calendar" ; + crm:P1_is_identified_by :date8-label . +:date8-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:julian ; + rdfs:label "1013 (julian)" . + +# range-type (century in julian calendar) date in julian calendar +:date9 a crm:E52_Time-Span ; + crm:P2_has_type datetype:range ; + crm:P82a_begin_of_the_begin "1200-01-08"^^xsd:date ; + crm:P82b_end_of_the_end "1300-01-07"^^xsd:date ; + crm:P3_has_note "range-type (century in julian calendar) date in julian calendar" ; + crm:P1_is_identified_by :date9-label . +:date9-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:julian ; + rdfs:label "1200 January 1 - 1299 December 31 (julian)" . diff --git a/examples/ismi/data/ismi-om4-date-samples.json b/examples/ismi/data/ismi-om4-date-samples.json deleted file mode 100644 index 4300195..0000000 --- a/examples/ismi/data/ismi-om4-date-samples.json +++ /dev/null @@ -1,186 +0,0 @@ -[ - { - "state": "unknown" - }, - { - "date_in_text": "8th/14th century", - "state": "not checked" - }, - { - "additional_info": "year 1233 in Julian calendar", - "calendar_type": "Julian", - "from": { - "ambiguity": 0, - "century": 13, - "dayOfMonth": 8, - "dayOfYear": 8, - "month": 1, - "year": 1232 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 0, - "century": 13, - "dayOfMonth": 7, - "dayOfYear": 7, - "month": 1, - "year": 1233 - }, - "year": 1232 - }, - { - "additional_info": "year 1205 in Islamic calendar", - "calendar_type": "Islamic", - "from": { - "ambiguity": 2, - "century": 18, - "dayOfMonth": 10, - "dayOfYear": 253, - "month": 9, - "year": 1790 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 2, - "century": 18, - "dayOfMonth": 29, - "dayOfYear": 241, - "month": 8, - "year": 1791 - }, - "year": 1205 - }, - { - "additional_info": "year 1564 in Gregorian calendar", - "calendar_type": "Gregorian", - "from": { - "ambiguity": 0, - "century": 16, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1564 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 0, - "century": 16, - "dayOfMonth": 31, - "dayOfYear": 366, - "month": 12, - "year": 1564 - }, - "year": 1564 - }, - { - "additional_info": "3. Martius(3) 1481 (1481-03-03) in Julian calendar (1481-03-12 Gregorian)", - "calendar_type": "Julian", - "date": { - "ambiguity": 0, - "century": 15, - "dayOfMonth": 12, - "dayOfYear": 71, - "month": 3, - "year": 1481 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "6. Muḥarram(1) 888 in Islamic calendar (1483-02-23[+-2] Gregorian)", - "calendar_type": "Islamic", - "date": { - "ambiguity": 2, - "century": 15, - "dayOfMonth": 23, - "dayOfYear": 54, - "month": 2, - "year": 1483 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "1. September(9) 1621 in Gregorian calendar", - "calendar_type": "Gregorian", - "date": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 244, - "month": 9, - "year": 1621 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "1. Ianuarius(1) 811 - 31. December(12) 811 in Julian calendar", - "calendar_type": "Julian", - "from": { - "ambiguity": 0, - "century": 9, - "dayOfMonth": 5, - "dayOfYear": 5, - "month": 1, - "year": 811 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 0, - "century": 9, - "dayOfMonth": 4, - "dayOfYear": 4, - "month": 1, - "year": 812 - } - }, - { - "additional_info": "1. Muḥarram(1) 1000 - 29. Ḏu al-Ḥijjaẗ(12) 1024 in Islamic calendar", - "calendar_type": "Islamic", - "from": { - "ambiguity": 2, - "century": 16, - "dayOfMonth": 19, - "dayOfYear": 292, - "month": 10, - "year": 1591 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 2, - "century": 17, - "dayOfMonth": 19, - "dayOfYear": 19, - "month": 1, - "year": 1616 - } - }, - { - "additional_info": "1650-01-01 - 1699-01-01 in Gregorian calendar", - "calendar_type": "Gregorian", - "from": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1650 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1699 - } - } -] From 0e36e45cdd809c7cd910ec59c8d7ba2845bd2f4c Mon Sep 17 00:00:00 2001 From: Robert Casties Date: Mon, 18 Nov 2024 19:12:49 +0100 Subject: [PATCH 3/5] move to new directory. --- examples/ismi/README.md | 22 --- examples/use-cases/ismi/README.md | 34 ++-- .../ismi/data/ismi-crm-date-samples.ttl | 0 .../ismi/data/ismi-om4-date-samples.json | 186 ------------------ 4 files changed, 16 insertions(+), 226 deletions(-) delete mode 100644 examples/ismi/README.md rename examples/{ => use-cases}/ismi/data/ismi-crm-date-samples.ttl (100%) delete mode 100644 examples/use-cases/ismi/data/ismi-om4-date-samples.json diff --git a/examples/ismi/README.md b/examples/ismi/README.md deleted file mode 100644 index e18c797..0000000 --- a/examples/ismi/README.md +++ /dev/null @@ -1,22 +0,0 @@ -# Sample data from the ISMI project database - -The [Islamic Scientific Manuscript Initiative project](https://ismi.mpwig-berlin.mpg.de) aims to collect information about all Islamic Manuscripts in the exact sciences from the 9th to the 19th centuries CE. - -The new [ISMI database](https://gitlab.gwdg.de/MPIWG/Department-II/ismi-project) stores historical dates as CIDOC-CRM RDF objects with the following structure: - -- `crm:E52_Time-Span` - - `crm:P2_has_type`: date type - - "datetype:day" - - `crm:P82_at_some_time_within`: given day (xsd:date) - - "datetype:year" - - `crm:P82a_begin_of_the_begin`: first day (xsd:date), `crm:P82b_end_of_the_end`: last day of year (xsd:date) - - "datetype:range" - - `crm:P82a_begin_of_the_begin`: first day (xsd:date), `crm:P82b_end_of_the_end`: last day of range (xsd:date) - - `crm:P1_is_identified_by` - - `crm:E41_Appellation` - - `rdfs:label`: textual representation of timespan (e.g. "901 Rabīʿ I 14 (islamic)") - - `crm:P2_has_type`: calendar type (calendar the date was entered in) - - "calendartype:julian", "calendartype:islamic", "calendartype:gregorian" - - `crm:P3_has_note`: textual note with additional information - -A sample file with dates of each type can be found in `data/ismi-crm-date-samples.ttl` diff --git a/examples/use-cases/ismi/README.md b/examples/use-cases/ismi/README.md index 2340bc9..e18c797 100644 --- a/examples/use-cases/ismi/README.md +++ b/examples/use-cases/ismi/README.md @@ -2,23 +2,21 @@ The [Islamic Scientific Manuscript Initiative project](https://ismi.mpwig-berlin.mpg.de) aims to collect information about all Islamic Manuscripts in the exact sciences from the 9th to the 19th centuries CE. -The old [ISMI database](https://gitlab.gwdg.de/MPIWG/Department-II/ismi-project) database OpenMind (OM4) stores historical dates as JSON objects with the following structure: +The new [ISMI database](https://gitlab.gwdg.de/MPIWG/Department-II/ismi-project) stores historical dates as CIDOC-CRM RDF objects with the following structure: -- `state` - - "unknown": no date - - "not checked": unparsed date in `date_in_text` - - "known": date or date range entered in specified calendar - - `calendar_type`: calendar the date was entered in - - "Julian", "Islamic", "Gregorian" - - `input_form`: date type - - "Year" - - `from`: first day, `until`: last day of year (dates in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - "Range" - - `from`: first day, `until`: last day of range (dates in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - "Date" - - `date`: given day (date in Gregorian calendar, ambiguity of +-2 days when entered in Islamic calendar) - - `additional_info`: textual note with additional information +- `crm:E52_Time-Span` + - `crm:P2_has_type`: date type + - "datetype:day" + - `crm:P82_at_some_time_within`: given day (xsd:date) + - "datetype:year" + - `crm:P82a_begin_of_the_begin`: first day (xsd:date), `crm:P82b_end_of_the_end`: last day of year (xsd:date) + - "datetype:range" + - `crm:P82a_begin_of_the_begin`: first day (xsd:date), `crm:P82b_end_of_the_end`: last day of range (xsd:date) + - `crm:P1_is_identified_by` + - `crm:E41_Appellation` + - `rdfs:label`: textual representation of timespan (e.g. "901 Rabīʿ I 14 (islamic)") + - `crm:P2_has_type`: calendar type (calendar the date was entered in) + - "calendartype:julian", "calendartype:islamic", "calendartype:gregorian" + - `crm:P3_has_note`: textual note with additional information -We plan to extract as much as possible of this data in the migration to the new RDF database with a CIDOC-CRM-based data model. - -A sample file with dates of each type can be found in `data/ismi-om4-date-samples.json` +A sample file with dates of each type can be found in `data/ismi-crm-date-samples.ttl` diff --git a/examples/ismi/data/ismi-crm-date-samples.ttl b/examples/use-cases/ismi/data/ismi-crm-date-samples.ttl similarity index 100% rename from examples/ismi/data/ismi-crm-date-samples.ttl rename to examples/use-cases/ismi/data/ismi-crm-date-samples.ttl diff --git a/examples/use-cases/ismi/data/ismi-om4-date-samples.json b/examples/use-cases/ismi/data/ismi-om4-date-samples.json deleted file mode 100644 index 4300195..0000000 --- a/examples/use-cases/ismi/data/ismi-om4-date-samples.json +++ /dev/null @@ -1,186 +0,0 @@ -[ - { - "state": "unknown" - }, - { - "date_in_text": "8th/14th century", - "state": "not checked" - }, - { - "additional_info": "year 1233 in Julian calendar", - "calendar_type": "Julian", - "from": { - "ambiguity": 0, - "century": 13, - "dayOfMonth": 8, - "dayOfYear": 8, - "month": 1, - "year": 1232 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 0, - "century": 13, - "dayOfMonth": 7, - "dayOfYear": 7, - "month": 1, - "year": 1233 - }, - "year": 1232 - }, - { - "additional_info": "year 1205 in Islamic calendar", - "calendar_type": "Islamic", - "from": { - "ambiguity": 2, - "century": 18, - "dayOfMonth": 10, - "dayOfYear": 253, - "month": 9, - "year": 1790 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 2, - "century": 18, - "dayOfMonth": 29, - "dayOfYear": 241, - "month": 8, - "year": 1791 - }, - "year": 1205 - }, - { - "additional_info": "year 1564 in Gregorian calendar", - "calendar_type": "Gregorian", - "from": { - "ambiguity": 0, - "century": 16, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1564 - }, - "input_form": "Year", - "state": "known", - "until": { - "ambiguity": 0, - "century": 16, - "dayOfMonth": 31, - "dayOfYear": 366, - "month": 12, - "year": 1564 - }, - "year": 1564 - }, - { - "additional_info": "3. Martius(3) 1481 (1481-03-03) in Julian calendar (1481-03-12 Gregorian)", - "calendar_type": "Julian", - "date": { - "ambiguity": 0, - "century": 15, - "dayOfMonth": 12, - "dayOfYear": 71, - "month": 3, - "year": 1481 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "6. Muḥarram(1) 888 in Islamic calendar (1483-02-23[+-2] Gregorian)", - "calendar_type": "Islamic", - "date": { - "ambiguity": 2, - "century": 15, - "dayOfMonth": 23, - "dayOfYear": 54, - "month": 2, - "year": 1483 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "1. September(9) 1621 in Gregorian calendar", - "calendar_type": "Gregorian", - "date": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 244, - "month": 9, - "year": 1621 - }, - "input_form": "Date", - "state": "known" - }, - { - "additional_info": "1. Ianuarius(1) 811 - 31. December(12) 811 in Julian calendar", - "calendar_type": "Julian", - "from": { - "ambiguity": 0, - "century": 9, - "dayOfMonth": 5, - "dayOfYear": 5, - "month": 1, - "year": 811 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 0, - "century": 9, - "dayOfMonth": 4, - "dayOfYear": 4, - "month": 1, - "year": 812 - } - }, - { - "additional_info": "1. Muḥarram(1) 1000 - 29. Ḏu al-Ḥijjaẗ(12) 1024 in Islamic calendar", - "calendar_type": "Islamic", - "from": { - "ambiguity": 2, - "century": 16, - "dayOfMonth": 19, - "dayOfYear": 292, - "month": 10, - "year": 1591 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 2, - "century": 17, - "dayOfMonth": 19, - "dayOfYear": 19, - "month": 1, - "year": 1616 - } - }, - { - "additional_info": "1650-01-01 - 1699-01-01 in Gregorian calendar", - "calendar_type": "Gregorian", - "from": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1650 - }, - "input_form": "Range", - "state": "known", - "until": { - "ambiguity": 0, - "century": 17, - "dayOfMonth": 1, - "dayOfYear": 1, - "month": 1, - "year": 1699 - } - } -] From f0ee32ce62aaed14f22b9ccd74f27e0c08bfb522 Mon Sep 17 00:00:00 2001 From: Robert Casties Date: Thu, 21 Nov 2024 13:59:58 +0100 Subject: [PATCH 4/5] add notebook with first examples of converting ismi dates to undate. --- examples/use-cases/ismi/ismi-dates.ipynb | 179 +++++++++++++++++++++++ examples/use-cases/ismi/requirements.txt | 2 + 2 files changed, 181 insertions(+) create mode 100644 examples/use-cases/ismi/ismi-dates.ipynb create mode 100644 examples/use-cases/ismi/requirements.txt diff --git a/examples/use-cases/ismi/ismi-dates.ipynb b/examples/use-cases/ismi/ismi-dates.ipynb new file mode 100644 index 0000000..098fbfd --- /dev/null +++ b/examples/use-cases/ismi/ismi-dates.ipynb @@ -0,0 +1,179 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0fa36628-ccf2-4977-8c4c-e0a85e2b37b6", + "metadata": {}, + "source": [ + "# Working with ISMI project dates" + ] + }, + { + "cell_type": "markdown", + "id": "ffd4b544-8957-494e-9e09-b703d68bb7df", + "metadata": {}, + "source": [ + "## Load date samples from RDF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8a36e7f-6057-44d1-8466-6709910d4249", + "metadata": {}, + "outputs": [], + "source": [ + "from rdflib import Graph, RDF, URIRef\n", + "from rdflib.namespace import Namespace, RDFS\n", + "from undate.undate import Undate\n", + "\n", + "# additional RDF namespaces\n", + "crmNs = Namespace('http://www.cidoc-crm.org/cidoc-crm/')\n", + "\n", + "g = Graph()\n", + "g.bind('crm', crmNs)\n", + "# load ISMI RDF sample data\n", + "g.parse('data/ismi-crm-date-samples.ttl')\n", + "# check: number of triples\n", + "len(g)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c940ca2b-b369-4511-8dc9-420bdaeb3e65", + "metadata": {}, + "outputs": [], + "source": [ + "date_uris = [u for u in g.subjects(RDF.type, crmNs['E52_Time-Span'])]\n", + "\n", + "for uri in date_uris:\n", + " q = '''SELECT ?uri ?label ?note \n", + " WHERE { \n", + " ?uri crm:P3_has_note ?note ;\n", + " crm:P1_is_identified_by / rdfs:label ?label .\n", + " } limit 10'''\n", + " res = g.query(q, initBindings={'uri': uri})\n", + " for r in res:\n", + " print(f\"uri={str(uri)} label={r.label} note={r.note}\")" + ] + }, + { + "cell_type": "markdown", + "id": "16361060-657f-431c-b70f-9101d550aa38", + "metadata": {}, + "source": [ + "## Convert RDF dates to Undate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e443b974-930b-4a5d-8f21-641b4556b159", + "metadata": {}, + "outputs": [], + "source": [ + "from undate.date import DatePrecision, Date\n", + "import datetime\n", + "\n", + "uri = date_uris[1]\n", + "\n", + "#\n", + "# read date type\n", + "#\n", + "date_type = None\n", + "for date_type_uri in g.objects(uri, crmNs.P2_has_type):\n", + " for dt in ['day', 'year', 'range']:\n", + " if str(date_type_uri) == 'http://content.mpiwg-berlin.mpg.de/ns/ismi/type/date/' + dt:\n", + " date_type = dt\n", + "\n", + "if not date_type:\n", + " raise RuntimeError(f\"Unknown datetype URI {date_type_uri}\")\n", + "\n", + "#\n", + "# read label and calendar\n", + "#\n", + "date_label_uri = next(g.objects(uri, crmNs.P1_is_identified_by))\n", + "date_label = str(next(g.objects(date_label_uri, RDFS.label)))\n", + "for date_label_calendar_uri in g.objects(date_label_uri, crmNs.P2_has_type):\n", + " for ct in ['gregorian', 'julian', 'islamic']:\n", + " if str(date_label_calendar_uri) == 'http://content.mpiwg-berlin.mpg.de/ns/ismi/type/calendar/' + ct:\n", + " calendar_type = ct\n", + "\n", + "if not calendar_type:\n", + " raise RuntimeError(f\"Unknown calendar type URI {date_label_calendar_uri}\")\n", + "\n", + "#\n", + "# create undate\n", + "#\n", + "if date_type == 'day':\n", + " xsd_date = next(g.objects(uri, crmNs.P82_at_some_time_within))\n", + " date = Undate.parse(str(xsd_date), 'ISO8601')\n", + " date.precision = DatePrecision.DAY\n", + " date.label = date_label\n", + "\n", + "if date_type == 'year':\n", + " xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))\n", + " xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))\n", + " date_from = datetime.date.fromisoformat(str(xsd_date_from))\n", + " if calendar_type == 'gregorian':\n", + " # this should be fine\n", + " date = Undate(year=date_from.year)\n", + "\n", + " else:\n", + " # create day precision Undate from end date\n", + " date = Undate.parse(str(xsd_date_until), 'ISO8601')\n", + " # change earliest date\n", + " date.earliest = Date(year=date_from.year, month=date_from.month, day=date_from.day)\n", + "\n", + " # change precision and label\n", + " date.precision = DatePrecision.DAY\n", + " date.label = date_label\n", + "\n", + "if date_type == 'range':\n", + " xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))\n", + " xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))\n", + " # create day precision Undate from start date\n", + " date = Undate.parse(str(xsd_date_from), 'ISO8601')\n", + " # change latest date\n", + " date_until = datetime.date.fromisoformat(str(xsd_date_until))\n", + " date.latest = Date(year=date_until.year, month=date_until.month, day=date_until.day)\n", + " # change precision and label\n", + " date.precision = DatePrecision.DAY\n", + " date.label = date_label\n", + "\n", + "\n", + "print(f\"{date_label=} {date_type=} {calendar_type=} {date=}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "742ba275-7de6-461b-8891-6f06dbdd89a0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/use-cases/ismi/requirements.txt b/examples/use-cases/ismi/requirements.txt new file mode 100644 index 0000000..0d277bc --- /dev/null +++ b/examples/use-cases/ismi/requirements.txt @@ -0,0 +1,2 @@ +jupyterlab~=4.3.1 +rdflib~=7.1.1 From b994eb2b155d16dcf98122112148a6d60c379cfb Mon Sep 17 00:00:00 2001 From: Robert Casties Date: Fri, 22 Nov 2024 13:51:53 +0100 Subject: [PATCH 5/5] move notebook to separate branch --- examples/use-cases/ismi/ismi-dates.ipynb | 179 ----------------------- examples/use-cases/ismi/requirements.txt | 2 - 2 files changed, 181 deletions(-) delete mode 100644 examples/use-cases/ismi/ismi-dates.ipynb delete mode 100644 examples/use-cases/ismi/requirements.txt diff --git a/examples/use-cases/ismi/ismi-dates.ipynb b/examples/use-cases/ismi/ismi-dates.ipynb deleted file mode 100644 index 098fbfd..0000000 --- a/examples/use-cases/ismi/ismi-dates.ipynb +++ /dev/null @@ -1,179 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0fa36628-ccf2-4977-8c4c-e0a85e2b37b6", - "metadata": {}, - "source": [ - "# Working with ISMI project dates" - ] - }, - { - "cell_type": "markdown", - "id": "ffd4b544-8957-494e-9e09-b703d68bb7df", - "metadata": {}, - "source": [ - "## Load date samples from RDF" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a8a36e7f-6057-44d1-8466-6709910d4249", - "metadata": {}, - "outputs": [], - "source": [ - "from rdflib import Graph, RDF, URIRef\n", - "from rdflib.namespace import Namespace, RDFS\n", - "from undate.undate import Undate\n", - "\n", - "# additional RDF namespaces\n", - "crmNs = Namespace('http://www.cidoc-crm.org/cidoc-crm/')\n", - "\n", - "g = Graph()\n", - "g.bind('crm', crmNs)\n", - "# load ISMI RDF sample data\n", - "g.parse('data/ismi-crm-date-samples.ttl')\n", - "# check: number of triples\n", - "len(g)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c940ca2b-b369-4511-8dc9-420bdaeb3e65", - "metadata": {}, - "outputs": [], - "source": [ - "date_uris = [u for u in g.subjects(RDF.type, crmNs['E52_Time-Span'])]\n", - "\n", - "for uri in date_uris:\n", - " q = '''SELECT ?uri ?label ?note \n", - " WHERE { \n", - " ?uri crm:P3_has_note ?note ;\n", - " crm:P1_is_identified_by / rdfs:label ?label .\n", - " } limit 10'''\n", - " res = g.query(q, initBindings={'uri': uri})\n", - " for r in res:\n", - " print(f\"uri={str(uri)} label={r.label} note={r.note}\")" - ] - }, - { - "cell_type": "markdown", - "id": "16361060-657f-431c-b70f-9101d550aa38", - "metadata": {}, - "source": [ - "## Convert RDF dates to Undate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e443b974-930b-4a5d-8f21-641b4556b159", - "metadata": {}, - "outputs": [], - "source": [ - "from undate.date import DatePrecision, Date\n", - "import datetime\n", - "\n", - "uri = date_uris[1]\n", - "\n", - "#\n", - "# read date type\n", - "#\n", - "date_type = None\n", - "for date_type_uri in g.objects(uri, crmNs.P2_has_type):\n", - " for dt in ['day', 'year', 'range']:\n", - " if str(date_type_uri) == 'http://content.mpiwg-berlin.mpg.de/ns/ismi/type/date/' + dt:\n", - " date_type = dt\n", - "\n", - "if not date_type:\n", - " raise RuntimeError(f\"Unknown datetype URI {date_type_uri}\")\n", - "\n", - "#\n", - "# read label and calendar\n", - "#\n", - "date_label_uri = next(g.objects(uri, crmNs.P1_is_identified_by))\n", - "date_label = str(next(g.objects(date_label_uri, RDFS.label)))\n", - "for date_label_calendar_uri in g.objects(date_label_uri, crmNs.P2_has_type):\n", - " for ct in ['gregorian', 'julian', 'islamic']:\n", - " if str(date_label_calendar_uri) == 'http://content.mpiwg-berlin.mpg.de/ns/ismi/type/calendar/' + ct:\n", - " calendar_type = ct\n", - "\n", - "if not calendar_type:\n", - " raise RuntimeError(f\"Unknown calendar type URI {date_label_calendar_uri}\")\n", - "\n", - "#\n", - "# create undate\n", - "#\n", - "if date_type == 'day':\n", - " xsd_date = next(g.objects(uri, crmNs.P82_at_some_time_within))\n", - " date = Undate.parse(str(xsd_date), 'ISO8601')\n", - " date.precision = DatePrecision.DAY\n", - " date.label = date_label\n", - "\n", - "if date_type == 'year':\n", - " xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))\n", - " xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))\n", - " date_from = datetime.date.fromisoformat(str(xsd_date_from))\n", - " if calendar_type == 'gregorian':\n", - " # this should be fine\n", - " date = Undate(year=date_from.year)\n", - "\n", - " else:\n", - " # create day precision Undate from end date\n", - " date = Undate.parse(str(xsd_date_until), 'ISO8601')\n", - " # change earliest date\n", - " date.earliest = Date(year=date_from.year, month=date_from.month, day=date_from.day)\n", - "\n", - " # change precision and label\n", - " date.precision = DatePrecision.DAY\n", - " date.label = date_label\n", - "\n", - "if date_type == 'range':\n", - " xsd_date_from = next(g.objects(uri, crmNs.P82a_begin_of_the_begin))\n", - " xsd_date_until = next(g.objects(uri, crmNs.P82b_end_of_the_end))\n", - " # create day precision Undate from start date\n", - " date = Undate.parse(str(xsd_date_from), 'ISO8601')\n", - " # change latest date\n", - " date_until = datetime.date.fromisoformat(str(xsd_date_until))\n", - " date.latest = Date(year=date_until.year, month=date_until.month, day=date_until.day)\n", - " # change precision and label\n", - " date.precision = DatePrecision.DAY\n", - " date.label = date_label\n", - "\n", - "\n", - "print(f\"{date_label=} {date_type=} {calendar_type=} {date=}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "742ba275-7de6-461b-8891-6f06dbdd89a0", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/use-cases/ismi/requirements.txt b/examples/use-cases/ismi/requirements.txt deleted file mode 100644 index 0d277bc..0000000 --- a/examples/use-cases/ismi/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -jupyterlab~=4.3.1 -rdflib~=7.1.1