From 0b02b2c517cdb06a0c65d5dc81cd6f130ad2fd38 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Wed, 28 Nov 2018 09:13:04 +0530 Subject: [PATCH 01/37] testes Lists --- Testing Plan.md | 27 +++++++++++++++++++++++++++ test/test.js | 5 +++++ 2 files changed, 32 insertions(+) diff --git a/Testing Plan.md b/Testing Plan.md index b7ad3642..b0df9538 100644 --- a/Testing Plan.md +++ b/Testing Plan.md @@ -275,6 +275,33 @@ of “The Thirty,” and he called out, \m David welcomed them and made them officers in his army. ``` +### Test List Markers: should pass +``` +\id MAT 41MATGNT92.SFM, Good News Translation, June 2003 +\usfm 3.0 +\toc1 The Acts of the Apostles +\toc2 Acts +\ip One of these brothers, Joseph, had become... +\ipr (50.24) +\c 136 +\s1 God's Love Never Fails +\lh +\v 16-22 This is the list of the administrators of the tribes of Israel: +\li1 Reuben - Eliezer son of Zichri +\li1 Simeon - Shephatiah son of Maacah +\li1 Levi - Hashabiah son of Kemuel +\lf This was the list of the administrators of the tribes of Israel. +\v 7 in company with Zerubbabel, Jeshua, Nehemiah, Azariah, Raamiah, Nahamani, Mordecai, +Bilshan, Mispereth, Bigvai, Nehum and Baanah): +\b +\pm The list of the men of Israel: +\b +\lim1 +\v 8 the descendants of Parosh - \litl 2,172\litl* +\lim1 +\v 9 of Shephatiah - \litl 372\litl* + +``` ## Marker Wise Syntax Check the behaviour of the parser/validator are proper under these situations where internal structure of a marker needs to be validated diff --git a/test/test.js b/test/test.js index c742c3c3..fac77b39 100644 --- a/test/test.js +++ b/test/test.js @@ -69,4 +69,9 @@ describe('Ensure all true positives', function () { let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\usfm 3.0\n\\toc1 The Acts of the Apostles\n\\toc2 Acts\n\\ip One of these brothers, Joseph, had become...\n\\ipr (50.24)\n\\c 136\n\\qa Aleph\n\\s1 God\'s Love Never Fails\n\\q1\n\\v 1 \\qac P\\qac*Praise the \\nd Lord\\nd*! 
He is good.\n\\qr God\'s love never fails \\qs Selah\\qs*\n\\q1\n\\v 2 Praise the God of all gods.\n\\q1 May his glory fill the whole world.\n\\b\n\\qc Amen! Amen!\n\\qd For the director of music. On my stringed instruments.\n\\b\n\\v 18 God\'s spirit took control of one of them, Amasai, who later became the commander\nof “The Thirty,” and he called out,\n\\qm1 “David son of Jesse, we are yours!\n\\qm1 Success to you and those who help you!\n\\qm1 God is on your side.”\n\\b\n\\m David welcomed them and made them officers in his army.') assert.strictEqual(output, true) }) + + it('List Markers', function () { + let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\usfm 3.0\n\\toc1 The Acts of the Apostles\n\\toc2 Acts\n\\ip One of these brothers, Joseph, had become...\n\\ipr (50.24)\n\\c 136\n\\s1 God\'s Love Never Fails\n\\lh\n\\v 16-22 This is the list of the administrators of the tribes of Israel:\n\\li1 Reuben - Eliezer son of Zichri\n\\li1 Simeon - Shephatiah son of Maacah\n\\li1 Levi - Hashabiah son of Kemuel\n\\lf This was the list of the administrators of the tribes of Israel.\n\\v 7 in company with Zerubbabel, Jeshua, Nehemiah, Azariah, Raamiah, Nahamani, Mordecai,\nBilshan, Mispereth, Bigvai, Nehum and Baanah):\n\\b\n\\pm The list of the men of Israel:\n\\b\n\\lim1\n\\v 8 the descendants of Parosh - \\litl 2,172\\litl*\n\\lim1\n\\v 9 of Shephatiah - \\litl 372\\litl*') + assert.strictEqual(output, true) + }) }) From 94e029297bbbc1bcfe4e582f18cf3a36668b11e5 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Wed, 28 Nov 2018 09:26:41 +0530 Subject: [PATCH 02/37] tested table markers --- Testing Plan.md | 19 +++++++++++++++++++ grammarOperations.js | 10 ++++++++++ test/test.js | 5 +++++ 3 files changed, 34 insertions(+) diff --git a/Testing Plan.md b/Testing Plan.md index b0df9538..2901675d 100644 --- a/Testing Plan.md +++ b/Testing Plan.md @@ -303,6 +303,25 @@ Bilshan, Mispereth, Bigvai, Nehum and Baanah): ``` +### Test 
Table Markers: Should pass +``` +\id MAT 41MATGNT92.SFM, Good News Translation, June 2003 +\usfm 3.0 +\toc1 The Acts of the Apostles +\toc2 Acts +\ip One of these brothers, Joseph, had become... +\ipr (50.24) +\c 136 +\p +\v 12-83 They presented their offerings in the following order: +\tr \th1 Day \th2 Tribe \thr3 Leader +\tr \tcr1 1st \tc2 Judah \tcr3 Nahshon son of Amminadab +\tr \tcr1 2nd \tc2 Issachar \tcr3 Nethanel son of Zuar +\tr \tcr1 3rd \tc2 Zebulun \tcr3 Eliab son of Helon +\tr \tcr1 4th \tc2 Reuben \tcr3 Elizur son of Shedeur +\tr \tcr1 5th \tc2 Simeon \tcr3 Shelumiel son of Zurishaddai +``` + ## Marker Wise Syntax Check the behaviour of the parser/validator are proper under these situations where internal structure of a marker needs to be validated diff --git a/grammarOperations.js b/grammarOperations.js index abffac63..fb999b84 100644 --- a/grammarOperations.js +++ b/grammarOperations.js @@ -478,6 +478,16 @@ sem.addOperation('composeJson', { headerCell: function(cell) { return cell.composeJson() }, + + row: function (_, cell) { + let rowObj = cell.composeJson() + return rowObj + }, + + cell: function (elmnt) { + return elmnt.composeJson + }, + thElement: function(_, _, _, num, text) { return {'th': text.sourceString, 'column':num.sourceString} diff --git a/test/test.js b/test/test.js index fac77b39..591f1e44 100644 --- a/test/test.js +++ b/test/test.js @@ -74,4 +74,9 @@ describe('Ensure all true positives', function () { let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\usfm 3.0\n\\toc1 The Acts of the Apostles\n\\toc2 Acts\n\\ip One of these brothers, Joseph, had become...\n\\ipr (50.24)\n\\c 136\n\\s1 God\'s Love Never Fails\n\\lh\n\\v 16-22 This is the list of the administrators of the tribes of Israel:\n\\li1 Reuben - Eliezer son of Zichri\n\\li1 Simeon - Shephatiah son of Maacah\n\\li1 Levi - Hashabiah son of Kemuel\n\\lf This was the list of the administrators of the tribes of Israel.\n\\v 7 in company 
with Zerubbabel, Jeshua, Nehemiah, Azariah, Raamiah, Nahamani, Mordecai,\nBilshan, Mispereth, Bigvai, Nehum and Baanah):\n\\b\n\\pm The list of the men of Israel:\n\\b\n\\lim1\n\\v 8 the descendants of Parosh - \\litl 2,172\\litl*\n\\lim1\n\\v 9 of Shephatiah - \\litl 372\\litl*') assert.strictEqual(output, true) }) + + it('table Markers', function () { + let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\usfm 3.0\n\\toc1 The Acts of the Apostles\n\\toc2 Acts\n\\ip One of these brothers, Joseph, had become...\n\\ipr (50.24)\n\\c 136\n\\p\n\\v 12-83 They presented their offerings in the following order:\n\\tr \\th1 Day \\th2 Tribe \\thr3 Leader\n\\tr \\tcr1 1st \\tc2 Judah \\tcr3 Nahshon son of Amminadab\n\\tr \\tcr1 2nd \\tc2 Issachar \\tcr3 Nethanel son of Zuar\n\\tr \\tcr1 3rd \\tc2 Zebulun \\tcr3 Eliab son of Helon\n\\tr \\tcr1 4th \\tc2 Reuben \\tcr3 Elizur son of Shedeur\n\\tr \\tcr1 5th \\tc2 Simeon \\tcr3 Shelumiel son of Zurishaddai') + assert.strictEqual(output, true) + }) }) From 5ccca25c0c50951c5cc266337c8f2c7525540992 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Wed, 28 Nov 2018 13:52:45 +0530 Subject: [PATCH 03/37] fix json structure for list and table --- grammarOperations.js | 44 +++++++++++++++++++++++++++++++------------- usfm.ohm | 9 +++++---- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/grammarOperations.js b/grammarOperations.js index fb999b84..280c2a03 100644 --- a/grammarOperations.js +++ b/grammarOperations.js @@ -139,8 +139,8 @@ sem.addOperation('composeJson', { if ( metaScripture.sourceString!='' ) { verse['metadata_inline'] = metaScripture.composeJson()} - verse['text'] = '' + verseText.composeJson() - if (verseTextMore.sourceString!='') { verse['text'] += verseTextMore.composeJson()} + verse['text'] = verseText.composeJson() + if (verseTextMore.sourceString!='') { verse['text'].concat(verseTextMore.composeJson())} return verse }, @@ -149,6 +149,10 @@ 
sem.addOperation('composeJson', { if (num2.sourceString!='') { number = number + '-' + num2.sourceString} return number }, + + verseText: function (content) { + return content.composeJson() + }, sectionElement: function (sElement ) { return sElement.composeJson() @@ -318,7 +322,6 @@ sem.addOperation('composeJson', { iq: function (itemElement) { let iq = itemElement.composeJson() - console.log(iq) return {'iq':iq} }, @@ -485,26 +488,40 @@ sem.addOperation('composeJson', { }, cell: function (elmnt) { - return elmnt.composeJson + return elmnt.composeJson() }, + - - thElement: function(_, _, _, num, text) { + thElement: function(_, _, num, _, text) { + console.log(num) return {'th': text.sourceString, 'column':num.sourceString} }, - thrElement: function(_, _, _, num, text) { + thrElement: function(_, _, num, _, text) { return {'thr': text.sourceString, 'column':num.sourceString} }, - tcElement: function(_, _, _, num, text) { + tcElement: function(_, _, num, _, text) { return {'tc': text.sourceString, 'column':num.sourceString} }, - tcrElement: function(_, _, _, num, text) { + tcrElement: function(_, _, num, _, text) { return {'tcr': text.sourceString, 'column':num.sourceString} }, + li: function (itemElement) { + let li = {'list': itemElement.composeJson()} + // li = JSON.stringify(li) + return li + }, + + liElement: function (_, _, _, num, _, text) { + let obj = {} + obj['item'] = text.composeJson() + if (num.sourceString != ''){ obj['item']['num'] = num.sourceString } + return obj + }, + litElement: function (_, _, _, _, text) { return {'lit' : text.composeJson()} }, @@ -521,6 +538,7 @@ sem.addOperation('composeJson', { chapterContentTextContent: function(_,element) { let text = element.composeJson() + console.log(text) return text }, @@ -548,11 +566,11 @@ sem.addOperation('composeJson', { }) exports.match = function (str) { - // try { + try { let matchObj = bib.match(str) let adaptor = sem(matchObj) return adaptor.composeJson() - // } catch (err) { - // return 
matchObj - // } + } catch (err) { + return matchObj + } } diff --git a/usfm.ohm b/usfm.ohm index 5ea88afd..80e1c107 100644 --- a/usfm.ohm +++ b/usfm.ohm @@ -74,12 +74,12 @@ usfmBible{ publishedCharElement = publishedCharMarker text publishedCharMarker "*" publishedCharMarker = backSlash "vp" - verseText = chapterContentTextContent+ + verseText = chapterContentTextContent paraMarker = paraUnNumberedMarker | paraNumberedMarker paraUnNumberedMarker = ("po" | "m" | "pr" | "cls" | "pmo" | "pm" | "pmc" | "pmr" | "pmi" | "nb" | "pc" | "b" | "pb" | "qr" | "qc" | "qd" | "lh" | "lf" | "p" ) - paraNumberedMarker = ("pi" | "ph" | "q" | "qm" | "li" | "lim") number? + paraNumberedMarker = ("pi" | "ph" | "q" | "qm" | "lim") number? qaElement = newLine backSlash "qa" spaceChar text @@ -245,7 +245,8 @@ usfmBible{ tcElement = backSlash "tc" number spaceChar text tcrElement = backSlash "tcr" number spaceChar text - + li = (liElement)+ + liElement = newLine backSlash "li" number? spaceChar text @@ -254,7 +255,7 @@ usfmBible{ bookIntroductionTitlesTextContent = (text | notesElement | charElement | milestoneElement | figureElement | zNameSpace)+ bookTitlesTextContent = (text | notesElement | charElement | zNameSpace)+ - chapterContentTextContent = newLine? (text | notesElement | charElement | milestoneElement| figureElement | table |zNameSpace | mte)+ + chapterContentTextContent = newLine? (text | notesElement | charElement | milestoneElement| figureElement | table | li | zNameSpace | mte)+ bookIntroductionEndTitlesTextContent = (text | notesElement | charElement | milestoneElement | zNameSpace)+ milestoneElement = (backSlash "qt" number? "-s" spaceChar* attributes? 
backSlash "*" ) From 0fcd24d00a35047740507849353a6b4e74e151b8 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 29 Nov 2018 09:42:21 +0530 Subject: [PATCH 04/37] tested footnotes --- Testing Plan.md | 23 ++++++++++++++++++----- grammarOperations.js | 14 +++++++------- test/test.js | 9 +++++++-- usfm.ohm | 6 +++--- 4 files changed, 35 insertions(+), 17 deletions(-) diff --git a/Testing Plan.md b/Testing Plan.md index 2901675d..477e5b43 100644 --- a/Testing Plan.md +++ b/Testing Plan.md @@ -262,12 +262,12 @@ Ensure true cases are being validated successfully, for maximum number of marker \qr God's love never fails \qs Selah\qs* \q1 \v 2 Praise the God of all gods. -\q1 May his glory fill the whole world.\b +\q1 May his glory fill the whole world. +\b \qc Amen! Amen! \qd For the director of music. On my stringed instruments. \b -\v 18 God's spirit took control of one of them, Amasai, who later became the commander -of “The Thirty,” and he called out, +\v 18 God's spirit took control of one of them, Amasai, who later became the commander of “The Thirty,” and he called out, \qm1 “David son of Jesse, we are yours! \qm1 Success to you and those who help you! \qm1 God is on your side.” @@ -291,8 +291,7 @@ of “The Thirty,” and he called out, \li1 Simeon - Shephatiah son of Maacah \li1 Levi - Hashabiah son of Kemuel \lf This was the list of the administrators of the tribes of Israel. 
-\v 7 in company with Zerubbabel, Jeshua, Nehemiah, Azariah, Raamiah, Nahamani, Mordecai, -Bilshan, Mispereth, Bigvai, Nehum and Baanah): +\v 7 in company with Zerubbabel, Jeshua, Nehemiah, Azariah, Raamiah, Nahamani, Mordecai,Bilshan, Mispereth, Bigvai, Nehum and Baanah): \b \pm The list of the men of Israel: \b @@ -322,6 +321,20 @@ Bilshan, Mispereth, Bigvai, Nehum and Baanah): \tr \tcr1 5th \tc2 Simeon \tcr3 Shelumiel son of Zurishaddai ``` +### Test Footnotes: should pass +``` +\id MAT 41MATGNT92.SFM, Good News Translation, June 2003 +\c 136 +\s1 The Preaching of John the Baptist +\r (Matthew 3.1-12; Luke 3.1-18; John 1.19-28) +\p +\v 1 This is the Good News about Jesus Christ, the Son of God. \f + \fr 1.1: \ft Some manuscripts do not have \fq the Son of God.\f* +\v 20 Adam \f + \fr 3.20: \fk Adam: \ft This name in Hebrew means “all human beings.”\f* named his wife Eve, \f + \fr 3.20: \fk Eve: \ft This name sounds similar to the Hebrew word for “living,” which is rendered in this context as “human beings.”\f* because she was the mother of all human beings. +\v 38 whoever believes in me should drink. As the scripture says, ‘Streams of life-giving water will pour out from his side.’” \f + \fr 7.38: \ft Jesus' words in verses 37-38 may be translated: \fqa “Whoever is thirsty should come to me and drink. \fv 38\fv* As the scripture says, ‘Streams of life-giving water will pour out ...’”\f* +\v 3 Él es el resplandor glorioso de Dios,\f c \fr 1.3: \fk Resplandor: \ft Cf. Jn 1.4-9,14\fdc ; también Sab 7.25-26, donde algo parecido se dice de la sabiduría.\f* la imagen misma ... 
+``` + + ## Marker Wise Syntax Check the behaviour of the parser/validator are proper under these situations where internal structure of a marker needs to be validated diff --git a/grammarOperations.js b/grammarOperations.js index 280c2a03..f8576874 100644 --- a/grammarOperations.js +++ b/grammarOperations.js @@ -428,15 +428,15 @@ sem.addOperation('composeJson', { return element.composeJson() }, - fElement: function (_, _, content, _, _){ + fElement: function (_, _, _, content, _, _, _){ return {'footnote': content.sourceString} }, - feElement: function (_, _, content, _, _){ + feElement: function (_, _, _, content, _, _, _){ return {'footnote': content.sourceString} }, - crossrefElement: function (_, _, content, _, _){ + crossrefElement: function (_, _, _, content, _, _, _){ return {'cross-ref': content.sourceString} }, @@ -566,11 +566,11 @@ sem.addOperation('composeJson', { }) exports.match = function (str) { - try { + // try { let matchObj = bib.match(str) let adaptor = sem(matchObj) return adaptor.composeJson() - } catch (err) { - return matchObj - } + // } catch (err) { + // return err + // } } diff --git a/test/test.js b/test/test.js index 591f1e44..1aec3f22 100644 --- a/test/test.js +++ b/test/test.js @@ -66,12 +66,12 @@ describe('Ensure all true positives', function () { }) it('Poetry Markers', function () { - let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\usfm 3.0\n\\toc1 The Acts of the Apostles\n\\toc2 Acts\n\\ip One of these brothers, Joseph, had become...\n\\ipr (50.24)\n\\c 136\n\\qa Aleph\n\\s1 God\'s Love Never Fails\n\\q1\n\\v 1 \\qac P\\qac*Praise the \\nd Lord\\nd*! He is good.\n\\qr God\'s love never fails \\qs Selah\\qs*\n\\q1\n\\v 2 Praise the God of all gods.\n\\q1 May his glory fill the whole world.\n\\b\n\\qc Amen! Amen!\n\\qd For the director of music. 
On my stringed instruments.\n\\b\n\\v 18 God\'s spirit took control of one of them, Amasai, who later became the commander\nof “The Thirty,” and he called out,\n\\qm1 “David son of Jesse, we are yours!\n\\qm1 Success to you and those who help you!\n\\qm1 God is on your side.”\n\\b\n\\m David welcomed them and made them officers in his army.') + let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\usfm 3.0\n\\toc1 The Acts of the Apostles\n\\toc2 Acts\n\\ip One of these brothers, Joseph, had become...\n\\ipr (50.24)\n\\c 136\n\\qa Aleph\n\\s1 God\'s Love Never Fails\n\\q1\n\\v 1 \\qac P\\qac*Praise the \\nd Lord\\nd*! He is good.\n\\qr God\'s love never fails \\qs Selah\\qs*\n\\q1\n\\v 2 Praise the God of all gods.\n\\q1 May his glory fill the whole world.\n\\b\n\\qc Amen! Amen!\n\\qd For the director of music. On my stringed instruments.\n\\b\n\\v 18 God\'s spirit took control of one of them, Amasai, who later became the commander of “The Thirty,” and he called out,\n\\qm1 “David son of Jesse, we are yours!\n\\qm1 Success to you and those who help you!\n\\qm1 God is on your side.”\n\\b\n\\m David welcomed them and made them officers in his army.') assert.strictEqual(output, true) }) it('List Markers', function () { - let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\usfm 3.0\n\\toc1 The Acts of the Apostles\n\\toc2 Acts\n\\ip One of these brothers, Joseph, had become...\n\\ipr (50.24)\n\\c 136\n\\s1 God\'s Love Never Fails\n\\lh\n\\v 16-22 This is the list of the administrators of the tribes of Israel:\n\\li1 Reuben - Eliezer son of Zichri\n\\li1 Simeon - Shephatiah son of Maacah\n\\li1 Levi - Hashabiah son of Kemuel\n\\lf This was the list of the administrators of the tribes of Israel.\n\\v 7 in company with Zerubbabel, Jeshua, Nehemiah, Azariah, Raamiah, Nahamani, Mordecai,\nBilshan, Mispereth, Bigvai, Nehum and Baanah):\n\\b\n\\pm The list of the men of Israel:\n\\b\n\\lim1\n\\v 8 the 
descendants of Parosh - \\litl 2,172\\litl*\n\\lim1\n\\v 9 of Shephatiah - \\litl 372\\litl*') + let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\usfm 3.0\n\\toc1 The Acts of the Apostles\n\\toc2 Acts\n\\ip One of these brothers, Joseph, had become...\n\\ipr (50.24)\n\\c 136\n\\s1 God\'s Love Never Fails\n\\lh\n\\v 16-22 This is the list of the administrators of the tribes of Israel:\n\\li1 Reuben - Eliezer son of Zichri\n\\li1 Simeon - Shephatiah son of Maacah\n\\li1 Levi - Hashabiah son of Kemuel\n\\lf This was the list of the administrators of the tribes of Israel.\n\\v 7 in company with Zerubbabel, Jeshua, Nehemiah, Azariah, Raamiah, Nahamani, Mordecai,Bilshan, Mispereth, Bigvai, Nehum and Baanah):\n\\b\n\\pm The list of the men of Israel:\n\\b\n\\lim1\n\\v 8 the descendants of Parosh - \\litl 2,172\\litl*\n\\lim1\n\\v 9 of Shephatiah - \\litl 372\\litl*\n') assert.strictEqual(output, true) }) @@ -79,4 +79,9 @@ describe('Ensure all true positives', function () { let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\usfm 3.0\n\\toc1 The Acts of the Apostles\n\\toc2 Acts\n\\ip One of these brothers, Joseph, had become...\n\\ipr (50.24)\n\\c 136\n\\p\n\\v 12-83 They presented their offerings in the following order:\n\\tr \\th1 Day \\th2 Tribe \\thr3 Leader\n\\tr \\tcr1 1st \\tc2 Judah \\tcr3 Nahshon son of Amminadab\n\\tr \\tcr1 2nd \\tc2 Issachar \\tcr3 Nethanel son of Zuar\n\\tr \\tcr1 3rd \\tc2 Zebulun \\tcr3 Eliab son of Helon\n\\tr \\tcr1 4th \\tc2 Reuben \\tcr3 Elizur son of Shedeur\n\\tr \\tcr1 5th \\tc2 Simeon \\tcr3 Shelumiel son of Zurishaddai') assert.strictEqual(output, true) }) + + it('Footnote Markers', function () { + let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\c 136\n\\s1 The Preaching of John the Baptist\n\\r (Matthew 3.1-12; Luke 3.1-18; John 1.19-28)\n\\p\n\\v 1 This is the Good News about Jesus Christ, the Son of God. 
\\f + \\fr 1.1: \\ft Some manuscripts do not have \\fq the Son of God.\\f*\n\\v 20 Adam \\f + \\fr 3.20: \\fk Adam: \\ft This name in Hebrew means “all human beings.”\\f* named his wife Eve, \\f + \\fr 3.20: \\fk Eve: \\ft This name sounds similar to the Hebrew word for “living,” which is rendered in this context as “human beings.”\\f* because she was the mother of all human beings.\n\\v 38 whoever believes in me should drink. As the scripture says, ‘Streams of life-giving water will pour out from his side.’” \\f + \\fr 7.38: \\ft Jesus\' words in verses 37-38 may be translated: \\fqa “Whoever is thirsty should come to me and drink. \\fv 38\\fv* As the scripture says, ‘Streams of life-giving water will pour out ...’”\\f*\n\\v 3 Él es el resplandor glorioso de Dios,\\f c \\fr 1.3: \\fk Resplandor: \\ft Cf. Jn 1.4-9,14\\fdc ; también Sab 7.25-26, donde algo parecido se dice de la sabiduría.\\f* la imagen misma ...') + assert.strictEqual(output, true) + }) }) diff --git a/usfm.ohm b/usfm.ohm index 80e1c107..89b24069 100644 --- a/usfm.ohm +++ b/usfm.ohm @@ -142,9 +142,9 @@ usfmBible{ notesElement = footnoteElement | crossrefElement footnoteElement = fElement | feElement - fElement = backSlash "f" footnoteContent* backSlash "f*" - feElement = backSlash "fe" footnoteContent* backSlash "fe*" - crossrefElement = backSlash "x" crossrefContent* backSlash "x*" + fElement = backSlash "f" spaceChar? footnoteContent* backSlash "f*" spaceChar? + feElement = backSlash "fe" spaceChar? footnoteContent* backSlash "fe*"spaceChar? + crossrefElement = backSlash "x" spaceChar? crossrefContent* backSlash "x*" spaceChar? 
footnoteContent = text | footnoteContentElement footnoteContentElement = backSlash "fr" spaceChar From d79944157a57e26f74fce8ec14face80cf2265e7 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 29 Nov 2018 09:57:17 +0530 Subject: [PATCH 05/37] tested cross-refs --- Testing Plan.md | 21 +++++++++++++++++++++ test/test.js | 5 +++++ usfm.ohm | 2 +- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/Testing Plan.md b/Testing Plan.md index 477e5b43..ff66529e 100644 --- a/Testing Plan.md +++ b/Testing Plan.md @@ -334,6 +334,27 @@ Ensure true cases are being validated successfully, for maximum number of marker \v 3 Él es el resplandor glorioso de Dios,\f c \fr 1.3: \fk Resplandor: \ft Cf. Jn 1.4-9,14\fdc ; también Sab 7.25-26, donde algo parecido se dice de la sabiduría.\f* la imagen misma ... ``` +### Test Cross-refernces : Should pass +``` +\id MAT 41MATGNT92.SFM, Good News Translation, June 2003 +\c 6 +\v 18 “Why do you call me good?” Jesus asked him. “No one is good except God alone. +\v 19 \x - \xo 10.19: a \xt Exo 20.13; Deu 5.17; \xo b \xt Exo 20.14; Deu 5.18; \xo c \xt Exo 20.15; Deu 5.19; \xo d \xt Exo 20.16; Deu 5.20; \xo e \xt Exo 20.12; Deu 5.16.\x* You know the commandments: ‘Do not commit murder... +\c 2 +\cd \xt 1|GEN 2:1\xt* Бог благословляет седьмой день; \xt 8|GEN 2:8\x* человек в раю Едемском; четыре реки; дерево познания добра и зла. \xt 18|GEN 2:18\x* Человек дает названия животным. \xt 21|GEN 2:21\xt* Создание женщины. +\p +\v 1 Так совершены небо и земля и все воинство их. +\c 3 +\s1 The Preaching of John the Baptist\x - \xo 3.0 \xta Compare with \xt Mk 1.1-8; Lk 3.1-18; \xta and \xt Jn 1.19-28 \xta parallel passages.\x* +\p +\v 1 At that time John the Baptist came to... +\v 2 \x - \xo 1:1 \xop Гл 1. (1)\xop* \xt 4 Царств. 14:25.\x*И биде слово Господне +към Иона, син Аматиев: +\v 3 Our God is in heaven; +\q2 he does whatever he wishes. 
+\q1 +\v 4 \x - \xo 115.4-8: \xt Ps 135.15-18; \xdc Ltj Jr 4-73; \xt Rev 9.20.\x* Their gods are made of silver and gold, +``` ## Marker Wise Syntax Check the behaviour of the parser/validator are proper under these situations where internal structure of a marker needs to be validated diff --git a/test/test.js b/test/test.js index 1aec3f22..f2e48dbb 100644 --- a/test/test.js +++ b/test/test.js @@ -84,4 +84,9 @@ describe('Ensure all true positives', function () { let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\c 136\n\\s1 The Preaching of John the Baptist\n\\r (Matthew 3.1-12; Luke 3.1-18; John 1.19-28)\n\\p\n\\v 1 This is the Good News about Jesus Christ, the Son of God. \\f + \\fr 1.1: \\ft Some manuscripts do not have \\fq the Son of God.\\f*\n\\v 20 Adam \\f + \\fr 3.20: \\fk Adam: \\ft This name in Hebrew means “all human beings.”\\f* named his wife Eve, \\f + \\fr 3.20: \\fk Eve: \\ft This name sounds similar to the Hebrew word for “living,” which is rendered in this context as “human beings.”\\f* because she was the mother of all human beings.\n\\v 38 whoever believes in me should drink. As the scripture says, ‘Streams of life-giving water will pour out from his side.’” \\f + \\fr 7.38: \\ft Jesus\' words in verses 37-38 may be translated: \\fqa “Whoever is thirsty should come to me and drink. \\fv 38\\fv* As the scripture says, ‘Streams of life-giving water will pour out ...’”\\f*\n\\v 3 Él es el resplandor glorioso de Dios,\\f c \\fr 1.3: \\fk Resplandor: \\ft Cf. Jn 1.4-9,14\\fdc ; también Sab 7.25-26, donde algo parecido se dice de la sabiduría.\\f* la imagen misma ...') assert.strictEqual(output, true) }) + + it('Cross-reference Markers', function () { + let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\c 6\n\\v 18 “Why do you call me good?” Jesus asked him. 
“No one is good except God alone.\n\\v 19 \\x - \\xo 10.19: a \\xt Exo 20.13; Deu 5.17; \\xo b \\xt Exo 20.14; Deu 5.18; \\xo c \\xt Exo 20.15; Deu 5.19; \\xo d \\xt Exo 20.16; Deu 5.20; \\xo e \\xt Exo 20.12; Deu 5.16.\\x* You know the commandments: ‘Do not commit murder...\n\\c 2\n\\cd \\xt 1|GEN 2:1\\xt* Бог благословляет седьмой день; \\xt 8|GEN 2:8\\x* человек в раю Едемском; четыре реки; дерево познания добра и зла. \\xt 18|GEN 2:18\\x* Человек дает названия животным. \\xt 21|GEN 2:21\\xt* Создание женщины.\n\\p\n\\v 1 Так совершены небо и земля и все воинство их.\n\\c 3\n\\s1 The Preaching of John the Baptist\\x - \\xo 3.0 \\xta Compare with \\xt Mk 1.1-8; Lk 3.1-18; \\xta and \\xt Jn 1.19-28 \\xta parallel passages.\\x*\n\\p\n\\v 1 At that time John the Baptist came to...\n\\v 2 \\x - \\xo 1:1 \\xop Гл 1. (1)\\xop* \\xt 4 Царств. 14:25.\\x*И биде слово Господне към Иона, син Аматиев:\n\\v 3 Our God is in heaven;\n\\q2 he does whatever he wishes.\n\\q1\n\\v 4 \\x - \\xo 115.4-8: \\xt Ps 135.15-18; \\xdc Ltj Jr 4-73; \\xt Rev 9.20.\\x* Their gods are made of silver and gold,') + assert.strictEqual(output, true) + }) }) diff --git a/usfm.ohm b/usfm.ohm index 89b24069..3989ac6f 100644 --- a/usfm.ohm +++ b/usfm.ohm @@ -144,7 +144,7 @@ usfmBible{ footnoteElement = fElement | feElement fElement = backSlash "f" spaceChar? footnoteContent* backSlash "f*" spaceChar? feElement = backSlash "fe" spaceChar? footnoteContent* backSlash "fe*"spaceChar? - crossrefElement = backSlash "x" spaceChar? crossrefContent* backSlash "x*" spaceChar? + crossrefElement = backSlash ("xt"|"x") spaceChar? crossrefContent* backSlash ("xt*"|"x*") spaceChar? 
footnoteContent = text | footnoteContentElement footnoteContentElement = backSlash "fr" spaceChar From 7cd0e3696d5c93681463c34adddf902859c4eea2 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 29 Nov 2018 10:12:36 +0530 Subject: [PATCH 06/37] enabled nesting of character markers --- Testing Plan.md | 11 +++++++++-- usfm.ohm | 32 +++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/Testing Plan.md b/Testing Plan.md index ff66529e..b144f276 100644 --- a/Testing Plan.md +++ b/Testing Plan.md @@ -348,14 +348,21 @@ Ensure true cases are being validated successfully, for maximum number of marker \s1 The Preaching of John the Baptist\x - \xo 3.0 \xta Compare with \xt Mk 1.1-8; Lk 3.1-18; \xta and \xt Jn 1.19-28 \xta parallel passages.\x* \p \v 1 At that time John the Baptist came to... -\v 2 \x - \xo 1:1 \xop Гл 1. (1)\xop* \xt 4 Царств. 14:25.\x*И биде слово Господне -към Иона, син Аматиев: +\v 2 \x - \xo 1:1 \xop Гл 1. (1)\xop* \xt 4 Царств. 14:25.\x*И биде слово Господне към Иона, син Аматиев: \v 3 Our God is in heaven; \q2 he does whatever he wishes. \q1 \v 4 \x - \xo 115.4-8: \xt Ps 135.15-18; \xdc Ltj Jr 4-73; \xt Rev 9.20.\x* Their gods are made of silver and gold, ``` +### Test Word and Character Markers: should pass +``` +\id MAT 41MATGNT92.SFM, Good News Translation, June 2003 +\c 6 +\v 14 That is why \bk The Book of the \+nd Lord\+nd*'s Battles\bk* speaks of “...the town of Waheb in the area of ... +\v 15 and the slope of the valleys ... +``` + ## Marker Wise Syntax Check the behaviour of the parser/validator are proper under these situations where internal structure of a marker needs to be validated diff --git a/usfm.ohm b/usfm.ohm index 3989ac6f..6b4adaef 100644 --- a/usfm.ohm +++ b/usfm.ohm @@ -182,37 +182,67 @@ usfmBible{ charElement = inLineCharElement | inLineCharAttributeElement | inLineCharNumberedElement inLineCharElement = newLine? 
backSlash "add" spaceChar chapterContentTextContent backSlash "add" "*" spaceChar? + | newLine? backSlash "+add" spaceChar chapterContentTextContent backSlash "+add" "*" spaceChar? | newLine? backSlash "bk" spaceChar chapterContentTextContent backSlash "bk" "*" spaceChar? + | newLine? backSlash "+bk" spaceChar chapterContentTextContent backSlash "+bk" "*" spaceChar? | newLine? backSlash "dc" spaceChar chapterContentTextContent backSlash "dc" "*" spaceChar? | newLine? backSlash "+dc" spaceChar chapterContentTextContent backSlash "+dc" "*" spaceChar? | newLine? backSlash "k" spaceChar chapterContentTextContent backSlash "k" "*" spaceChar? + | newLine? backSlash "+k" spaceChar chapterContentTextContent backSlash "+k" "*" spaceChar? | newLine? backSlash "nd" spaceChar chapterContentTextContent backSlash "nd" "*" spaceChar? + | newLine? backSlash "+nd" spaceChar chapterContentTextContent backSlash "+nd" "*" spaceChar? | newLine? backSlash "ord" spaceChar chapterContentTextContent backSlash "ord" "*" spaceChar? + | newLine? backSlash "+ord" spaceChar chapterContentTextContent backSlash "+ord" "*" spaceChar? | newLine? backSlash "pn" spaceChar chapterContentTextContent backSlash "pn" "*" spaceChar? + | newLine? backSlash "+pn" spaceChar chapterContentTextContent backSlash "+pn" "*" spaceChar? | newLine? backSlash "png" spaceChar chapterContentTextContent backSlash "png" "*" spaceChar? + | newLine? backSlash "+png" spaceChar chapterContentTextContent backSlash "+png" "*" spaceChar? | newLine? backSlash "addpn" spaceChar chapterContentTextContent backSlash "addpn" "*" spaceChar? + | newLine? backSlash "+addpn" spaceChar chapterContentTextContent backSlash "+addpn" "*" spaceChar? | newLine? backSlash "qt" spaceChar chapterContentTextContent backSlash "qt" "*" spaceChar? + | newLine? backSlash "+qt" spaceChar chapterContentTextContent backSlash "+qt" "*" spaceChar? | newLine? backSlash "sig" spaceChar chapterContentTextContent backSlash "sig" "*" spaceChar? + | newLine? 
backSlash "+sig" spaceChar chapterContentTextContent backSlash "+sig" "*" spaceChar? | newLine? backSlash "sls" spaceChar chapterContentTextContent backSlash "sls" "*" spaceChar? + | newLine? backSlash "+sls" spaceChar chapterContentTextContent backSlash "+sls" "*" spaceChar? | newLine? backSlash "tl" spaceChar chapterContentTextContent backSlash "tl" "*" spaceChar? + | newLine? backSlash "+tl" spaceChar chapterContentTextContent backSlash "+tl" "*" spaceChar? | newLine? backSlash "wj" spaceChar chapterContentTextContent backSlash "wj" "*" spaceChar? + | newLine? backSlash "+wj" spaceChar chapterContentTextContent backSlash "+wj" "*" spaceChar? | newLine? backSlash "em" spaceChar chapterContentTextContent backSlash "em" "*" spaceChar? + | newLine? backSlash "+em" spaceChar chapterContentTextContent backSlash "+em" "*" spaceChar? | newLine? backSlash "bd" spaceChar chapterContentTextContent backSlash "bd" "*" spaceChar? + | newLine? backSlash "+bd" spaceChar chapterContentTextContent backSlash "+bd" "*" spaceChar? | newLine? backSlash "it" spaceChar chapterContentTextContent backSlash "it" "*" spaceChar? + | newLine? backSlash "+it" spaceChar chapterContentTextContent backSlash "+it" "*" spaceChar? | newLine? backSlash "bdit" spaceChar chapterContentTextContent backSlash "bdit" "*" spaceChar? + | newLine? backSlash "+bdit" spaceChar chapterContentTextContent backSlash "+bdit" "*" spaceChar? | newLine? backSlash "no" spaceChar chapterContentTextContent backSlash "no" "*" spaceChar? + | newLine? backSlash "+no" spaceChar chapterContentTextContent backSlash "+no" "*" spaceChar? | newLine? backSlash "sc" spaceChar chapterContentTextContent backSlash "sc" "*" spaceChar? + | newLine? backSlash "+sc" spaceChar chapterContentTextContent backSlash "+sc" "*" spaceChar? | newLine? backSlash "sup" spaceChar chapterContentTextContent backSlash "sup" "*" spaceChar? + | newLine? backSlash "+sup" spaceChar chapterContentTextContent backSlash "+sup" "*" spaceChar? | newLine? 
backSlash "ndx" spaceChar chapterContentTextContent backSlash "ndx" "*" spaceChar? + | newLine? backSlash "+ndx" spaceChar chapterContentTextContent backSlash "+ndx" "*" spaceChar? | newLine? backSlash "wg" spaceChar chapterContentTextContent backSlash "wg" "*" spaceChar? + | newLine? backSlash "+wg" spaceChar chapterContentTextContent backSlash "+wg" "*" spaceChar? | newLine? backSlash "wh" spaceChar chapterContentTextContent backSlash "wh" "*" spaceChar? + | newLine? backSlash "+wh" spaceChar chapterContentTextContent backSlash "+wh" "*" spaceChar? | newLine? backSlash "wa" spaceChar chapterContentTextContent backSlash "wa" "*" spaceChar? + | newLine? backSlash "+wa" spaceChar chapterContentTextContent backSlash "+wa" "*" spaceChar? | newLine? backSlash "qs" spaceChar chapterContentTextContent backSlash "qs" "*" spaceChar? + | newLine? backSlash "+qs" spaceChar chapterContentTextContent backSlash "+qs" "*" spaceChar? | newLine? backSlash "qac" spaceChar chapterContentTextContent backSlash "qac" "*" spaceChar? + | newLine? backSlash "+qac" spaceChar chapterContentTextContent backSlash "+qac" "*" spaceChar? | newLine? backSlash "litl" spaceChar chapterContentTextContent backSlash "litl" "*" spaceChar? + | newLine? backSlash "+litl" spaceChar chapterContentTextContent backSlash "+litl" "*" spaceChar? | newLine? backSlash "lik" spaceChar chapterContentTextContent backSlash "lik" "*" spaceChar? + | newLine? backSlash "+lik" spaceChar chapterContentTextContent backSlash "+lik" "*" spaceChar? | newLine? backSlash "rq" spaceChar chapterContentTextContent backSlash "rq" "*" spaceChar? + | newLine? backSlash "+rq" spaceChar chapterContentTextContent backSlash "+rq" "*" spaceChar? | newLine? backSlash "ior" spaceChar bookIntroductionTitlesTextContent backSlash "ior" "*" spaceChar? + | newLine? backSlash "+ior" spaceChar bookIntroductionTitlesTextContent backSlash "+ior" "*" spaceChar? @@ -265,4 +295,4 @@ usfmBible{ zNameSpace = newLine? backSlash "z" char* spaceChar? text? 
(backSlash "z" char* "*" )? } - \ No newline at end of file + From 20ada4105fc5998a0ffba6ef8fa1c72e3e01ddc3 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 29 Nov 2018 11:56:23 +0530 Subject: [PATCH 07/37] changed rule: verse, ms and mr --- grammarOperations.js | 4 ++-- usfm.ohm | 19 +++++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/grammarOperations.js b/grammarOperations.js index f8576874..4b59de45 100644 --- a/grammarOperations.js +++ b/grammarOperations.js @@ -132,7 +132,7 @@ sem.addOperation('composeJson', { return obj }, - verseElement: function (_, _, _, _, verseNumber, verseMeta, verseText, metaScripture, verseTextMore) { + verseElement: function (_, _, _, _, verseNumber, verseMeta, metaScripture, verseText) { let verse ={} verse['number'] = verseNumber.composeJson() if ( verseMeta.sourceString!='' ) { verse['metadata'] = verseMeta.composeJson()} @@ -140,7 +140,7 @@ sem.addOperation('composeJson', { if ( metaScripture.sourceString!='' ) { verse['metadata_inline'] = metaScripture.composeJson()} verse['text'] = verseText.composeJson() - if (verseTextMore.sourceString!='') { verse['text'].concat(verseTextMore.composeJson())} + // if (verseTextMore.sourceString!='') { verse['text'].concat(verseTextMore.composeJson())} return verse }, diff --git a/usfm.ohm b/usfm.ohm index 6b4adaef..638d1f07 100644 --- a/usfm.ohm +++ b/usfm.ohm @@ -37,7 +37,7 @@ usfmBible{ bookIdentification = idElement usfmElement? 
- bookHeaders = hElement | ideElement | remElement | tocElement | tocaElement | stsElement | mt + bookHeaders = hElement | ideElement | remElement | tocElement | tocaElement | stsElement | mt | mte introduction = ibElement | ieElement | iexElement | ili | imElement | imiElement | imqElement | imt | imte | io @@ -50,12 +50,12 @@ usfmBible{ chapterHeader = cElement (caElement | cpElement | clElement | cdElement)* - metaScripture = sectionHeader | mte | remElement | iexElement | ipElement | spElement | litElement | qaElement | paraElement + metaScripture = sectionHeader | mte | remElement | iexElement | ipElement | spElement | litElement | qaElement | paraElement | notesElement | figureElement | zNameSpace sectionHeader = sectionPreHeader? sectionElement sectionPostHeader* sectionPreHeader = msElement mrElement? sectionPostHeader = srElement | rElement | dElement - verseElement = newLine backSlash "v" spaceChar verseNumber (vaElement | vpElement)* verseText (metaScripture verseText?)* + verseElement = newLine backSlash "v" spaceChar verseNumber (vaElement | vpElement)* (metaScripture? verseText)+ verseNumber = number ("-" number)? spaceChar @@ -123,8 +123,8 @@ usfmBible{ iqElement = newLine backSlash "iq" number? spaceChar bookIntroductionTitlesTextContent isElement = newLine backSlash "is" number? spaceChar bookIntroductionTitlesTextContent - mrElement = newLine backSlash "mr" spaceChar chapterContentTextContent - msElement = newLine backSlash "ms" number? spaceChar chapterContentTextContent + mrElement = newLine backSlash "mr" spaceChar text + msElement = newLine backSlash "ms" number? spaceChar text mt = mtElement+ mtElement = newLine backSlash "mt" number? spaceChar chapterContentTextContent mte = mteElement+ @@ -146,7 +146,7 @@ usfmBible{ feElement = backSlash "fe" spaceChar? footnoteContent* backSlash "fe*"spaceChar? crossrefElement = backSlash ("xt"|"x") spaceChar? crossrefContent* backSlash ("xt*"|"x*") spaceChar? 
- footnoteContent = text | footnoteContentElement + footnoteContent = text | footnoteContentElement | charElement footnoteContentElement = backSlash "fr" spaceChar | backSlash "fq" spaceChar | backSlash "fqa" spaceChar @@ -161,7 +161,7 @@ usfmBible{ | backSlash "fdc*" spaceChar | backSlash "fm" spaceChar - crossrefContent = text | crossrefContentElement + crossrefContent = text | crossrefContentElement | charElement crossrefContentElement = backSlash "xo" spaceChar | backSlash "xk" spaceChar | backSlash "xq" spaceChar @@ -285,7 +285,7 @@ usfmBible{ bookIntroductionTitlesTextContent = (text | notesElement | charElement | milestoneElement | figureElement | zNameSpace)+ bookTitlesTextContent = (text | notesElement | charElement | zNameSpace)+ - chapterContentTextContent = newLine? (text | notesElement | charElement | milestoneElement| figureElement | table | li | zNameSpace | mte)+ + chapterContentTextContent = newLine? (text | charElement | milestoneElement| table | li | metaScripture)+ bookIntroductionEndTitlesTextContent = (text | notesElement | charElement | milestoneElement | zNameSpace)+ milestoneElement = (backSlash "qt" number? "-s" spaceChar* attributes? backSlash "*" ) @@ -294,5 +294,4 @@ usfmBible{ | (backSlash "ts" number? "-e" spaceChar* attributes? backSlash "*" ) zNameSpace = newLine? backSlash "z" char* spaceChar? text? (backSlash "z" char* "*" )? 
-} - +} \ No newline at end of file From da3d7a122cf10bd4b9e61c0514fb5c8f43f6df77 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 29 Nov 2018 13:32:29 +0530 Subject: [PATCH 08/37] tested cross-refs --- Testing Plan.md | 21 +++++++++++++++++++++ grammarOperations.js | 4 ++-- test/test.js | 2 +- usfm.ohm | 2 +- 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/Testing Plan.md b/Testing Plan.md index b144f276..4deea815 100644 --- a/Testing Plan.md +++ b/Testing Plan.md @@ -358,9 +358,30 @@ Ensure true cases are being validated successfully, for maximum number of marker ### Test Word and Character Markers: should pass ``` \id MAT 41MATGNT92.SFM, Good News Translation, June 2003 +\is Introduction +\ip \bk The Acts of the Apostles\bk* is a continuation of \bk The Gospel according to Luke\bk* Its chief purpose is... \c 6 \v 14 That is why \bk The Book of the \+nd Lord\+nd*'s Battles\bk* speaks of “...the town of Waheb in the area of ... \v 15 and the slope of the valleys ... +\s1 The Garden of Eden +\p When the \nd Lord\nd* \f + \fr 2.4: \fk the \+nd Lord\+nd*: \ft Where the Hebrew text has Yahweh, traditionally transliterated as Jehovah, this translation employs \+nd Lord\+nd* with capital letters, following a usage which is widespread in English versions.\f* God made the universe, +\v 5 there were no plants on the earth and no seeds had sprouted, because he had not sent any rain, and there was no one to cultivate the land; +\p +\v 29 И нарек ему имя: Ной, сказав: он утешит нас в работе нашей и в трудах рук наших при \add возделывании\add* земли, которую проклял Господь. +\v 3 Él es el resplandor glorioso de Dios,\f c \fr 1.3: \fk Resplandor: \ft Cf. Jn 1.4-9,14\+dc ; también Sab 7.25-26, donde algo parecido se dice de la sabiduría\+dc*.\f* la imagen misma de +\v 9 От Господа спасение. Над народом Твоим благословение Твое. +\lit Слава: +\v 15 Tell the Israelites that I, the \nd Lord\nd*, the God... 
+\v 2 It began as the prophet Isaiah had written: +\q1 \qt “God said, ‘I will send my messenger ahead of you\qt* +\q2 \qt to open the way for you.’\qt* +\v 18 With my own hand I write this: \sig Greetings from Paul\sig*. Do not... +\v 8 \sls Rehoum, chancelier, et Shimshaï, secrétaire, écrivirent au roi Artaxerxès la lettre suivante concernant Jérusalem, savoir:\sls* +\c 9 +\s1 Jesus Heals a Man // Who Could Not Walk +\r (Mark 2.1-12; Luke 5.17-26) +\v 46 At about three o'clock Jesus cried out with a loud shout, \tl “Eli, Eli, lema sabachthani?”\tl* which means, “My God, my God, why did you +\v 18 At once they left their nets and went with him.\fig At once they left their nets.|src="avnt016.jpg" size="span" ref="1.18"\fig* ``` ## Marker Wise Syntax diff --git a/grammarOperations.js b/grammarOperations.js index 4b59de45..94402868 100644 --- a/grammarOperations.js +++ b/grammarOperations.js @@ -462,8 +462,8 @@ sem.addOperation('composeJson', { return obj }, - figureElement: function(_, _, _, caption, text, attribs, _, _) { - return {'figure': {'caption': caption.sourceString, 'text': text.composeJson(), 'Attributes':attribs.composeJson()}} + figureElement: function(_, _, _, caption, attribs, _, _) { + return {'figure': {'caption': caption.sourceString, 'Attributes':attribs.composeJson()}} }, table: function(header, row) { diff --git a/test/test.js b/test/test.js index f2e48dbb..ffa13510 100644 --- a/test/test.js +++ b/test/test.js @@ -86,7 +86,7 @@ describe('Ensure all true positives', function () { }) it('Cross-reference Markers', function () { - let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\c 6\n\\v 18 “Why do you call me good?” Jesus asked him. 
“No one is good except God alone.\n\\v 19 \\x - \\xo 10.19: a \\xt Exo 20.13; Deu 5.17; \\xo b \\xt Exo 20.14; Deu 5.18; \\xo c \\xt Exo 20.15; Deu 5.19; \\xo d \\xt Exo 20.16; Deu 5.20; \\xo e \\xt Exo 20.12; Deu 5.16.\\x* You know the commandments: ‘Do not commit murder...\n\\c 2\n\\cd \\xt 1|GEN 2:1\\xt* Бог благословляет седьмой день; \\xt 8|GEN 2:8\\x* человек в раю Едемском; четыре реки; дерево познания добра и зла. \\xt 18|GEN 2:18\\x* Человек дает названия животным. \\xt 21|GEN 2:21\\xt* Создание женщины.\n\\p\n\\v 1 Так совершены небо и земля и все воинство их.\n\\c 3\n\\s1 The Preaching of John the Baptist\\x - \\xo 3.0 \\xta Compare with \\xt Mk 1.1-8; Lk 3.1-18; \\xta and \\xt Jn 1.19-28 \\xta parallel passages.\\x*\n\\p\n\\v 1 At that time John the Baptist came to...\n\\v 2 \\x - \\xo 1:1 \\xop Гл 1. (1)\\xop* \\xt 4 Царств. 14:25.\\x*И биде слово Господне към Иона, син Аматиев:\n\\v 3 Our God is in heaven;\n\\q2 he does whatever he wishes.\n\\q1\n\\v 4 \\x - \\xo 115.4-8: \\xt Ps 135.15-18; \\xdc Ltj Jr 4-73; \\xt Rev 9.20.\\x* Their gods are made of silver and gold,') + let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\is Introduction\n\\ip \\bk The Acts of the Apostles\\bk* is a continuation of \\bk The Gospel according to Luke\\bk* Its chief purpose is...\n\\c 6\n\\v 14 That is why \\bk The Book of the \\+nd Lord\\+nd*\'s Battles\\bk* speaks of “...the town of Waheb in the area of ...\n\\v 15 and the slope of the valleys ...\n\\s1 The Garden of Eden\n\\p When the \\nd Lord\\nd* \\f + \\fr 2.4: \\fk the \\+nd Lord\\+nd*: \\ft Where the Hebrew text has Yahweh, traditionally transliterated as Jehovah, this translation employs \\+nd Lord\\+nd* with capital letters, following a usage which is widespread in English versions.\\f* God made the universe,\n\\v 5 there were no plants on the earth and no seeds had sprouted, because he had not sent any rain, and there was no one to cultivate the land;\n\\p\n\\v 29 И 
нарек ему имя: Ной, сказав: он утешит нас в работе нашей и в трудах рук наших при \\add возделывании\\add* земли, которую проклял Господь.\n\\v 3 Él es el resplandor glorioso de Dios,\\f c \\fr 1.3: \\fk Resplandor: \\ft Cf. Jn 1.4-9,14\\+dc ; también Sab 7.25-26, donde algo parecido se dice de la sabiduría\\+dc*.\\f* la imagen misma de\n\\v 9 От Господа спасение. Над народом Твоим благословение Твое.\n\\lit Слава:\n\\v 15 Tell the Israelites that I, the \\nd Lord\\nd*, the God...\n\\v 2 It began as the prophet Isaiah had written:\n\\q1 \\qt “God said, ‘I will send my messenger ahead of you\\qt*\n\\q2 \\qt to open the way for you.’\\qt*\n\\v 18 With my own hand I write this: \\sig Greetings from Paul\\sig*. Do not...\n\\v 8 \\sls Rehoum, chancelier, et Shimshaï, secrétaire, écrivirent au roi Artaxerxès la lettre suivante concernant Jérusalem, savoir:\\sls*\n\\c 9\n\\s1 Jesus Heals a Man // Who Could Not Walk\n\\r (Mark 2.1-12; Luke 5.17-26)\n\\v 46 At about three o\'clock Jesus cried out with a loud shout, \\tl “Eli, Eli, lema sabachthani?”\\tl* which means, “My God, my God, why did you \n\\v 18 At once they left their nets and went with him.\\fig At once they left their nets.|src="avnt016.jpg" size="span" ref="1.18"\\fig*') assert.strictEqual(output, true) }) }) diff --git a/usfm.ohm b/usfm.ohm index 638d1f07..44e8aa23 100644 --- a/usfm.ohm +++ b/usfm.ohm @@ -260,7 +260,7 @@ usfmBible{ namedAttributeList = "|" (text "=\"" attributeName "\"" spaceChar?)+ attributeName = char+ - figureElement = backSlash "fig" spaceChar caption chapterContentTextContent attributes? backSlash "fig*" + figureElement = backSlash "fig" spaceChar caption attributes? backSlash "fig*" caption = text table = headerRow? 
row+ From 0c67ba204ba52cc1b99ed251e7072ab951ef7510 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 29 Nov 2018 13:43:51 +0530 Subject: [PATCH 09/37] tested word and character markers --- test/test.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/test.js b/test/test.js index ffa13510..489649c8 100644 --- a/test/test.js +++ b/test/test.js @@ -86,6 +86,11 @@ describe('Ensure all true positives', function () { }) it('Cross-reference Markers', function () { + let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\c 6\n\\v 18 “Why do you call me good?” Jesus asked him. “No one is good except God alone.\n\\v 19 \\x - \\xo 10.19: a \\xt Exo 20.13; Deu 5.17; \\xo b \\xt Exo 20.14; Deu 5.18; \\xo c \\xt Exo 20.15; Deu 5.19; \\xo d \\xt Exo 20.16; Deu 5.20; \\xo e \\xt Exo 20.12; Deu 5.16.\\x* You know the commandments: ‘Do not commit murder...\n\\c 2\n\\cd \\xt 1|GEN 2:1\\xt* Бог благословляет седьмой день; \\xt 8|GEN 2:8\\x* человек в раю Едемском; четыре реки; дерево познания добра и зла. \\xt 18|GEN 2:18\\x* Человек дает названия животным. \\xt 21|GEN 2:21\\xt* Создание женщины.\n\\p\n\\v 1 Так совершены небо и земля и все воинство их.\n\\c 3\n\\s1 The Preaching of John the Baptist\\x - \\xo 3.0 \\xta Compare with \\xt Mk 1.1-8; Lk 3.1-18; \\xta and \\xt Jn 1.19-28 \\xta parallel passages.\\x*\n\\p\n\\v 1 At that time John the Baptist came to...\n\\v 2 \\x - \\xo 1:1 \\xop Гл 1. (1)\\xop* \\xt 4 Царств. 
14:25.\\x*И биде слово Господне към Иона, син Аматиев:\n\\v 3 Our God is in heaven;\n\\q2 he does whatever he wishes.\n\\q1\n\\v 4 \\x - \\xo 115.4-8: \\xt Ps 135.15-18; \\xdc Ltj Jr 4-73; \\xt Rev 9.20.\\x* Their gods are made of silver and gold,') + assert.strictEqual(output, true) + }) + + it('Word and Character Markers', function () { let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\is Introduction\n\\ip \\bk The Acts of the Apostles\\bk* is a continuation of \\bk The Gospel according to Luke\\bk* Its chief purpose is...\n\\c 6\n\\v 14 That is why \\bk The Book of the \\+nd Lord\\+nd*\'s Battles\\bk* speaks of “...the town of Waheb in the area of ...\n\\v 15 and the slope of the valleys ...\n\\s1 The Garden of Eden\n\\p When the \\nd Lord\\nd* \\f + \\fr 2.4: \\fk the \\+nd Lord\\+nd*: \\ft Where the Hebrew text has Yahweh, traditionally transliterated as Jehovah, this translation employs \\+nd Lord\\+nd* with capital letters, following a usage which is widespread in English versions.\\f* God made the universe,\n\\v 5 there were no plants on the earth and no seeds had sprouted, because he had not sent any rain, and there was no one to cultivate the land;\n\\p\n\\v 29 И нарек ему имя: Ной, сказав: он утешит нас в работе нашей и в трудах рук наших при \\add возделывании\\add* земли, которую проклял Господь.\n\\v 3 Él es el resplandor glorioso de Dios,\\f c \\fr 1.3: \\fk Resplandor: \\ft Cf. Jn 1.4-9,14\\+dc ; también Sab 7.25-26, donde algo parecido se dice de la sabiduría\\+dc*.\\f* la imagen misma de\n\\v 9 От Господа спасение. Над народом Твоим благословение Твое.\n\\lit Слава:\n\\v 15 Tell the Israelites that I, the \\nd Lord\\nd*, the God...\n\\v 2 It began as the prophet Isaiah had written:\n\\q1 \\qt “God said, ‘I will send my messenger ahead of you\\qt*\n\\q2 \\qt to open the way for you.’\\qt*\n\\v 18 With my own hand I write this: \\sig Greetings from Paul\\sig*. 
Do not...\n\\v 8 \\sls Rehoum, chancelier, et Shimshaï, secrétaire, écrivirent au roi Artaxerxès la lettre suivante concernant Jérusalem, savoir:\\sls*\n\\c 9\n\\s1 Jesus Heals a Man // Who Could Not Walk\n\\r (Mark 2.1-12; Luke 5.17-26)\n\\v 46 At about three o\'clock Jesus cried out with a loud shout, \\tl “Eli, Eli, lema sabachthani?”\\tl* which means, “My God, my God, why did you \n\\v 18 At once they left their nets and went with him.\\fig At once they left their nets.|src="avnt016.jpg" size="span" ref="1.18"\\fig*') assert.strictEqual(output, true) }) From 6f10918d2ffae2358e63269031159ac8d1569bb8 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 29 Nov 2018 14:55:08 +0530 Subject: [PATCH 10/37] added esb, ef and ex markers --- usfm.ohm | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/usfm.ohm b/usfm.ohm index 44e8aa23..caf90786 100644 --- a/usfm.ohm +++ b/usfm.ohm @@ -37,11 +37,11 @@ usfmBible{ bookIdentification = idElement usfmElement? - bookHeaders = hElement | ideElement | remElement | tocElement | tocaElement | stsElement | mt | mte + bookHeaders = hElement | ideElement | remElement | tocElement | tocaElement | stsElement | mt | mte | esbElement introduction = ibElement | ieElement | iexElement | ili | imElement | imiElement | imqElement | imt | imte | io - | iotElement | ipiElement | ipqElement | iprElement | ipElement | iqElement | isElement | remElement + | iotElement | ipiElement | ipqElement | iprElement | ipElement | iqElement | isElement | remElement | esbElement bookChapterLabel = clElement @@ -141,10 +141,11 @@ usfmBible{ notesElement = footnoteElement | crossrefElement - footnoteElement = fElement | feElement + footnoteElement = fElement | feElement | efElement fElement = backSlash "f" spaceChar? footnoteContent* backSlash "f*" spaceChar? feElement = backSlash "fe" spaceChar? footnoteContent* backSlash "fe*"spaceChar? - crossrefElement = backSlash ("xt"|"x") spaceChar? 
crossrefContent* backSlash ("xt*"|"x*") spaceChar? + efElement = backSlash "ef" spaceChar? footnoteContent* backSlash "ef*"spaceChar? + crossrefElement = backSlash ("xt"|"x" | "ex") spaceChar? crossrefContent* backSlash ("xt*"|"x*" | "ex") spaceChar? footnoteContent = text | footnoteContentElement | charElement footnoteContentElement = backSlash "fr" spaceChar @@ -285,8 +286,8 @@ usfmBible{ bookIntroductionTitlesTextContent = (text | notesElement | charElement | milestoneElement | figureElement | zNameSpace)+ bookTitlesTextContent = (text | notesElement | charElement | zNameSpace)+ - chapterContentTextContent = newLine? (text | charElement | milestoneElement| table | li | metaScripture)+ - bookIntroductionEndTitlesTextContent = (text | notesElement | charElement | milestoneElement | zNameSpace)+ + chapterContentTextContent = newLine? (text | charElement | milestoneElement| table | li | metaScripture | esbElement)+ + bookIntroductionEndTitlesTextContent = (text | notesElement | charElement | milestoneElement | zNameSpace | esbElement)+ milestoneElement = (backSlash "qt" number? "-s" spaceChar* attributes? backSlash "*" ) | (backSlash "qt" number? "-e" spaceChar* attributes? backSlash "*" ) @@ -294,4 +295,6 @@ usfmBible{ | (backSlash "ts" number? "-e" spaceChar* attributes? backSlash "*" ) zNameSpace = newLine? backSlash "z" char* spaceChar? text? (backSlash "z" char* "*" )? + + esbElement = newLine backSlash "esb" spaceChar? chapterContentTextContent+ newLine? backSlash "esbe" spaceChar? 
} \ No newline at end of file From c667592b32cc9b24e3de70146991a62c5b842428 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 29 Nov 2018 15:08:01 +0530 Subject: [PATCH 11/37] tested extented markers and markers with links and attributes --- Testing Plan.md | 17 +++++++++++++++++ grammarOperations.js | 4 ++++ test/test.js | 5 +++++ usfm.ohm | 3 +++ 4 files changed, 29 insertions(+) diff --git a/Testing Plan.md b/Testing Plan.md index 4deea815..7980da38 100644 --- a/Testing Plan.md +++ b/Testing Plan.md @@ -384,6 +384,23 @@ Ensure true cases are being validated successfully, for maximum number of marker \v 18 At once they left their nets and went with him.\fig At once they left their nets.|src="avnt016.jpg" size="span" ref="1.18"\fig* ``` +### Test Markers with Attribtes : Should pass +``` +\id MAT 41MATGNT92.SFM, Good News Translation, June 2003 +\c 1 +\v 1 +\q1 “Someone is shouting in the desert, +\q2 ‘Prepare a road for the Lord; +\q2 make a straight path for him to travel!’ ” +\esb \cat People\cat* +\ms \jmp |link-id="article-john_the_baptist"\jmp*John the Baptist +\p John is sometimes called the last “Old Testament prophet” because of the warnings he brought about God's judgment and because he announced the coming of God's “Chosen One” (Messiah). +\esbe +\p +\v 2-6 From Abraham to King David, the following ancestors are listed: Abraham,...mother was \jmp Ruth|link-href="#article-Ruth"\jmp*), Jesse, and King David. 
+\w gracious|link-href="http://bibles.org/search/grace/eng-GNTD/all"\w* +``` + ## Marker Wise Syntax Check the behaviour of the parser/validator are proper under these situations where internal structure of a marker needs to be validated diff --git a/grammarOperations.js b/grammarOperations.js index 94402868..04dbc962 100644 --- a/grammarOperations.js +++ b/grammarOperations.js @@ -561,6 +561,10 @@ sem.addOperation('composeJson', { text: function(words) { return words.sourceString + }, + + esbElement: function (_, _, _, _, content, _, _, _, _) { + return {'esb' : content.composeJson()} } }) diff --git a/test/test.js b/test/test.js index 489649c8..0a019ef6 100644 --- a/test/test.js +++ b/test/test.js @@ -94,4 +94,9 @@ describe('Ensure all true positives', function () { let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\is Introduction\n\\ip \\bk The Acts of the Apostles\\bk* is a continuation of \\bk The Gospel according to Luke\\bk* Its chief purpose is...\n\\c 6\n\\v 14 That is why \\bk The Book of the \\+nd Lord\\+nd*\'s Battles\\bk* speaks of “...the town of Waheb in the area of ...\n\\v 15 and the slope of the valleys ...\n\\s1 The Garden of Eden\n\\p When the \\nd Lord\\nd* \\f + \\fr 2.4: \\fk the \\+nd Lord\\+nd*: \\ft Where the Hebrew text has Yahweh, traditionally transliterated as Jehovah, this translation employs \\+nd Lord\\+nd* with capital letters, following a usage which is widespread in English versions.\\f* God made the universe,\n\\v 5 there were no plants on the earth and no seeds had sprouted, because he had not sent any rain, and there was no one to cultivate the land;\n\\p\n\\v 29 И нарек ему имя: Ной, сказав: он утешит нас в работе нашей и в трудах рук наших при \\add возделывании\\add* земли, которую проклял Господь.\n\\v 3 Él es el resplandor glorioso de Dios,\\f c \\fr 1.3: \\fk Resplandor: \\ft Cf. 
Jn 1.4-9,14\\+dc ; también Sab 7.25-26, donde algo parecido se dice de la sabiduría\\+dc*.\\f* la imagen misma de\n\\v 9 От Господа спасение. Над народом Твоим благословение Твое.\n\\lit Слава:\n\\v 15 Tell the Israelites that I, the \\nd Lord\\nd*, the God...\n\\v 2 It began as the prophet Isaiah had written:\n\\q1 \\qt “God said, ‘I will send my messenger ahead of you\\qt*\n\\q2 \\qt to open the way for you.’\\qt*\n\\v 18 With my own hand I write this: \\sig Greetings from Paul\\sig*. Do not...\n\\v 8 \\sls Rehoum, chancelier, et Shimshaï, secrétaire, écrivirent au roi Artaxerxès la lettre suivante concernant Jérusalem, savoir:\\sls*\n\\c 9\n\\s1 Jesus Heals a Man // Who Could Not Walk\n\\r (Mark 2.1-12; Luke 5.17-26)\n\\v 46 At about three o\'clock Jesus cried out with a loud shout, \\tl “Eli, Eli, lema sabachthani?”\\tl* which means, “My God, my God, why did you \n\\v 18 At once they left their nets and went with him.\\fig At once they left their nets.|src="avnt016.jpg" size="span" ref="1.18"\\fig*') assert.strictEqual(output, true) }) + + it('Markers with attributes, links and extended content markers', function () { + let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\c 1\n\\v 1 \n\\q1 “Someone is shouting in the desert,\n\\q2 ‘Prepare a road for the Lord;\n\\q2 make a straight path for him to travel!’ ”\n\\esb \\cat People\\cat*\n\\ms \\jmp |link-id="article-john_the_baptist"\\jmp*John the Baptist\n\\p John is sometimes called the last “Old Testament prophet” because of the warnings he brought about God\'s judgment and because he announced the coming of God\'s “Chosen One” (Messiah).\n\\esbe\n\\p \n\\v 2-6 From Abraham to King David, the following ancestors are listed: Abraham,...mother was \\jmp Ruth|link-href="#article-Ruth"\\jmp*), Jesse, and King David.\n\\w gracious|link-href="http://bibles.org/search/grace/eng-GNTD/all"\\w*') + assert.strictEqual(output, true) + }) }) diff --git a/usfm.ohm b/usfm.ohm index 
caf90786..491ed85b 100644 --- a/usfm.ohm +++ b/usfm.ohm @@ -244,6 +244,8 @@ usfmBible{ | newLine? backSlash "+rq" spaceChar chapterContentTextContent backSlash "+rq" "*" spaceChar? | newLine? backSlash "ior" spaceChar bookIntroductionTitlesTextContent backSlash "ior" "*" spaceChar? | newLine? backSlash "+ior" spaceChar bookIntroductionTitlesTextContent backSlash "+ior" "*" spaceChar? + | newLine? backSlash "cat" spaceChar bookIntroductionTitlesTextContent backSlash "cat" "*" spaceChar? + | newLine? backSlash "+cat" spaceChar bookIntroductionTitlesTextContent backSlash "+cat" "*" spaceChar? @@ -297,4 +299,5 @@ usfmBible{ zNameSpace = newLine? backSlash "z" char* spaceChar? text? (backSlash "z" char* "*" )? esbElement = newLine backSlash "esb" spaceChar? chapterContentTextContent+ newLine? backSlash "esbe" spaceChar? + } \ No newline at end of file From 2fcf300e8036f95ed602915c1ff17560199cbe3b Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 6 Dec 2018 15:57:50 +0530 Subject: [PATCH 12/37] tested milestones and namespaces --- Testing Plan.md | 28 ++++++++++++++++++++++++++++ grammarOperations.js | 6 +++--- test/test.js | 5 +++++ usfm.ohm | 15 ++++++--------- 4 files changed, 42 insertions(+), 12 deletions(-) diff --git a/Testing Plan.md b/Testing Plan.md index 7980da38..1f28d2e5 100644 --- a/Testing Plan.md +++ b/Testing Plan.md @@ -401,6 +401,34 @@ Ensure true cases are being validated successfully, for maximum number of marker \w gracious|link-href="http://bibles.org/search/grace/eng-GNTD/all"\w* ``` +### Test Milestones: should pass +``` +\id MAT 41MATGNT92.SFM, Good News Translation, June 2003 +\c 1 +\v 1 +\q1 “Someone is shouting in the desert, +\qt-s |sid="qt_123" who="Pilate"\*“Are you the king of the Jews?”\qt-e |eid="qt_123"\* +\zms\* +\v 11 Jesus stood before the Roman governor, who questioned him. \qt-s |who="Pilate"\*“Are you the king of the Jews?”\qt-e\* he asked. +\p \qt-s |who="Jesus"\*“So you say,”\qt-e\* answered Jesus. 
+\v 12 But he said nothing in response to the accusations of the chief priests and elders. +\p +\v 13 So Pilate said to him, \qt-s |who="Pilate"\*“Don't you hear all these things they accuse you of?”\qt-e\* +\p +\v 14 But Jesus refused to answer ... +\ts\* +\p +\v 5 Now I wish to remind you, although... +\ts-s|sid="ts_JUD_5-6"\* +\p +\v 5 Now I wish to remind you, although you know everything, that the Lord once saved a +people out of the land of Egypt, but that afterward he destroyed those who did not believe. +\v 6 And angels who did not keep to their own principality, but left their proper dwelling +place—God has kept them in everlasting chains in darkness for the judgment of the +great day. +\ts-e|eid="ts_JUD_5-6"\* +``` + ## Marker Wise Syntax Check the behaviour of the parser/validator are proper under these situations where internal structure of a marker needs to be validated diff --git a/grammarOperations.js b/grammarOperations.js index 04dbc962..c2295236 100644 --- a/grammarOperations.js +++ b/grammarOperations.js @@ -547,7 +547,7 @@ sem.addOperation('composeJson', { return text }, - milestoneElement: function(_, ms, num, s_e, _, attribs, _, _) { + milestoneElement: function(_,_, ms, num, s_e, _, attribs, _, _, _) { milestoneElement = {} milestoneElement['milestone'] = ms.sourceString milestoneElement['start/end'] = s_e.sourceString @@ -555,8 +555,8 @@ sem.addOperation('composeJson', { return milestoneElement }, - zNameSpace: function(_, _, _, namespace, _, text, _, _, _, _) { - return {'namespace': namespace.sourceString, 'Content':text.sourceString} + zNameSpace: function(_, _, _, namespace, _, text, _, _) { + return {'namespace': "z"+namespace.sourceString, 'Content':text.sourceString} }, text: function(words) { diff --git a/test/test.js b/test/test.js index 0a019ef6..c58559f8 100644 --- a/test/test.js +++ b/test/test.js @@ -99,4 +99,9 @@ describe('Ensure all true positives', function () { let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News 
Translation, June 2003\n\\c 1\n\\v 1 \n\\q1 “Someone is shouting in the desert,\n\\q2 ‘Prepare a road for the Lord;\n\\q2 make a straight path for him to travel!’ ”\n\\esb \\cat People\\cat*\n\\ms \\jmp |link-id="article-john_the_baptist"\\jmp*John the Baptist\n\\p John is sometimes called the last “Old Testament prophet” because of the warnings he brought about God\'s judgment and because he announced the coming of God\'s “Chosen One” (Messiah).\n\\esbe\n\\p \n\\v 2-6 From Abraham to King David, the following ancestors are listed: Abraham,...mother was \\jmp Ruth|link-href="#article-Ruth"\\jmp*), Jesse, and King David.\n\\w gracious|link-href="http://bibles.org/search/grace/eng-GNTD/all"\\w*') assert.strictEqual(output, true) }) + + it('Milestone markers', function () { + let output = parser.validate('\\id MAT 41MATGNT92.SFM, Good News Translation, June 2003\n\\c 1\n\\v 1 \n\\q1 “Someone is shouting in the desert,\n\\qt-s |sid="qt_123" who="Pilate"\\*“Are you the king of the Jews?”\\qt-e |eid="qt_123"\\*\n\\zms\\*\n\\v 11 Jesus stood before the Roman governor, who questioned him. 
\\qt-s |who="Pilate"\\*“Are you the king of the Jews?”\\qt-e\\* he asked.\n\\p \\qt-s |who="Jesus"\\*“So you say,”\\qt-e\\* answered Jesus.\n\\v 12 But he said nothing in response to the accusations of the chief priests and elders.\n\\p\n\\v 13 So Pilate said to him, \\qt-s |who="Pilate"\\*“Don\'t you hear all these things they accuse you of?”\\qt-e\\*\n\\p\n\\v 14 But Jesus refused to answer ...\n\\ts\\*\n\\p\n\\v 5 Now I wish to remind you, although...\n\\ts-s|sid="ts_JUD_5-6"\\*\n\\p\n\\v 5 Now I wish to remind you, although you know everything, that the Lord once saved a\npeople out of the land of Egypt, but that afterward he destroyed those who did not believe.\n\\v 6 And angels who did not keep to their own principality, but left their proper dwelling\nplace—God has kept them in everlasting chains in darkness for the judgment of the\ngreat day.\n\\ts-e|eid="ts_JUD_5-6"\\*') + assert.strictEqual(output, true) + }) }) diff --git a/usfm.ohm b/usfm.ohm index 491ed85b..84f6140f 100644 --- a/usfm.ohm +++ b/usfm.ohm @@ -258,10 +258,7 @@ usfmBible{ - attributes = namedAttributeList | defaultAttribute - defaultAttribute = "|" "\"" attributeName "\"" - namedAttributeList = "|" (text "=\"" attributeName "\"" spaceChar?)+ - attributeName = char+ + attributes = "|" ( ~backSlash any)+ figureElement = backSlash "fig" spaceChar caption attributes? backSlash "fig*" caption = text @@ -291,12 +288,12 @@ usfmBible{ chapterContentTextContent = newLine? (text | charElement | milestoneElement| table | li | metaScripture | esbElement)+ bookIntroductionEndTitlesTextContent = (text | notesElement | charElement | milestoneElement | zNameSpace | esbElement)+ - milestoneElement = (backSlash "qt" number? "-s" spaceChar* attributes? backSlash "*" ) - | (backSlash "qt" number? "-e" spaceChar* attributes? backSlash "*" ) - | (backSlash "ts" number? "-s" spaceChar* attributes? backSlash "*" ) - | (backSlash "ts" number? "-e" spaceChar* attributes? 
backSlash "*" ) + milestoneElement = (newLine? backSlash "qt" number? "-s"? spaceChar* attributes? backSlash "*" spaceChar?) + | (newLine? backSlash "qt" number? "-e"? spaceChar* attributes? backSlash "*" spaceChar?) + | (newLine? backSlash "ts" number? "-s"? spaceChar* attributes? backSlash "*" spaceChar?) + | (newLine? backSlash "ts" number? "-e"? spaceChar* attributes? backSlash "*" spaceChar?) - zNameSpace = newLine? backSlash "z" char* spaceChar? text? (backSlash "z" char* "*" )? + zNameSpace = newLine? backSlash "z" char* spaceChar? text? (backSlash "*" )? esbElement = newLine backSlash "esb" spaceChar? chapterContentTextContent+ newLine? backSlash "esbe" spaceChar? From 5a769e89ba5090c3d4d82ecefec1c437f4a34555 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 6 Dec 2018 16:29:14 +0530 Subject: [PATCH 13/37] fixed 'crash on match failure' --- grammarOperations.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/grammarOperations.js b/grammarOperations.js index c2295236..634ae87c 100644 --- a/grammarOperations.js +++ b/grammarOperations.js @@ -570,11 +570,11 @@ sem.addOperation('composeJson', { }) exports.match = function (str) { - // try { - let matchObj = bib.match(str) + try { + var matchObj = bib.match(str) let adaptor = sem(matchObj) return adaptor.composeJson() - // } catch (err) { - // return err - // } + } catch (err) { + return matchObj + } } From 6f4541a5a0d8743f86bb81adbe600a820c62b9e9 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Fri, 7 Dec 2018 10:16:12 +0530 Subject: [PATCH 14/37] updated for publishing --- package.json | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index c861121b..eb6ef3f5 100644 --- a/package.json +++ b/package.json @@ -1,4 +1,8 @@ { + "name": "usfm-grammar", + "description": "A simple usfm parser/validator that uses a grammar to model the usfm syntax. The grammar is written in ohm-js(https://ohmlang.github.io/). 
The USFM3.0 syntax is supported. The parser outputs the USFM content in a json structure which gives importance to the easy extraction of scripture content from the mark-ups and additional usfm contents.", + "version": "0.1.0", + "main": "parser.js", "scripts": { "test": "mocha", "start": "./server.js" @@ -7,5 +11,16 @@ "formidable": "^1.2.1", "ohm-js": "^0.14.0", "stringify-object": "^3.3.0" - } + }, + "repository": { + "type": "git", + "url": "https://github.com/Bridgeconn/usfm-grammar" + }, + "keywords": ["usfm", "mark-up", "parser", "ohm", "syntax validator", "json"], + "author": "https://github.com/kavitharaju and https://github.com/joelthe1", + "license": "GNU General Public License v3.0", + "bugs": { + "url": "https://github.com/Bridgeconn/usfm-grammar/issues" + }, + "homepage": "https://github.com/Bridgeconn/usfm-grammar" } From 51b797fa0f218b3c90b250c6ace2cb31068517e0 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Fri, 7 Dec 2018 10:18:39 +0530 Subject: [PATCH 15/37] removed console output of matchObj --- parser.js | 1 - 1 file changed, 1 deletion(-) diff --git a/parser.js b/parser.js index bee14434..2b730ad5 100644 --- a/parser.js +++ b/parser.js @@ -44,7 +44,6 @@ exports.validate = function (str) { try { // Matching the input with grammar and obtaining the JSON output string let matchObj = match(str) - console.log(stringifyObject(matchObj)) output = true } catch (err) { output = false From 7b8ff129dbed6d3d6a5be313200ea4ab06538739 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Fri, 7 Dec 2018 10:24:00 +0530 Subject: [PATCH 16/37] rewrote validate function --- parser.js | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/parser.js b/parser.js index 2b730ad5..8dfe2685 100644 --- a/parser.js +++ b/parser.js @@ -40,14 +40,11 @@ exports.parse = function (str) { } exports.validate = function (str) { - let output = '' - try { - // Matching the input with grammar and obtaining the JSON output string - let matchObj = match(str) - 
output = true - } catch (err) { - output = false + // Matching the input with grammar and obtaining the JSON output string + let matchObj = match(str) + if (matchObj.hasOwnProperty('_rightmostFailures')) { + return false + } else { + return true } - - return output } From 81e4972d99ee49b421a7e44465e0fab013b4573a Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Fri, 7 Dec 2018 10:32:02 +0530 Subject: [PATCH 17/37] enabled starting server with 'npm start' --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index eb6ef3f5..ec17ea4e 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,7 @@ "main": "parser.js", "scripts": { "test": "mocha", - "start": "./server.js" + "start": "node server.js" }, "dependencies": { "formidable": "^1.2.1", From 2442a32ebbf1e46202a2493c50f6080946e65f2d Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Fri, 7 Dec 2018 11:24:37 +0530 Subject: [PATCH 18/37] updated description for publishing --- README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 6a0c6966..8a3cfc6f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # USFM Parser -A library that validates USFM files. -Uses [ohm-js](https://github.com/harc/ohm) for grammar implementation and validation. +A simple usfm parser/validator that uses a grammar to model the usfm syntax. The grammar is written in ohm-js(https://ohmlang.github.io/). The USFM3.0 syntax is supported. The parser outputs the USFM content in a json structure which gives importance to the easy extraction of scripture content from the mark-ups and additional usfm contents. 
Implemented in Node.js # Current implementation @@ -13,11 +12,11 @@ Implemented in Node.js Node server Node modules -`http, fs, formidable, ohm-js, path` +`http, fs, formidable, ohm-js` # Install and Run From the project directory, start the server, as -`node server.js` - -from browser, access +`node server.js` or `npm start` + +and from browser, access http://localhost:8080/index.html From cc959a87e65f7743e14a203f5892c99df8a9e041 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Mon, 10 Dec 2018 11:29:06 +0530 Subject: [PATCH 19/37] edited documentations for publishing --- Disclaimer.md | 53 ++++++++++++++++++++++++++++++++++++ README.md | 2 +- Testing Plan.md | 72 +++---------------------------------------------- usfm.ohm | 2 +- 4 files changed, 59 insertions(+), 70 deletions(-) create mode 100644 Disclaimer.md diff --git a/Disclaimer.md b/Disclaimer.md new file mode 100644 index 00000000..34b86174 --- /dev/null +++ b/Disclaimer.md @@ -0,0 +1,53 @@ +## Document Structure +The USFM document structure is validated by the grammar. These are the basic document level criteria we check for + +* The document starts with an id marker +* The id and usfm marker which follows it, if present, constitutes the *identification* section +* Next section is *Book headers*. 
The following tags may come within the section; +> * ide +> * sts +> * h +> * toc +> * toca +> * mt +> * mte +> * esb +* This is to be followed by an Introduction section which can contain +> * ib +> * ie +> * iex +> * ili +> * im +> * imi +> * imq +> * imt +> * imte +> * io +> * iot +> * ipi +> * ipq +> * ipr +> * ip +> * iq +> * is +> * rem +> * esb +* Following the above 3 metadata sections, there will be multiple chapters marked by c +* Within Chapter,at its starting, we may have a set of metacontents +> * cl(may also come immediately above the first chapter(c)) +> * ca +> * cp +> * cd +* After the chapter metacontents, there comes the actual scripture plus some additional meta-Scripture contents(like sections, footnotes). The Following sections list the possibilities in the chapters content +> * v, va, vp +> * s, ms, mr, sr, r, d, sd +> * po, m, pr, cls, pmo, pm, pmc, pmr, pmi, nb, pc, b, pb, qr, qc, qd, lh, lf, p, pi, ph, q, qm, lim (treated as empty markers, and content treated along with v) +> * footnotes +> * cross references +> * fig +> * table, tr, th, thr, tc, tcr +> * li +> * lit +> * character markers: add, bk, dc, k, nd, ord, pn, png, addpn, qt, sig, sls, tl, wj, em, bd, it, bdit, no sc, sup, ndx, wg, wh, wa, qs, qac, litl, lik, rq, ior, cat, rb, w, jmp, liv +> * namespaces: z* +> * milestones: qt-s, qt-e, ts-s, ts-e diff --git a/README.md b/README.md index 8a3cfc6f..42dd8083 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Implemented in Node.js # Current implementation 1. Parse 2. Validate -(Only validates the internal structure of a set of markers and extracts their components as JSON.) +(Only validates the overall document structure and the internal structure of, a set of main markers relevant for scripture content.) 
# Dependancies Node server diff --git a/Testing Plan.md b/Testing Plan.md index 1f28d2e5..e9401e46 100644 --- a/Testing Plan.md +++ b/Testing Plan.md @@ -1,5 +1,8 @@ # Testing Plan +to run these tests included in the test.js file, +run the command `npm test` from the *usfm-grammar* directory + ## Mandatory Markers Check for the presence of the mandatory markers without which a USFM file will not be valid. @@ -429,7 +432,7 @@ great day. \ts-e|eid="ts_JUD_5-6"\* ``` -## Marker Wise Syntax +## Marker Wise Syntax(to be tested) Check the behaviour of the parser/validator are proper under these situations where internal structure of a marker needs to be validated * The markers with/without certain arguments where its optional @@ -487,70 +490,3 @@ Check the behaviour of the parser/validator are proper under these situations wh -## Document Structure -There is a somewhat loose structure defined for a valid USFM file. Check for these criteria - -* starts with an id -* ide comes just beneath id, if present -* All the following markers come before *chapter* start(*c*) and after *identification* -> * mt# -> * mte# -> * h -> * imt# -> * is# -> * ip -> * ipi -> * im -> * imi -> * ipq -> * imq -> * ipr -> * iq# -> * ib -> * ili -> * iot -> * io# -> * imte# -> * iex -> * cl - -* ca..ca\* comes just beneath *c*, if present -* The following markers occur after *chapter* start and before *parapgraph* start -> * ms# -> * mr -> * s# -> * sr -> * r -> * d -> * cl -> * cd - - -* There will be one or more *p* after a *c* -* There will be one or more *v* after a *p* -* Markers possible after(within) *v* and considered inline -> * rq...\*rq -> * q# -> * va...\*va -> * vp...\*vp - -* Markers that occurs within *p* along with *v* -> * sp -> * sd# -> * iex - -* There are markers that occur within text content of parent markers(**In-line**) -> * ior...ior\* within the *io#* text -> * va...va\* within *v* -> * vp...vp\* within *v* -> * bk - -## Parse Structure -Ensure that the struture is 
paresed correctly to the required JSON structure - -* The id, ide tags come within *identification* section -* The markers go to their repective *introduction, title, heading and label* sections -* *Chapter* becomes a parent object enclosing *sections,paragraphs* - - - diff --git a/usfm.ohm b/usfm.ohm index 84f6140f..66a89ca4 100644 --- a/usfm.ohm +++ b/usfm.ohm @@ -33,7 +33,7 @@ usfmBible{ content = chapter+ - metaData = bookIdentification bookHeaders* introduction* bookChapterLabel? + metaData = bookIdentification bookHeaders* introduction* bookChapterLabel? bookIdentification = idElement usfmElement? From 01f01516e7bf44faf3c1f77e07416bab53b0e9a8 Mon Sep 17 00:00:00 2001 From: Joel Mathew Date: Sun, 9 Dec 2018 22:18:46 -0800 Subject: [PATCH 20/37] Clean-up readme. --- README.md | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 42dd8083..0e9ed9df 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,20 @@ # USFM Parser -A simple usfm parser/validator that uses a grammar to model the usfm syntax. The grammar is written in ohm-js(https://ohmlang.github.io/). The USFM3.0 syntax is supported. The parser outputs the USFM content in a json structure which gives importance to the easy extraction of scripture content from the mark-ups and additional usfm contents. -Implemented in Node.js +A USFM parser/validator that uses [Parsing expression grammar](https://en.wikipedia.org/wiki/Parsing_expression_grammar) to model the USFM structure. It is implemented using [ohm.js](https://ohmlang.github.io/) and supports the [USFM 3.0](https://github.com/ubsicap/usfm/releases/tag/v3.0.0) specification. This library parses the USFM content into a JSON amenable to hassle-free extraction of scripture text. -# Current implementation +## Functions Available 1. Parse 2. Validate (Only validates the overall document structure and the internal structure of, a set of main markers relevant for scripture content.) 
-# Dependancies -Node server +## Dependancies +[Node](https://nodejs.org/en/download/) -Node modules -`http, fs, formidable, ohm-js` -# Install and Run -From the project directory, start the server, as -`node server.js` or `npm start` +## Setup Standalone Server +1. Clone this repository +2. From within the main directory +`npm start` + +3. On the browser go to http://localhost:8080/index.html -and from browser, access -http://localhost:8080/index.html From dc01cf81d06ab747f854452cb0a148c6665fc3d0 Mon Sep 17 00:00:00 2001 From: Joel Mathew Date: Sun, 9 Dec 2018 22:20:25 -0800 Subject: [PATCH 21/37] Update subheading. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0e9ed9df..3875ec60 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ A USFM parser/validator that uses [Parsing expression grammar](https://en.wikipedia.org/wiki/Parsing_expression_grammar) to model the USFM structure. It is implemented using [ohm.js](https://ohmlang.github.io/) and supports the [USFM 3.0](https://github.com/ubsicap/usfm/releases/tag/v3.0.0) specification. This library parses the USFM content into a JSON amenable to hassle-free extraction of scripture text. -## Functions Available +## API Documentation 1. Parse 2. Validate (Only validates the overall document structure and the internal structure of, a set of main markers relevant for scripture content.) From 5c3d36e28a39321ebfd1f40e89df5ccf30d94082 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 13 Dec 2018 12:24:52 +0530 Subject: [PATCH 22/37] Add design limitations in disclaimer --- Disclaimer.md | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/Disclaimer.md b/Disclaimer.md index 34b86174..eed1c6fa 100644 --- a/Disclaimer.md +++ b/Disclaimer.md @@ -1,5 +1,9 @@ +# Disclaimer for usfm-grammar Beta-Release 0.1.0 + ## Document Structure -The USFM document structure is validated by the grammar. 
These are the basic document level criteria we check for + +We have referred the USFM 3.0 specifications along with the USX documentation to arrive at a structure definition for the language. +The USFM document structure is validated by the grammar. These are the basic document level criteria we check for * The document starts with an id marker * The id and usfm marker which follows it, if present, constitutes the *identification* section @@ -51,3 +55,22 @@ The USFM document structure is validated by the grammar. These are the basic doc > * character markers: add, bk, dc, k, nd, ord, pn, png, addpn, qt, sig, sls, tl, wj, em, bd, it, bdit, no sc, sup, ndx, wg, wh, wa, qs, qac, litl, lik, rq, ior, cat, rb, w, jmp, liv > * namespaces: z* > * milestones: qt-s, qt-e, ts-s, ts-e + +## Some Design Limitations + +* We have not considered USFM files with peripherals +* We are not validating/parsing the internal contents of footnotes, crossreferences, milestones. But the markers are being identified and contents extracted, without checking for their correctness +* The markers are treated as either mandatory or optional. The valid number of occurances is not considered + eg: _\\usfm_ should ideally occur only once, if present, and similarly _\\sts_ can come multtple times. As per the current implemetation, the optional markers can occur any number of times. +* We have assumed certain structural constraints in USFM, which were not explicitly mentioned in the USFM spec. For example, the markers _\\ca_, _\\cl_, _\\cp_ and _\\cd_ occurs immediately below the _\\c_ marker, before the verse blocks start. +* Documentation says, _\\imt1, \\imt2, \\imt3_(similarly _imte, ili, ie, iq, mt_) are all parts of a major title. So we are combining them ignoring the numerical weightage factor/difference. 
+* As per USFM spec, there is no limit for possible numbers(not limited to 1,2,and 3) in numbered markers...though the USX _valid style types_ lists them as specifically numbered(1 & 2 or 1,2 & 3). We are following _no limit_ rules.(except for _\\toc & \\toca_) +* The valid attribute names for word-level markers are not checked. Any attribute name with valid syntax would be accepted +* The paragraph markers(showing indentation) that appear within verses, should ideally be attached to the text that follows it. But we are attaching it to the verse marker immediatedly above it. + +## To Do + +* As a pre-processing +> * Whitespace and line normalization +> * Captitalize book codes, if they are in small-case + and display a warning message From a2b6dcefd0929dfde3ae743ac696f241c26b120a Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 13 Dec 2018 12:25:13 +0530 Subject: [PATCH 23/37] refine the questions, keeping only most relevant --- Questions.md | 43 +++++++++++-------------------------------- 1 file changed, 11 insertions(+), 32 deletions(-) diff --git a/Questions.md b/Questions.md index 912bbe84..7d728e03 100644 --- a/Questions.md +++ b/Questions.md @@ -1,37 +1,16 @@ -## Questions/ Dev Notes -## -------------------- - -* Is an empty line wihin a USFM file valid? Ideally not an error, but worth raising a warning(possibility of warnings, to be checked). The doc says, _"All paragraph markers should be preceded by *a single* newline."_ That makes an empty line an error, though we are not treating it so. -* Inline markers like _\\x_ , _\\f_ etc can start without a space seperating it from text content -* Inline markers(character markers) may also occur on new lines -* A problem not handled: The markers are treated as either mandatory or optional. The valid number of occurances is not considered - eg: _\\usfm_ should ideally occur only once, if present, and similarly _\\sts_ can come multtple times. 
Now the optional markers can occur any number of times -* why are there common markers(that can occur in any three) for these sections and why are they divided as 3 in USX, as _bookTitles_, _bookIntroductionTiles_, and _bookIntroductionEndTitles_ ? -* For the same marker(eg: _\\imt_) being in _bookTitles_, _bookIntroductionTiles_, and _bookIntroductionEndTitles_ requires 3 different rules as allowed child elements(in-line markers) for these sections are different. We have only one rule defining it with the larger child elements set(_bookIntroductionTilesTextContent_). -* added _\\mt_ along with bookHeaders. (it actually includes all markers under the identification section in USFM doc, except _\\id_ and _\\usfm_) -* added _toca#_ elements also to _book headers_, though they were not listed in the USX document structure's valid style types for the section -* The peripheral in USX seems separate from the scripture part. Hence avoiding it in Grammar, for now. -* There are two overlapping structures for bible content, in USFM.1) the paragraph structures used to express the discourse / narrative of the text and 2) the division of the text into books, chapters and verses. We are following only the following structure in the parsed JSON output: Chapter as parent, and verses as children. Hence ingoring the paragraph wise structuring and treating para markers as only meant for indentation change. -* In chapter element it lists _\\imt1_ as a valid style type as the first element. All other _imt_ markers(_\\imt, \\imt2, \\imt3_) are missing. The list of vaild style type says its alphabetical list explicitly... So assuming that _\\imt1_ got there by mistake and hence avoiding that from Grammar -* Assuming that the markers _\\ca_, _\\cl_, _\\cp_ and _\\cd_ occurs immediately below the _\\c_ marker, before the verse blocks start -* Documentation says, _\\imt1, \\imt2, \\imt3_(similarly _imte, ili, ie, iq, mt_) are all parts of a major title. 
So we are combining them ignoring the numerical weightage factor/difference. _\\ms#, \\is#_ have not been combined so. -* Do not understand the doc explaning change for _\\h_ as USFM3.0 comes -* As per doc, there is no limit for possible numbers(not limited to 1,2,and 3) in numbered markers...though the USX _valid style types_ lists them as specifically numbered(1 & 2 or 1,2 & 3). We are following _no limit_ rules.(except for _\\toc & \\toca_) +## Questions +## --------- + +* In USX we see 3 sections in the introduction part, _bookTitles_, _bookIntroductionTiles_, and _bookIntroductionEndTitles_. What are their relevance when USFM is considered? + + * As per USFM doc examples, _\\iex_ and _\\imte_ occurs within/at the end of chapter content...But included in _bookIntroductionTitles_ in the Grammar(as per the list of valid style types in USX doc). -* The _\\iot, \\io# & \\ior_ elements could be clubbed into an outline division and their relative ordering ensured...But not done(now all those can come anywhere in the _bookIntroductionTitles_) -* _\\ms#_ defines a major section outside of section(_\\s#_) division. But we have not captured it structural relevance. Instead, treating it as an independant element, and attaching it to section header of the section immeditately following it + * Use of the markers _\\wg, \\wh, \\wa_ is not clear from documentation. Assuming that it encloses verse's content words and not add additional contents to the verse text. -* Removed _verseElement_ from chapterContentTextContent(though the USX doc defines it so), inorder to avoid un-necessary nesting of verse elements -* Internal structure of crossref markers and footnote markers are not validated/parsed, as of now. Considers everything from open marker to close marker as a single unit and verifies that whatever marker occured in there is a permitted one there( its content or syntax not checked/parsed) -* The valid attribute names for word-level markers are not checked. 
Any attribute name with valid syntax would be accepted -* The _optbreak_ break in USX doc seems to have not been implemented as such in USFM. So not including that in Grammar(do _\\pb, \\b, ~ and \\\\ etc_ serves its purpose in USFM?) -* There seems to be not marker in place of USX __ element in USFM. So removing that also from the Grammar. The reference text would be treated as normal text content itself.(looks like the character marker, _\\rq..\\rq*_, is a substitute) -* ms mentioned as valid child elements in USX spec, refers to milestones(_\\qt and \\ts_) in USFM rather than the _\\ms#_ element +* In USX we see _opt_break_. How are they/Are they implemented in USFM? (do _\\pb, \\b, ~ and \\\\ etc_ serves its purpose in USFM?) + -* where does _\\mte_ occur in USFM files? Doc has a mention of _at the end of the introduction_ ... USX doc indicates its within chapter content... so going with that. +* Where does _\\mte_ occur in USFM files? The USFM spec has a mention of _at the end of the introduction_ ... USX doc indicates its within chapter content... (we have assumed its valid within the chapter content). -* the USX doc says within a chapter we can have _\\ip_ element. Hence added that to _metaScripture_ -* took away the rule that says, there should be a paraElement at the start of chapter -* took away sections headings from the main JSON structure. Including them only as a metaScripture content. JSON follows Book-Chapter-verse structure now. -* the paragraph markers(showing indentation) that appear within verses should ideally be attached to the text that follows it. But we are attaching it to the verse marker immediatedly above it. \ No newline at end of file +* Is there a rule, that there should be a _\\p_, or similar marker that shows indentation, at the start of the chapter(or the start of the the first chapter)? 
From 125d11a674a40de01b2dde592431fccc8aec6583 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 13 Dec 2018 12:26:00 +0530 Subject: [PATCH 24/37] update README with both usage options, as npm module and node server --- README.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/README.md b/README.md index 3875ec60..bddceb5e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # USFM Parser +<<<<<<< Updated upstream A USFM parser/validator that uses [Parsing expression grammar](https://en.wikipedia.org/wiki/Parsing_expression_grammar) to model the USFM structure. It is implemented using [ohm.js](https://ohmlang.github.io/) and supports the [USFM 3.0](https://github.com/ubsicap/usfm/releases/tag/v3.0.0) specification. This library parses the USFM content into a JSON amenable to hassle-free extraction of scripture text. ## API Documentation @@ -9,8 +10,31 @@ A USFM parser/validator that uses [Parsing expression grammar](https://en.wikipe ## Dependancies [Node](https://nodejs.org/en/download/) +======= +This is a simple usfm parser/validator that uses a grammar to model the usfm syntax. The grammar is written in ohm-js(https://ohmlang.github.io/). The USFM3.0 syntax is supported. The parser outputs the USFM content in a json structure which gives importance to the easy extraction of scripture content from the mark-ups and additional usfm contents. +Implemented in Node.js +## Setup: To use as NPM library +npm install usfm-grammar + +## Usage: As NPM library + +``` +var grammar = require('usfm-grammar) +var jsonOutput = garmmar.parse(/**The USFM Text to be converted to JSON**/) +var usfmValidity = grammar.validate(/**USFM Text to be checked**/) +``` + +## To use as a local Node server + +### Dependancies + +Node server +>>>>>>> Stashed changes + + +<<<<<<< Updated upstream ## Setup Standalone Server 1. Clone this repository 2. 
From within the main directory @@ -18,3 +42,16 @@ A USFM parser/validator that uses [Parsing expression grammar](https://en.wikipe 3. On the browser go to http://localhost:8080/index.html +======= +### Install and Run + +Clone the git repo + +`git clone https://github.com/Bridgeconn/usfm-grammar.git` + +From the project directory, start the server, as +`node server.js` or `npm start` + +and from browser, access +http://localhost:8080/index.html +>>>>>>> Stashed changes From 41cbd430da1e8a6b7b4e47b03475a1fa9cefcc65 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 13 Dec 2018 12:26:49 +0530 Subject: [PATCH 25/37] Remove the first grammar we had --- grammar.md | 77 ------------------------------------------------------ 1 file changed, 77 deletions(-) delete mode 100644 grammar.md diff --git a/grammar.md b/grammar.md deleted file mode 100644 index 27db9b28..00000000 --- a/grammar.md +++ /dev/null @@ -1,77 +0,0 @@ -## To Test -Copy the [grammar](https://github.com/Bridgeconn/usfm-spew/new/master#grammar) and paste in [PEG.js online editor](https://pegjs.org/online). - -Here is a sample input to play around - -``` -\ddf* sdlkfna sdfa lskdfj -\c 1 sldk -``` - - -## Grammar -``` -content = line+ -line = t:(marker) c:(text)* [\n]* { return { - marker: t, - text: c.join(" ") - } - } -text = t:(word) space? { return t; } -marker = backslash t:(text_marker) space? { return t; } / - backslash t:(word) space? 
{ return t; } -word = $ letter+ -letter = [a-zA-Z0-9*#!.-] -number = [0-9]+ -space = ' '+ -backslash = [\\] - -// 1 Markers with no text -simple_marker = 'ie' / 'b' / 'ib' - -// 2 Markers with text -text_marker = 'ide' / 'sts' / 'rem' - / 'h' / 'toc1' / 'toc2' - / 'toc3' / 'imt' number / 'is' number - / 'ip' / 'ipi' / 'im' - / 'imi' / 'ipq' / 'imq' - / 'ipr' / 'iq' number / 'ib' - / 'ili' / 'iot' / 'io' number - / 'mt' number / 'mte' number / 'ms' number - / 'mr' / 's' number / 'sr' - / 'r' / 'c' / 'cl' - / 'cp' / 'li' number / 'pc' - / 'pr' / 'tr' / 'th' number - / 'thr' number / 'tc' number / 'tcr' number - -// 3 Markers with optional text -opt_text_marker = 'p' / 'm' / 'q' number - / 'qr' / 'qc' - -// 4 Markers with a value and text -double_arg_marker = 'id' / 'v' - -// 5 Character style markers (e.g. \ior...\ior*) -char_marker = 'ior' / 'iqt' / 'rq' - / 'va' / 'vp' / 'qs' - / 'add' / 'bk' / 'dc' - / 'qac' / 'k' / 'nd' - / 'ord' / 'pn' / 'qt' - / 'sig' / 'sls' / 'tl' - / 'wj' / 'em' / 'bd' - / 'it' / 'bdit' / 'no' - / 'sc' / 'ndx' - -// 7 Notes makers -notes_marker = 'f...f*' / 'fe...fe*' - / 'fr' / 'fk' / 'fq' - / 'fqa' / 'fl' / 'fp' - / 'fv' / 'ft' / 'fdc...fdc*' - / 'fm...fm* ' / 'x...x*' / 'xo' - / 'xk' / 'xq' / 'xt' - / 'xot...xot*' / 'xnt...xnt*' / 'xdc...xdc*' - / 'fl' - -// 9 Markers with attributes -attrib_marker = 'fig...fig*' -``` From 73ddfdb718443a1c40286d3a85243d40fe1fd69c Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Sat, 15 Dec 2018 12:17:54 +0530 Subject: [PATCH 26/37] fix issues in README, that came upon, conflit resolution and merging --- README.md | 50 ++++++++++++++++---------------------------------- 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index bddceb5e..10a949d9 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,17 @@ -# USFM Parser +# USFM Grammar -<<<<<<< Updated upstream -A USFM parser/validator that uses [Parsing expression 
grammar](https://en.wikipedia.org/wiki/Parsing_expression_grammar) to model the USFM structure. It is implemented using [ohm.js](https://ohmlang.github.io/) and supports the [USFM 3.0](https://github.com/ubsicap/usfm/releases/tag/v3.0.0) specification. This library parses the USFM content into a JSON amenable to hassle-free extraction of scripture text. +This is a simple usfm parser/validator that uses a grammar to model the usfm syntax. The grammar is written in ohm-js( The USFM3.0 syntax is supported. The parser outputs the USFM content in a json structure which gives importance to the easy extraction of scripture content from the mark-ups and additional usfm contents. +Implemented in Node.js -## API Documentation -1. Parse -2. Validate -(Only validates the overall document structure and the internal structure of, a set of main markers relevant for scripture content.) +## To Setup -## Dependancies -[Node](https://nodejs.org/en/download/) -======= -This is a simple usfm parser/validator that uses a grammar to model the usfm syntax. The grammar is written in ohm-js(https://ohmlang.github.io/). The USFM3.0 syntax is supported. The parser outputs the USFM content in a json structure which gives importance to the easy extraction of scripture content from the mark-ups and additional usfm contents. -Implemented in Node.js +The project is available as an npm library, which can be installed with the following command. -## Setup: To use as NPM library +`npm install usfm-grammar` -npm install usfm-grammar +## Usage -## Usage: As NPM library +To use it from your node application: ``` var grammar = require('usfm-grammar) @@ -26,32 +19,21 @@ var jsonOutput = garmmar.parse(/**The USFM Text to be converted to JSON**/) var usfmValidity = grammar.validate(/**USFM Text to be checked**/) ``` -## To use as a local Node server - -### Dependancies +The `grammar.parse()` method returns a json structure for the USFM text contents, if it is a valid usfm file. 
+The `grammar.validate()` method returns a true/false, depending on whether the input usfm text's syntax is valid or not. -Node server ->>>>>>> Stashed changes +## To Use as a Local Node server +The project could also be installed locally for testing. For that there is a server setup provided. -<<<<<<< Updated upstream -## Setup Standalone Server -1. Clone this repository -2. From within the main directory -`npm start` - -3. On the browser go to http://localhost:8080/index.html - -======= -### Install and Run +### Install and Run: Local Node Server Clone the git repo - `git clone https://github.com/Bridgeconn/usfm-grammar.git` -From the project directory, start the server, as +From the project directory, start the server, as `node server.js` or `npm start` and from browser, access -http://localhost:8080/index.html ->>>>>>> Stashed changes + + From e8f580d8daaaf35d885a7d85fd5279d540671644 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Sat, 15 Dec 2018 12:25:29 +0530 Subject: [PATCH 27/37] add link to peripherals and correct a typo --- Disclaimer.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Disclaimer.md b/Disclaimer.md index eed1c6fa..db42d927 100644 --- a/Disclaimer.md +++ b/Disclaimer.md @@ -58,8 +58,8 @@ The USFM document structure is validated by the grammar. These are the basic doc ## Some Design Limitations -* We have not considered USFM files with peripherals -* We are not validating/parsing the internal contents of footnotes, crossreferences, milestones. But the markers are being identified and contents extracted, without checking for their correctness +* We have not considered USFM files with peripherals () +* We are not validating/parsing the internal contents of footnotes, cross-references and milestones. But the markers are being identified and contents extracted, without checking for their correctness * The markers are treated as either mandatory or optional. 
The valid number of occurances is not considered eg: _\\usfm_ should ideally occur only once, if present, and similarly _\\sts_ can come multtple times. As per the current implemetation, the optional markers can occur any number of times. * We have assumed certain structural constraints in USFM, which were not explicitly mentioned in the USFM spec. For example, the markers _\\ca_, _\\cl_, _\\cp_ and _\\cd_ occurs immediately below the _\\c_ marker, before the verse blocks start. From ade57032c07499fb822c159eb0e1a59c3c533b8c Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Sat, 15 Dec 2018 13:23:23 +0530 Subject: [PATCH 28/37] remove unused declarations and unnecessary console messages --- grammarOperations.js | 2 -- parser.js | 7 +------ server.js | 3 +-- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/grammarOperations.js b/grammarOperations.js index 634ae87c..995bdfeb 100644 --- a/grammarOperations.js +++ b/grammarOperations.js @@ -493,7 +493,6 @@ sem.addOperation('composeJson', { thElement: function(_, _, num, _, text) { - console.log(num) return {'th': text.sourceString, 'column':num.sourceString} }, @@ -538,7 +537,6 @@ sem.addOperation('composeJson', { chapterContentTextContent: function(_,element) { let text = element.composeJson() - console.log(text) return text }, diff --git a/parser.js b/parser.js index 8dfe2685..4ee11248 100644 --- a/parser.js +++ b/parser.js @@ -1,5 +1,4 @@ const match = require('./grammarOperations.js').match -const stringifyObject = require('stringify-object') exports.parse = function (str) { let matchObj = match(str) @@ -14,10 +13,7 @@ exports.parse = function (str) { var prevLineStart = 0 var nextLineStart = 0 var lineCount = 0 - console.log('Entering loop') while (nextLineStart < pos) { - console.log('nextLineStart:' + nextLineStart) - console.log('pos:' + pos) lineCount += 1 prevLineStart = nextLineStart nextLineStart = matchObj['input'].indexOf('\n', nextLineStart + 1) @@ -26,7 +22,6 @@ exports.parse = function 
(str) { } } } - console.log('out of toop') let inputSnippet = matchObj['input'].substring(prevLineStart, nextLineStart) @@ -36,7 +31,7 @@ exports.parse = function (str) { let output = 'Error at character' + inLinePos + ', in line ' + lineCount + " '" + outputSnippet + "': " + message - return matchObj['input'] + '\n' + output + return matchObj['input'] + '

' + output } exports.validate = function (str) { diff --git a/server.js b/server.js index dc073ec3..8bceb729 100644 --- a/server.js +++ b/server.js @@ -3,7 +3,6 @@ const fs = require('fs') const formidable = require('formidable') const stringifyObject = require('stringify-object') const parser = require('./parser.js') -const form = null console.log('server up...listening to 8080 at http://localhost') http.createServer(function (req, res) { @@ -39,7 +38,7 @@ http.createServer(function (req, res) { res.writeHead(200, { 'Content-Type': 'text/html; charset=utf-8' }) res.write('Back Home


') let data = fields.inputText - console.log(data) + // console.log(data) if (data.substr(-1) === '\'') { data = data.substr(0, data.length - 1) } From aff63f82181a95138c2a8217a50f48d3c7b4fcc7 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Sun, 16 Dec 2018 13:01:21 +0530 Subject: [PATCH 29/37] add normalization for multi- spaces, multi-lines and lowercase bookcode --- Disclaimer.md | 9 +-------- parser.js | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/Disclaimer.md b/Disclaimer.md index db42d927..5c6f22a8 100644 --- a/Disclaimer.md +++ b/Disclaimer.md @@ -66,11 +66,4 @@ The USFM document structure is validated by the grammar. These are the basic doc * Documentation says, _\\imt1, \\imt2, \\imt3_(similarly _imte, ili, ie, iq, mt_) are all parts of a major title. So we are combining them ignoring the numerical weightage factor/difference. * As per USFM spec, there is no limit for possible numbers(not limited to 1,2,and 3) in numbered markers...though the USX _valid style types_ lists them as specifically numbered(1 & 2 or 1,2 & 3). We are following _no limit_ rules.(except for _\\toc & \\toca_) * The valid attribute names for word-level markers are not checked. Any attribute name with valid syntax would be accepted -* The paragraph markers(showing indentation) that appear within verses, should ideally be attached to the text that follows it. But we are attaching it to the verse marker immediatedly above it. - -## To Do - -* As a pre-processing -> * Whitespace and line normalization -> * Captitalize book codes, if they are in small-case - and display a warning message +* The paragraph markers(showing indentation) that appear within verses, should ideally be attached to the text that follows it. But we are attaching it to the verse marker immediatedly above it. 
\ No newline at end of file diff --git a/parser.js b/parser.js index 4ee11248..a753c8a5 100644 --- a/parser.js +++ b/parser.js @@ -1,7 +1,23 @@ const match = require('./grammarOperations.js').match +const multiLinePattern = new RegExp('[\\n\\r]+', 'g') +const multiSpacePattern = new RegExp(' +', 'g') +const bookCodePattern = new RegExp('\\id ([a-z][a-z][a-z]) ', 'g') +function normalize (str) { + let newStr = '' + newStr = str.replace(multiLinePattern, '\n') + newStr = newStr.replace(multiSpacePattern, ' ') + let match = bookCodePattern.exec(newStr) + if (match) { + let bookCode = match[1] + newStr = newStr.replace(bookCode, bookCode.toUpperCase()) + } + return newStr +} + exports.parse = function (str) { - let matchObj = match(str) + let inStr = normalize(str) + let matchObj = match(inStr) if (!matchObj.hasOwnProperty('_rightmostFailures')) { return matchObj @@ -35,8 +51,9 @@ exports.parse = function (str) { } exports.validate = function (str) { + let inStr = normalize(str) // Matching the input with grammar and obtaining the JSON output string - let matchObj = match(str) + let matchObj = match(inStr) if (matchObj.hasOwnProperty('_rightmostFailures')) { return false } else { From ba952afbfea69d3b795653ab3eedfa2e543e9b79 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Sun, 16 Dec 2018 15:15:19 +0530 Subject: [PATCH 30/37] add an option to get a clean parsed output, with only scripture content --- grammarOperations.js | 39 +----------------------------- parser.js | 21 +++++++++++++++-- server.js | 56 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 71 insertions(+), 45 deletions(-) diff --git a/grammarOperations.js b/grammarOperations.js index 995bdfeb..394fee49 100644 --- a/grammarOperations.js +++ b/grammarOperations.js @@ -13,44 +13,7 @@ console.log('Initializing grammar') sem.addOperation('composeJson', { File: function (e) { let res = e.composeJson() - let resString = JSON.stringify(res) - let indent_count = 0 - let i=0 - let beautifiedResString = 
'' - for( i=0; iBack Home


') - if (data === '') { data = '

File Empty!!!

' } else { data = parser.parse(data) } - res.write(stringifyObject(data)) + if (data === '') { + data = '

File Empty!!!

' + } else { + data = beautifyResultForHtml(parser.parse(data)) + } + res.write(data) res.end() }) }) @@ -42,8 +84,12 @@ http.createServer(function (req, res) { if (data.substr(-1) === '\'') { data = data.substr(0, data.length - 1) } - if (data === '') { data = '

Text Empty!!!

' } else { data = parser.parse(data) } - res.write(stringifyObject(data)) + if (data === '') { + data = '

Text Empty!!!

' + } else { + data = beautifyResultForHtml(parser.parse(data, 'clean')) + } + res.write(data) res.end() }) break From 4a2f96e653c256d349e1e8feb60775eb4ee36833 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Tue, 18 Dec 2018 16:28:53 +0530 Subject: [PATCH 31/37] seperate verse text and verse metadata properly --- grammarOperations.js | 60 +++++++++++++++++++++++++++++++++++--------- server.js | 10 ++++---- usfm.ohm | 8 +++--- 3 files changed, 57 insertions(+), 21 deletions(-) diff --git a/grammarOperations.js b/grammarOperations.js index 394fee49..4133319b 100644 --- a/grammarOperations.js +++ b/grammarOperations.js @@ -95,15 +95,31 @@ sem.addOperation('composeJson', { return obj }, - verseElement: function (_, _, _, _, verseNumber, verseMeta, metaScripture, verseText) { + verseElement: function (_, _, _, _, verseNumber, verseMeta, verseContent) { let verse ={} verse['number'] = verseNumber.composeJson() - if ( verseMeta.sourceString!='' ) { verse['metadata'] = verseMeta.composeJson()} - - if ( metaScripture.sourceString!='' ) { verse['metadata_inline'] = metaScripture.composeJson()} - - verse['text'] = verseText.composeJson() - // if (verseTextMore.sourceString!='') { verse['text'].concat(verseTextMore.composeJson())} + verse['metadata'] = [] + if ( verseMeta.sourceString!='' ) { verse['metadata'].push(verseMeta.composeJson()) } + contents = verseContent.composeJson() + verse['text'] = '' + for (let i=0; iBack Home


') let data = fields.inputText - // console.log(data) - if (data.substr(-1) === '\'') { - data = data.substr(0, data.length - 1) - } + console.log(data) + // if (data.substr(-1) === '\'') { + // data = data.substr(0, data.length - 1) + // } if (data === '') { data = '

Text Empty!!!

' } else { - data = beautifyResultForHtml(parser.parse(data, 'clean')) + data = beautifyResultForHtml(parser.parse(data)) } res.write(data) res.end() diff --git a/usfm.ohm b/usfm.ohm index 66a89ca4..c53d1132 100644 --- a/usfm.ohm +++ b/usfm.ohm @@ -8,7 +8,7 @@ usfmBible{ char = ~(backSlash) ~(newLine) ~spaceChar any //char = letter | digit | punctuations //punctuations = "!" | "\"" | "#" | "$" | "%" | "&" | "(" | ")" | "+" | "-" | "." | "/" | ":" | ";" | "<" | "=" | ">" | "?" | "@" | "[" | "]" | "^" | "_" | "`" | "{" | "}" | "’" | "‘" | "“" | "”" | "।" | "]" | "~" - text = word+ + text = newLine? word+ number = digit+ word = char+ spaceChar? @@ -50,12 +50,12 @@ usfmBible{ chapterHeader = cElement (caElement | cpElement | clElement | cdElement)* - metaScripture = sectionHeader | mte | remElement | iexElement | ipElement | spElement | litElement | qaElement | paraElement | notesElement | figureElement | zNameSpace + metaScripture = sectionHeader | mte | remElement | iexElement | ipElement | spElement | litElement | qaElement | paraElement | notesElement | figureElement | zNameSpace | milestoneElement sectionHeader = sectionPreHeader? sectionElement sectionPostHeader* sectionPreHeader = msElement mrElement? sectionPostHeader = srElement | rElement | dElement - verseElement = newLine backSlash "v" spaceChar verseNumber (vaElement | vpElement)* (metaScripture? verseText)+ + verseElement = newLine backSlash "v" spaceChar verseNumber (vaElement | vpElement)* (verseText | metaScripture)+ verseNumber = number ("-" number)? spaceChar @@ -285,7 +285,7 @@ usfmBible{ bookIntroductionTitlesTextContent = (text | notesElement | charElement | milestoneElement | figureElement | zNameSpace)+ bookTitlesTextContent = (text | notesElement | charElement | zNameSpace)+ - chapterContentTextContent = newLine? 
(text | charElement | milestoneElement| table | li | metaScripture | esbElement)+ + chapterContentTextContent = text | charElement | table | li | esbElement bookIntroductionEndTitlesTextContent = (text | notesElement | charElement | milestoneElement | zNameSpace | esbElement)+ milestoneElement = (newLine? backSlash "qt" number? "-s"? spaceChar* attributes? backSlash "*" spaceChar?) From ade6ae04b88f76b568e237d39500d6f476fa3125 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Wed, 19 Dec 2018 12:39:31 +0530 Subject: [PATCH 32/37] fix some issues introduced by separating versetext and metadata --- grammarOperations.js | 15 +--- usfm.ohm | 163 +++++++++++++++++++++---------------------- 2 files changed, 84 insertions(+), 94 deletions(-) diff --git a/grammarOperations.js b/grammarOperations.js index 4133319b..6f7f78d7 100644 --- a/grammarOperations.js +++ b/grammarOperations.js @@ -76,20 +76,14 @@ sem.addOperation('composeJson', { return elmt.composeJson() }, - sectionHeader: function (preHead, s, postHead) { + sectionHeader: function (s, postHead, ipElement) { let sectionHeaderVar = {} - if (preHead.sourceString!='') { sectionHeaderVar['section preheader'] = preHead.composeJson() } sectionHeaderVar['section'] = s.composeJson() - if (postHead.sourceString!='') { sectionHeaderVar['section postheader'] = postHead.composeJson() } + if (postHead.sourceString!='') { sectionHeaderVar['sectionPostheader'] = postHead.composeJson() } + if (ipElement.sourceString!='') { sectionHeaderVar['introductionParagraph'] = ipElement.composeJson() } return sectionHeaderVar }, - sectionPreHeader: function (ms, mr) { - let obj = ms.composeJson() - if ( mr.sourceString!='' ) { obj['mr'] = mr.composeJson()} - return obj - }, - sectionPostHeader: function (meta) { let obj = meta.composeJson() return obj @@ -118,8 +112,6 @@ sem.addOperation('composeJson', { } } if (verse['metadata'].length == 0) { delete verse.metadata} - console.log('verse:'+JSON.stringify(verse)) - 
console.log(verse['metadata']) return verse }, @@ -433,7 +425,6 @@ sem.addOperation('composeJson', { inLineCharAttributeElement: function(_, _, tag, _, text, attribs, _, _, _, _) { let obj = {} obj[tag.sourceString]= {'content': text.composeJson(), 'Attributes':attribs.sourceString} - // console.log(text.composeJson()) obj['text'] = obj[tag.sourceString]['content'] return obj }, diff --git a/usfm.ohm b/usfm.ohm index c53d1132..05c57622 100644 --- a/usfm.ohm +++ b/usfm.ohm @@ -50,18 +50,17 @@ usfmBible{ chapterHeader = cElement (caElement | cpElement | clElement | cdElement)* - metaScripture = sectionHeader | mte | remElement | iexElement | ipElement | spElement | litElement | qaElement | paraElement | notesElement | figureElement | zNameSpace | milestoneElement + metaScripture = sectionHeader | mte | remElement | iexElement | ipElement | spElement | litElement | qaElement | paraElement | notesElement | figureElement | zNameSpace | milestoneElement | esbElement - sectionHeader = sectionPreHeader? sectionElement sectionPostHeader* - sectionPreHeader = msElement mrElement? - sectionPostHeader = srElement | rElement | dElement + sectionHeader = sectionElement sectionPostHeader* ipElement* + sectionPostHeader = srElement | rElement | dElement | mrElement verseElement = newLine backSlash "v" spaceChar verseNumber (vaElement | vpElement)* (verseText | metaScripture)+ verseNumber = number ("-" number)? 
spaceChar - sectionElement = sectionElementWithTitle | sectionElementWithoutTitle + sectionElement = sectionElementWithTitle | sectionElementWithoutTitle | msElement - sectionElementWithTitle = sectionMarker chapterContentTextContent + sectionElementWithTitle = sectionMarker (chapterContentTextContent | notesElement | milestoneElement)+ sectionElementWithoutTitle = sdMarker sectionMarker = newLine backSlash ( "s1" | "s2" | "s3" | "s4" | "s" ) spaceChar sdMarker = newLine backSlash "sd" number spaceChar* @@ -74,7 +73,7 @@ usfmBible{ publishedCharElement = publishedCharMarker text publishedCharMarker "*" publishedCharMarker = backSlash "vp" - verseText = chapterContentTextContent + verseText = chapterContentTextContent+ paraMarker = paraUnNumberedMarker | paraNumberedMarker @@ -86,16 +85,16 @@ usfmBible{ cElement = newLine backSlash "c" spaceChar number caElement = newLine backSlash "ca" spaceChar number backSlash "ca*" - cdElement = newLine backSlash "cd" spaceChar chapterContentTextContent + cdElement = newLine backSlash "cd" spaceChar ((chapterContentTextContent | notesElement | milestoneElement)+ | notesElement)+ clElement = newLine backSlash "cl" spaceChar text cpElement = newLine backSlash "cp" spaceChar text - dElement = newLine backSlash "d" spaceChar chapterContentTextContent + dElement = newLine backSlash "d" spaceChar (chapterContentTextContent | notesElement | milestoneElement)+ hElement = newLine backSlash "h" number? spaceChar text - remElement = newLine backSlash "rem" spaceChar chapterContentTextContent + remElement = newLine backSlash "rem" spaceChar (chapterContentTextContent | notesElement | milestoneElement)+ stsElement = newLine backSlash "sts" spaceChar text - spElement = newLine backSlash "sp" spaceChar chapterContentTextContent+ + spElement = newLine backSlash "sp" spaceChar (chapterContentTextContent | notesElement | milestoneElement)+ ibElement = newLine backSlash "ib" spaceChar? 
idElement = backSlash "id" spaceChar bookCode spaceChar text @@ -126,12 +125,12 @@ usfmBible{ mrElement = newLine backSlash "mr" spaceChar text msElement = newLine backSlash "ms" number? spaceChar text mt = mtElement+ - mtElement = newLine backSlash "mt" number? spaceChar chapterContentTextContent + mtElement = newLine backSlash "mt" number? spaceChar (chapterContentTextContent | notesElement | milestoneElement)+ mte = mteElement+ - mteElement = newLine backSlash "mte" number? spaceChar chapterContentTextContent - rElement = newLine backSlash "r" spaceChar chapterContentTextContent + mteElement = newLine backSlash "mte" number? spaceChar (chapterContentTextContent | notesElement | milestoneElement)+ + rElement = newLine backSlash "r" spaceChar (chapterContentTextContent | notesElement | milestoneElement)+ - srElement = newLine backSlash "sr" spaceChar chapterContentTextContent + srElement = newLine backSlash "sr" spaceChar (chapterContentTextContent | notesElement | milestoneElement)+ tocElement = newLine backSlash ("toc1" | "toc2" | "toc3") spaceChar text tocaElement = newLine backSlash ("toca1" | "toca2" | "toca3") spaceChar text usfmElement = newLine backSlash "usfm" spaceChar version @@ -182,66 +181,66 @@ usfmBible{ charElement = inLineCharElement | inLineCharAttributeElement | inLineCharNumberedElement - inLineCharElement = newLine? backSlash "add" spaceChar chapterContentTextContent backSlash "add" "*" spaceChar? - | newLine? backSlash "+add" spaceChar chapterContentTextContent backSlash "+add" "*" spaceChar? - | newLine? backSlash "bk" spaceChar chapterContentTextContent backSlash "bk" "*" spaceChar? - | newLine? backSlash "+bk" spaceChar chapterContentTextContent backSlash "+bk" "*" spaceChar? - | newLine? backSlash "dc" spaceChar chapterContentTextContent backSlash "dc" "*" spaceChar? - | newLine? backSlash "+dc" spaceChar chapterContentTextContent backSlash "+dc" "*" spaceChar? - | newLine? 
backSlash "k" spaceChar chapterContentTextContent backSlash "k" "*" spaceChar? - | newLine? backSlash "+k" spaceChar chapterContentTextContent backSlash "+k" "*" spaceChar? - | newLine? backSlash "nd" spaceChar chapterContentTextContent backSlash "nd" "*" spaceChar? - | newLine? backSlash "+nd" spaceChar chapterContentTextContent backSlash "+nd" "*" spaceChar? - | newLine? backSlash "ord" spaceChar chapterContentTextContent backSlash "ord" "*" spaceChar? - | newLine? backSlash "+ord" spaceChar chapterContentTextContent backSlash "+ord" "*" spaceChar? - | newLine? backSlash "pn" spaceChar chapterContentTextContent backSlash "pn" "*" spaceChar? - | newLine? backSlash "+pn" spaceChar chapterContentTextContent backSlash "+pn" "*" spaceChar? - | newLine? backSlash "png" spaceChar chapterContentTextContent backSlash "png" "*" spaceChar? - | newLine? backSlash "+png" spaceChar chapterContentTextContent backSlash "+png" "*" spaceChar? - | newLine? backSlash "addpn" spaceChar chapterContentTextContent backSlash "addpn" "*" spaceChar? - | newLine? backSlash "+addpn" spaceChar chapterContentTextContent backSlash "+addpn" "*" spaceChar? - | newLine? backSlash "qt" spaceChar chapterContentTextContent backSlash "qt" "*" spaceChar? - | newLine? backSlash "+qt" spaceChar chapterContentTextContent backSlash "+qt" "*" spaceChar? - | newLine? backSlash "sig" spaceChar chapterContentTextContent backSlash "sig" "*" spaceChar? - | newLine? backSlash "+sig" spaceChar chapterContentTextContent backSlash "+sig" "*" spaceChar? - | newLine? backSlash "sls" spaceChar chapterContentTextContent backSlash "sls" "*" spaceChar? - | newLine? backSlash "+sls" spaceChar chapterContentTextContent backSlash "+sls" "*" spaceChar? - | newLine? backSlash "tl" spaceChar chapterContentTextContent backSlash "tl" "*" spaceChar? - | newLine? backSlash "+tl" spaceChar chapterContentTextContent backSlash "+tl" "*" spaceChar? - | newLine? 
backSlash "wj" spaceChar chapterContentTextContent backSlash "wj" "*" spaceChar? - | newLine? backSlash "+wj" spaceChar chapterContentTextContent backSlash "+wj" "*" spaceChar? - | newLine? backSlash "em" spaceChar chapterContentTextContent backSlash "em" "*" spaceChar? - | newLine? backSlash "+em" spaceChar chapterContentTextContent backSlash "+em" "*" spaceChar? - | newLine? backSlash "bd" spaceChar chapterContentTextContent backSlash "bd" "*" spaceChar? - | newLine? backSlash "+bd" spaceChar chapterContentTextContent backSlash "+bd" "*" spaceChar? - | newLine? backSlash "it" spaceChar chapterContentTextContent backSlash "it" "*" spaceChar? - | newLine? backSlash "+it" spaceChar chapterContentTextContent backSlash "+it" "*" spaceChar? - | newLine? backSlash "bdit" spaceChar chapterContentTextContent backSlash "bdit" "*" spaceChar? - | newLine? backSlash "+bdit" spaceChar chapterContentTextContent backSlash "+bdit" "*" spaceChar? - | newLine? backSlash "no" spaceChar chapterContentTextContent backSlash "no" "*" spaceChar? - | newLine? backSlash "+no" spaceChar chapterContentTextContent backSlash "+no" "*" spaceChar? - | newLine? backSlash "sc" spaceChar chapterContentTextContent backSlash "sc" "*" spaceChar? - | newLine? backSlash "+sc" spaceChar chapterContentTextContent backSlash "+sc" "*" spaceChar? - | newLine? backSlash "sup" spaceChar chapterContentTextContent backSlash "sup" "*" spaceChar? - | newLine? backSlash "+sup" spaceChar chapterContentTextContent backSlash "+sup" "*" spaceChar? - | newLine? backSlash "ndx" spaceChar chapterContentTextContent backSlash "ndx" "*" spaceChar? - | newLine? backSlash "+ndx" spaceChar chapterContentTextContent backSlash "+ndx" "*" spaceChar? - | newLine? backSlash "wg" spaceChar chapterContentTextContent backSlash "wg" "*" spaceChar? - | newLine? backSlash "+wg" spaceChar chapterContentTextContent backSlash "+wg" "*" spaceChar? - | newLine? backSlash "wh" spaceChar chapterContentTextContent backSlash "wh" "*" spaceChar? 
- | newLine? backSlash "+wh" spaceChar chapterContentTextContent backSlash "+wh" "*" spaceChar? - | newLine? backSlash "wa" spaceChar chapterContentTextContent backSlash "wa" "*" spaceChar? - | newLine? backSlash "+wa" spaceChar chapterContentTextContent backSlash "+wa" "*" spaceChar? - | newLine? backSlash "qs" spaceChar chapterContentTextContent backSlash "qs" "*" spaceChar? - | newLine? backSlash "+qs" spaceChar chapterContentTextContent backSlash "+qs" "*" spaceChar? - | newLine? backSlash "qac" spaceChar chapterContentTextContent backSlash "qac" "*" spaceChar? - | newLine? backSlash "+qac" spaceChar chapterContentTextContent backSlash "+qac" "*" spaceChar? - | newLine? backSlash "litl" spaceChar chapterContentTextContent backSlash "litl" "*" spaceChar? - | newLine? backSlash "+litl" spaceChar chapterContentTextContent backSlash "+litl" "*" spaceChar? - | newLine? backSlash "lik" spaceChar chapterContentTextContent backSlash "lik" "*" spaceChar? - | newLine? backSlash "+lik" spaceChar chapterContentTextContent backSlash "+lik" "*" spaceChar? - | newLine? backSlash "rq" spaceChar chapterContentTextContent backSlash "rq" "*" spaceChar? - | newLine? backSlash "+rq" spaceChar chapterContentTextContent backSlash "+rq" "*" spaceChar? + inLineCharElement = newLine? backSlash "add" spaceChar chapterContentTextContent+ backSlash "add" "*" spaceChar? + | newLine? backSlash "+add" spaceChar chapterContentTextContent+ backSlash "+add" "*" spaceChar? + | newLine? backSlash "bk" spaceChar chapterContentTextContent+ backSlash "bk" "*" spaceChar? + | newLine? backSlash "+bk" spaceChar chapterContentTextContent+ backSlash "+bk" "*" spaceChar? + | newLine? backSlash "dc" spaceChar chapterContentTextContent+ backSlash "dc" "*" spaceChar? + | newLine? backSlash "+dc" spaceChar chapterContentTextContent+ backSlash "+dc" "*" spaceChar? + | newLine? backSlash "k" spaceChar chapterContentTextContent+ backSlash "k" "*" spaceChar? + | newLine? 
backSlash "+k" spaceChar chapterContentTextContent+ backSlash "+k" "*" spaceChar? + | newLine? backSlash "nd" spaceChar chapterContentTextContent+ backSlash "nd" "*" spaceChar? + | newLine? backSlash "+nd" spaceChar chapterContentTextContent+ backSlash "+nd" "*" spaceChar? + | newLine? backSlash "ord" spaceChar chapterContentTextContent+ backSlash "ord" "*" spaceChar? + | newLine? backSlash "+ord" spaceChar chapterContentTextContent+ backSlash "+ord" "*" spaceChar? + | newLine? backSlash "pn" spaceChar chapterContentTextContent+ backSlash "pn" "*" spaceChar? + | newLine? backSlash "+pn" spaceChar chapterContentTextContent+ backSlash "+pn" "*" spaceChar? + | newLine? backSlash "png" spaceChar chapterContentTextContent+ backSlash "png" "*" spaceChar? + | newLine? backSlash "+png" spaceChar chapterContentTextContent+ backSlash "+png" "*" spaceChar? + | newLine? backSlash "addpn" spaceChar chapterContentTextContent+ backSlash "addpn" "*" spaceChar? + | newLine? backSlash "+addpn" spaceChar chapterContentTextContent+ backSlash "+addpn" "*" spaceChar? + | newLine? backSlash "qt" spaceChar chapterContentTextContent+ backSlash "qt" "*" spaceChar? + | newLine? backSlash "+qt" spaceChar chapterContentTextContent+ backSlash "+qt" "*" spaceChar? + | newLine? backSlash "sig" spaceChar chapterContentTextContent+ backSlash "sig" "*" spaceChar? + | newLine? backSlash "+sig" spaceChar chapterContentTextContent+ backSlash "+sig" "*" spaceChar? + | newLine? backSlash "sls" spaceChar chapterContentTextContent+ backSlash "sls" "*" spaceChar? + | newLine? backSlash "+sls" spaceChar chapterContentTextContent+ backSlash "+sls" "*" spaceChar? + | newLine? backSlash "tl" spaceChar chapterContentTextContent+ backSlash "tl" "*" spaceChar? + | newLine? backSlash "+tl" spaceChar chapterContentTextContent+ backSlash "+tl" "*" spaceChar? + | newLine? backSlash "wj" spaceChar chapterContentTextContent+ backSlash "wj" "*" spaceChar? + | newLine? 
backSlash "+wj" spaceChar chapterContentTextContent+ backSlash "+wj" "*" spaceChar? + | newLine? backSlash "em" spaceChar chapterContentTextContent+ backSlash "em" "*" spaceChar? + | newLine? backSlash "+em" spaceChar chapterContentTextContent+ backSlash "+em" "*" spaceChar? + | newLine? backSlash "bd" spaceChar chapterContentTextContent+ backSlash "bd" "*" spaceChar? + | newLine? backSlash "+bd" spaceChar chapterContentTextContent+ backSlash "+bd" "*" spaceChar? + | newLine? backSlash "it" spaceChar chapterContentTextContent+ backSlash "it" "*" spaceChar? + | newLine? backSlash "+it" spaceChar chapterContentTextContent+ backSlash "+it" "*" spaceChar? + | newLine? backSlash "bdit" spaceChar chapterContentTextContent+ backSlash "bdit" "*" spaceChar? + | newLine? backSlash "+bdit" spaceChar chapterContentTextContent+ backSlash "+bdit" "*" spaceChar? + | newLine? backSlash "no" spaceChar chapterContentTextContent+ backSlash "no" "*" spaceChar? + | newLine? backSlash "+no" spaceChar chapterContentTextContent+ backSlash "+no" "*" spaceChar? + | newLine? backSlash "sc" spaceChar chapterContentTextContent+ backSlash "sc" "*" spaceChar? + | newLine? backSlash "+sc" spaceChar chapterContentTextContent+ backSlash "+sc" "*" spaceChar? + | newLine? backSlash "sup" spaceChar chapterContentTextContent+ backSlash "sup" "*" spaceChar? + | newLine? backSlash "+sup" spaceChar chapterContentTextContent+ backSlash "+sup" "*" spaceChar? + | newLine? backSlash "ndx" spaceChar chapterContentTextContent+ backSlash "ndx" "*" spaceChar? + | newLine? backSlash "+ndx" spaceChar chapterContentTextContent+ backSlash "+ndx" "*" spaceChar? + | newLine? backSlash "wg" spaceChar chapterContentTextContent+ backSlash "wg" "*" spaceChar? + | newLine? backSlash "+wg" spaceChar chapterContentTextContent+ backSlash "+wg" "*" spaceChar? + | newLine? backSlash "wh" spaceChar chapterContentTextContent+ backSlash "wh" "*" spaceChar? + | newLine? 
backSlash "+wh" spaceChar chapterContentTextContent+ backSlash "+wh" "*" spaceChar? + | newLine? backSlash "wa" spaceChar chapterContentTextContent+ backSlash "wa" "*" spaceChar? + | newLine? backSlash "+wa" spaceChar chapterContentTextContent+ backSlash "+wa" "*" spaceChar? + | newLine? backSlash "qs" spaceChar chapterContentTextContent+ backSlash "qs" "*" spaceChar? + | newLine? backSlash "+qs" spaceChar chapterContentTextContent+ backSlash "+qs" "*" spaceChar? + | newLine? backSlash "qac" spaceChar chapterContentTextContent+ backSlash "qac" "*" spaceChar? + | newLine? backSlash "+qac" spaceChar chapterContentTextContent+ backSlash "+qac" "*" spaceChar? + | newLine? backSlash "litl" spaceChar chapterContentTextContent+ backSlash "litl" "*" spaceChar? + | newLine? backSlash "+litl" spaceChar chapterContentTextContent+ backSlash "+litl" "*" spaceChar? + | newLine? backSlash "lik" spaceChar chapterContentTextContent+ backSlash "lik" "*" spaceChar? + | newLine? backSlash "+lik" spaceChar chapterContentTextContent+ backSlash "+lik" "*" spaceChar? + | newLine? backSlash "rq" spaceChar chapterContentTextContent+ backSlash "rq" "*" spaceChar? + | newLine? backSlash "+rq" spaceChar chapterContentTextContent+ backSlash "+rq" "*" spaceChar? | newLine? backSlash "ior" spaceChar bookIntroductionTitlesTextContent backSlash "ior" "*" spaceChar? | newLine? backSlash "+ior" spaceChar bookIntroductionTitlesTextContent backSlash "+ior" "*" spaceChar? | newLine? backSlash "cat" spaceChar bookIntroductionTitlesTextContent backSlash "cat" "*" spaceChar? @@ -249,11 +248,11 @@ usfmBible{ - inLineCharAttributeElement = newLine? backSlash "rb" spaceChar chapterContentTextContent attributes* backSlash "rb" "*" spaceChar? - | newLine? backSlash "w" spaceChar chapterContentTextContent attributes* backSlash "w" "*" spaceChar? - | newLine? backSlash "jmp" spaceChar chapterContentTextContent attributes* backSlash "jmp" "*" spaceChar? + inLineCharAttributeElement = newLine? 
backSlash "rb" spaceChar chapterContentTextContent+ attributes* backSlash "rb" "*" spaceChar? + | newLine? backSlash "w" spaceChar chapterContentTextContent+ attributes* backSlash "w" "*" spaceChar? + | newLine? backSlash "jmp" spaceChar chapterContentTextContent* attributes* backSlash "jmp" "*" spaceChar? - inLineCharNumberedElement = newLine? backSlash "liv" number? spaceChar chapterContentTextContent backSlash "liv" "*" spaceChar? + inLineCharNumberedElement = newLine? backSlash "liv" number? spaceChar chapterContentTextContent+ backSlash "liv" "*" spaceChar? @@ -280,12 +279,12 @@ usfmBible{ - litElement = newLine backSlash "lit" spaceChar chapterContentTextContent + litElement = newLine backSlash "lit" spaceChar ((chapterContentTextContent | notesElement | milestoneElement) | notesElement | milestoneElement)+ bookIntroductionTitlesTextContent = (text | notesElement | charElement | milestoneElement | figureElement | zNameSpace)+ bookTitlesTextContent = (text | notesElement | charElement | zNameSpace)+ - chapterContentTextContent = text | charElement | table | li | esbElement + chapterContentTextContent = text | charElement | table | li bookIntroductionEndTitlesTextContent = (text | notesElement | charElement | milestoneElement | zNameSpace | esbElement)+ milestoneElement = (newLine? backSlash "qt" number? "-s"? spaceChar* attributes? backSlash "*" spaceChar?) @@ -295,6 +294,6 @@ usfmBible{ zNameSpace = newLine? backSlash "z" char* spaceChar? text? (backSlash "*" )? - esbElement = newLine backSlash "esb" spaceChar? chapterContentTextContent+ newLine? backSlash "esbe" spaceChar? + esbElement = newLine backSlash "esb" spaceChar? (chapterContentTextContent | metaScripture )+ newLine? backSlash "esbe" spaceChar? 
} \ No newline at end of file From 856f65ba73d5c5ada87a135f9920d623b247bad6 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Wed, 19 Dec 2018 15:36:30 +0530 Subject: [PATCH 33/37] fix issue with table, as the verse text-metadata separation was made --- grammarOperations.js | 34 ++++++++++++++++++++-------------- parser.js | 2 +- server.js | 3 --- test/test.js | 5 ----- usfm.ohm | 2 +- 5 files changed, 22 insertions(+), 24 deletions(-) diff --git a/grammarOperations.js b/grammarOperations.js index 6f7f78d7..08c2a075 100644 --- a/grammarOperations.js +++ b/grammarOperations.js @@ -98,7 +98,8 @@ sem.addOperation('composeJson', { verse['text'] = '' for (let i=0; iBack Home


') let data = fields.inputText console.log(data) - // if (data.substr(-1) === '\'') { - // data = data.substr(0, data.length - 1) - // } if (data === '') { data = '

Text Empty!!!

' } else { diff --git a/test/test.js b/test/test.js index c58559f8..b59b438b 100644 --- a/test/test.js +++ b/test/test.js @@ -18,11 +18,6 @@ describe('Mandatory Markers', function () { assert.strictEqual(output, false) }) - // it('Chapter should start with one of paragraph markers', function () { - // let output = parser.validate('\\id PHM Longer Heading\n\\c 1\n\\v 1 ക്രിസ്തുയേശുവിന്റെ ബദ്ധനായ ...\n\\v 2 നമ്മുടെ പിതാവായ ...\n\\p\n\\v 3 കർത്താവായ യേശുവിനോടും ...') - // assert.strictEqual(output, false) - // }) - it('v is a mandatory marker', function () { let output = parser.validate('\\id PHM Longer Heading\n\\c 1\n\\p\n') assert.strictEqual(output, false) diff --git a/usfm.ohm b/usfm.ohm index 05c57622..29602b1b 100644 --- a/usfm.ohm +++ b/usfm.ohm @@ -73,7 +73,7 @@ usfmBible{ publishedCharElement = publishedCharMarker text publishedCharMarker "*" publishedCharMarker = backSlash "vp" - verseText = chapterContentTextContent+ + verseText = chapterContentTextContent paraMarker = paraUnNumberedMarker | paraNumberedMarker From 6026329d2e291145dbe737008384ea3477378d66 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Wed, 19 Dec 2018 15:49:59 +0530 Subject: [PATCH 34/37] fix issue of milestone(qt) not being identified when given in a new line --- usfm.ohm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/usfm.ohm b/usfm.ohm index 29602b1b..1ed6d6a7 100644 --- a/usfm.ohm +++ b/usfm.ohm @@ -50,7 +50,7 @@ usfmBible{ chapterHeader = cElement (caElement | cpElement | clElement | cdElement)* - metaScripture = sectionHeader | mte | remElement | iexElement | ipElement | spElement | litElement | qaElement | paraElement | notesElement | figureElement | zNameSpace | milestoneElement | esbElement + metaScripture = sectionHeader | mte | remElement | iexElement | ipElement | spElement | litElement | qaElement | notesElement | figureElement | zNameSpace | milestoneElement | esbElement | paraElement sectionHeader = sectionElement sectionPostHeader* ipElement* 
sectionPostHeader = srElement | rElement | dElement | mrElement From bbf3125deec6753f36c2cec55706b19512202c2f Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Wed, 19 Dec 2018 16:33:45 +0530 Subject: [PATCH 35/37] use the const string SCRIPTURE as optional argument for a clean JSON --- README.md | 2 ++ parser.js | 2 ++ server.js | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 10a949d9..0a21abd5 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,12 @@ To use it from your node application: ``` var grammar = require('usfm-grammar) var jsonOutput = garmmar.parse(/**The USFM Text to be converted to JSON**/) +var jsonOutput = grammar.parse(/**The USFM Text to be converted to JSON**/,grammar.SCRIPTURE) var usfmValidity = grammar.validate(/**USFM Text to be checked**/) ``` The `grammar.parse()` method returns a json structure for the USFM text contents, if it is a valid usfm file. +The `grammar.parse()` method can take an optional second argument, `grammar.SCRIPTURE`. If this is used, the returned json will contain only the most relevant scripture content, excluding all additional USFM contents The `grammar.validate()` method returns a true/false, depending on whether the input usfm text's syntax is valid or not. ## To Use as a Local Node server diff --git a/parser.js b/parser.js index 7461f21b..c011271b 100644 --- a/parser.js +++ b/parser.js @@ -15,6 +15,8 @@ function normalize (str) { return newStr } +exports.SCRIPTURE = 'clean' + exports.parse = function (str, resultType = 'all') { let inStr = normalize(str) let matchObj = match(inStr) diff --git a/server.js b/server.js index 59882f70..da69a586 100644 --- a/server.js +++ b/server.js @@ -84,7 +84,7 @@ http.createServer(function (req, res) { if (data === '') { data = '

Text Empty!!!

' } else { - data = beautifyResultForHtml(parser.parse(data)) + data = beautifyResultForHtml(parser.parse(data, parser.SCRIPTURE)) } res.write(data) res.end() From 606c1b37d3ef604f8ab429c03468016ecc23e010 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 20 Dec 2018 10:12:24 +0530 Subject: [PATCH 36/37] add list contents also with verse text --- grammarOperations.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/grammarOperations.js b/grammarOperations.js index 08c2a075..449438b6 100644 --- a/grammarOperations.js +++ b/grammarOperations.js @@ -506,7 +506,10 @@ sem.addOperation('composeJson', { li: function (itemElement) { let li = {'list': itemElement.composeJson()} - // li = JSON.stringify(li) + li['text'] = '' + for ( let item of li['list']) { + li.text += item.item.text + ' | ' + } return li }, From d8c809e46520fa08e4f380b0defd1be06c9b2076 Mon Sep 17 00:00:00 2001 From: kavitharaju Date: Thu, 20 Dec 2018 10:19:29 +0530 Subject: [PATCH 37/37] rename the repeated variable in README, example script --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0a21abd5..303f8262 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ To use it from your node application: ``` var grammar = require('usfm-grammar) var jsonOutput = garmmar.parse(/**The USFM Text to be converted to JSON**/) -var jsonOutput = grammar.parse(/**The USFM Text to be converted to JSON**/,grammar.SCRIPTURE) +var jsonCleanOutput = grammar.parse(/**The USFM Text to be converted to JSON**/,grammar.SCRIPTURE) var usfmValidity = grammar.validate(/**USFM Text to be checked**/) ```