diff --git a/.editorconfig b/.editorconfig index fb6de55..c9e4e83 100644 --- a/.editorconfig +++ b/.editorconfig @@ -7,7 +7,7 @@ end_of_line = lf indent_size = 2 indent_style = space insert_final_newline = true -max_line_length = 100 +max_line_length = 120 trim_trailing_whitespace = true [*.ts] diff --git a/package.json b/package.json index ea5d4ea..b3e68df 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "songshowplus-parser", - "version": "1.0.1", + "version": "1.1.0", "description": "Parses and extracts data from SongShow Plus files", "main": "dist/main/index.js", "typings": "dist/main/index.d.ts", diff --git a/src/index.spec.ts b/src/index.spec.ts index 61d7b52..f54aa30 100644 --- a/src/index.spec.ts +++ b/src/index.spec.ts @@ -1,4 +1,5 @@ import { readFileSync } from 'fs'; +import { ISongShowPlusSong } from './models'; import { SongShowPlus } from '.'; describe('SongShowPlus', (): void => { @@ -13,209 +14,222 @@ describe('SongShowPlus', (): void => { }); it('should return a song for an ENGLISH SongShow Plus 7 file: "Be Near.sbsong"', () => { - const testFile = readFileSync('./sample-files/Be Near.sbsong').toString(); + const testFile = readFileSync('./sample-files/Be Near.sbsong'); expect(sspParser.parse(testFile)).toEqual({ + songNumber: '0707', title: 'Be Near', - keywords: ['Longing', "Security'"], artist: 'Barnard, Shane', copyright: '2003 Waiting Room Music', ccli: '4090362', - sections: [ + key: 'B', + keywords: ['Longing', 'Security'], + lyricSections: [ { title: 'Chorus 1', lyrics: - 'Be near O God\r\nBe near O God of us\r\nYour nearness is to us our good\r\nBe near O God\r\nBe near O God of us\r\nYour nearness is to us our good\r\nOur good', + 'Be near O God\nBe near O God of us\nYour nearness is to us our good\nBe near O God\nBe near O God of us\nYour nearness is to us our good\nOur good', }, + { title: 'Other', lyrics: '' }, { title: 'Verse 1', lyrics: - "You are all big and small\r\nBeautiful\r\nAnd wonderful\r\nTo trust in 
grace through faith\r\nBut I'm asking to taste", + "You are all big and small\nBeautiful\nAnd wonderful\nTo trust in grace through faith\nBut I'm asking to taste", }, { title: 'Verse 2', lyrics: - 'For dark is light to You\r\nDepths are height to You\r\nFar is near\r\nBut Lord I need to hear from You', + 'For dark is light to You\nDepths are height to You\nFar is near\nBut Lord I need to hear from You', }, { title: 'Verse 3', lyrics: - 'Your fullness is mine\r\nRevelation divine\r\nBut oh to taste\r\nTo know much more than a page\r\nTo feel Your embrace', + 'Your fullness is mine\nRevelation divine\nBut oh to taste\nTo know much more than a page\nTo feel Your embrace', }, { title: 'Verse 4', lyrics: - 'For dark is light to You\r\nDepths are height to You\r\nFar is near\r\nBut Lord I need to hear from You', + 'For dark is light to You\nDepths are height to You\nFar is near\nBut Lord I need to hear from You', }, { title: 'Ending', lyrics: 'My good', }, ], - }); + } as ISongShowPlusSong); }); it('should return a song for an ENGLISH SongShow Plus 7 file: "Give Us Clean Hands.sbsong"', () => { - const testFile = readFileSync('./sample-files/Give Us Clean Hands.sbsong').toString(); + const testFile = readFileSync('./sample-files/Give Us Clean Hands.sbsong'); expect(sspParser.parse(testFile)).toEqual({ + songNumber: '0707', title: 'Give Us Clean Hands', - keywords: ['Prayer', 'Repentance'], artist: 'Hall, Charlie', copyright: '2000 worshiptogether.com songs', ccli: '2060208', - sections: [ + key: 'Ab', + keywords: ['Prayer', 'Repentance'], + lyricSections: [ { title: 'Chorus 1', lyrics: - 'Give us clean hands\r\ngive us pure hearts\r\nLet us not lift our\r\nsouls to another\r\nGive us clean hands\r\ngive us pure hearts\r\nLet us not lift our\r\nsouls to another', + 'Give us clean hands\ngive us pure hearts\nLet us not lift our\nsouls to another\nGive us clean hands\ngive us pure hearts\nLet us not lift our\nsouls to another', }, { title: 'Chorus', lyrics: - 'And oh God 
let us be\r\na generation that seeks\r\nThat seeks Your face\r\noh God of Jacob\r\nAnd oh God let us be\r\na generation that seeks\r\nThat seeks Your face\r\noh God of Jacob', + 'And oh God let us be\na generation that seeks\nThat seeks Your face\noh God of Jacob\nAnd oh God let us be\na generation that seeks\nThat seeks Your face\noh God of Jacob', }, { title: 'Verse 1', lyrics: - 'We bow our hearts\r\nwe bend our knees\r\nOh Spirit come\r\nmake us humble\r\nWe turn our eyes\r\nfrom evil things\r\nOh Lord we cast\r\ndown our idols', + 'We bow our hearts\nwe bend our knees\nOh Spirit come\nmake us humble\nWe turn our eyes\nfrom evil things\nOh Lord we cast\ndown our idols', }, ], - }); + } as ISongShowPlusSong); }); it('should return a song for an ENGLISH SongShow Plus 7 file: "Jesus Saves.sbsong"', () => { - const testFile = readFileSync('./sample-files/Jesus Saves.sbsong').toString(); + const testFile = readFileSync('./sample-files/Jesus Saves.sbsong'); expect(sspParser.parse(testFile)).toEqual({ + songNumber: '0718', title: 'Jesus Saves (2)', - keywords: [], artist: 'Eddie James', - copyright: '© Fresh Wine Publishing', + copyright: 'Fresh Wine Publishing', ccli: '', - sections: [ + key: '', + keywords: [], + lyricSections: [ { title: 'Verse 1', lyrics: - 'Jesus saves from the power\r\nAnd the penalty of sin\r\nJesus saves from the torment\r\nAnd the anguish within\r\nJesus saves from the bondage\r\nAnd control of the enemy\r\nHe heals the broken heart\r\nAnd set the captives free', + 'Jesus saves from the power\nAnd the penalty of sin\nJesus saves from the torment\nAnd the anguish within\nJesus saves from the bondage\nAnd control of the enemy\nHe heals the broken heart\nAnd set the captives free', }, { title: 'Verse 2', lyrics: - "Jesus saves from the guilt\r\nAnd the shame of what you've done\r\nJesus saves from a life\r\nFilled with lies and deception\r\nJesus saves from the pain of the memories \r\nThat have scarred your past", + "Jesus saves from the 
guilt\nAnd the shame of what you've done\nJesus saves from a life\nFilled with lies and deception\nJesus saves from the pain of the memories \nThat have scarred your past", }, { title: 'Pre-chorus 1', lyrics: - 'Gabriel came to Joseph in a dream\r\nFor by the Holy Spirit Mary conceived\r\nAnd with a message from heaven\r\nGabriel came declaring', + 'Gabriel came to Joseph in a dream\nFor by the Holy Spirit Mary conceived\nAnd with a message from heaven\nGabriel came declaring', }, { title: 'Chorus 1', lyrics: - 'She shall bring forth a Son thou\r\nShalt call His name Jesus and He shall save\r\nSave His people from sin and give peace within\r\nWhat amazing grace - Jesus saves', + 'She shall bring forth a Son thou\nShalt call His name Jesus and He shall save\nSave His people from sin and give peace within\nWhat amazing grace - Jesus saves', }, { - title: 'Verse 3r', + title: 'Verse 3', lyrics: - "Jesus saves, He's the One that has never known sin\r\nJesus saves, yet for us He took on the sins of men\r\nJesus saves for he became our sin that we \r\nMay become the righteousness of God\r\nJesus saves and now I'll see His face in peace\r\nJesus saves and I will live with Him eternally\r\nJesus saves and I will join with the angels\r\nAround the throne Singing Holy, Holy, Holy", + "Jesus saves, He's the One that has never known sin\nJesus saves, yet for us He took on the sins of men\nJesus saves for he became our sin that we \nMay become the righteousness of God\nJesus saves and now I'll see His face in peace\nJesus saves and I will live with Him eternally\nJesus saves and I will join with the angels\nAround the throne Singing Holy, Holy, Holy", }, { - title: 'Verse 4z', + title: 'Verse 4', lyrics: - "Jesus saves and today you can leave here set free\r\nJesus saves and even now He will give you liberty\r\nJesus saves no matter what, no matter who, no\r\nMatter where just call Him, He'll answer your prayer\r\nJesus saves and your life will never be the same\r\nJesus saves 
and right now you can be born again\r\nJesus saves for he that be in Christ\r\nThe old has past and all is made new", + "Jesus saves and today you can leave here set free\nJesus saves and even now He will give you liberty\nJesus saves no matter what, no matter who, no\nMatter where just call Him, He'll answer your prayer\nJesus saves and your life will never be the same\nJesus saves and right now you can be born again\nJesus saves for he that be in Christ\nThe old has past and all is made new", }, { title: 'Bridge 1', lyrics: - "Redeemer, liberator, healer, emancipator \r\nChain-Breaker, strong deliverer, He fights for me\r\nWarrior, restorer, forgiver, peace make\r\nJustifier, Intercessor, through Him I'm free", + "Redeemer, liberator, healer, emancipator \nChain-Breaker, strong deliverer, He fights for me\nWarrior, restorer, forgiver, peace make\nJustifier, Intercessor, through Him I'm free", }, { title: 'Vamp 1', - lyrics: "Jesus saves (repeat) Hallelujah (repeat)\r\n'(", + lyrics: 'Jesus saves (repeat) Hallelujah (repeat)', }, ], - }); + } as ISongShowPlusSong); }); it('should return a song for an ENGLISH SongShow Plus 7 file: "You Are.sbsong"', () => { - const testFile = readFileSync('./sample-files/You Are.sbsong').toString(); + const testFile = readFileSync('./sample-files/You Are.sbsong'); expect(sspParser.parse(testFile)).toEqual({ + songNumber: '0718', title: 'You Are (2)', - keywords: ['Appreciation', 'Breakthrough', 'Christ', 'Declaration', "Jesus'"], artist: '8Jobe, Caleb | Cohen, Ezra | Hesami, Josh | Trimble, Paul', copyright: '2010 CFN Music', ccli: '5715921', - sections: [ + key: 'C', + keywords: ['Appreciation', 'Breakthrough', 'Christ', 'Declaration', 'Jesus'], + lyricSections: [ { title: 'Verse 1', lyrics: - "Bought back from a life in chains\r\nNow I can sing that I have been redeemed\r\nYour blood covers all of me\r\nI'm not ashamed to shout who You are\r\nI'm living in the light of grace\r\nMercy made a way to make things bright\r\nLove 
overcame the grave in victory in victory", + "Bought back from a life in chains\nNow I can sing that I have been redeemed\nYour blood covers all of me\nI'm not ashamed to shout who You are\nI'm living in the light of grace\nMercy made a way to make things bright\nLove overcame the grave in victory in victory", }, { title: 'Chorus 1', lyrics: - 'You are You are You are the Lord of all\r\nGave Your life to save us\r\nFreedom has embraced us\r\nYou are You are You are the Love for all\r\nHope of every nation\r\nAll creation shouts Your glory', + 'You are You are You are the Lord of all\nGave Your life to save us\nFreedom has embraced us\nYou are You are You are the Love for all\nHope of every nation\nAll creation shouts Your glory', }, { title: 'Verse 2', lyrics: - "I'm running with a heart that's changed\r\nEverything to see Your kingdom come\r\nYou guide every step I take\r\nI'm not ashamed to shout who You are\r\nI'm living in a brand new day\r\nA life to give a life to lift You high\r\nAll glory and power and praise\r\nTo You alone to You alone", + "I'm running with a heart that's changed\nEverything to see Your kingdom come\nYou guide every step I take\nI'm not ashamed to shout who You are\nI'm living in a brand new day\nA life to give a life to lift You high\nAll glory and power and praise\nTo You alone to You alone", }, { title: 'Bridge', lyrics: - "I'm not ashamed of who You are\r\nYour love broke through\r\nAnd grace has made me Yours\r\nNow upon this Rock I stand\r\nIn victory in victory\r\n(REPEAT 3X)", + "I'm not ashamed of who You are\nYour love broke through\nAnd grace has made me Yours\nNow upon this Rock I stand\nIn victory in victory\n(REPEAT 3X)", }, ], - }); + } as ISongShowPlusSong); }); it('should return a song for a SPANISH SongShow Plus 7 file: "Devuelveme El Gozo.sbsong"', () => { - const testFile = readFileSync('./sample-files/Devuelveme El Gozo.sbsong').toString(); + const testFile = readFileSync('./sample-files/Devuelveme El Gozo.sbsong'); 
expect(sspParser.parse(testFile)).toEqual({ + songNumber: '0707', title: 'Devuelveme El Gozo', - keywords: ['Tahoma', '�?'], artist: '', copyright: '', ccli: '', - sections: [ + key: '', + keywords: [], + lyricSections: [ { title: 'Verse 1', lyrics: - 'En medio del dolor \r\nEn medio de la aflicción\r\nTu me das paz y \r\nMe enseñas tu amor \r\nTodo lo que perdí \r\nLo restauras Señor en mi \r\nMe das las fuerza para seguir', + 'En medio del dolor \nEn medio de la aflicción\nTu me das paz y \nMe enseñas tu amor \nTodo lo que perdí \nLo restauras Señor en mi \nMe das las fuerza para seguir', }, { title: 'Chorus', lyrics: - 'Devuelve me el gozo \r\nDe Tu salvación y\r\nTu Espíritu noble me sustente \r\nTe necesito Dios \r\nSin ti no soy nada \r\nTe necesito Dios \r\nDame un nuevo corazón', + 'Devuelve me el gozo \nDe Tu salvación y\nTu Espíritu noble me sustente \nTe necesito Dios \nSin ti no soy nada \nTe necesito Dios \nDame un nuevo corazón', }, ], - }); + } as ISongShowPlusSong); }); it('should return a song for a SPANISH SongShow Plus 7 file: "La Sangre (The Blood).sbsong"', () => { - const testFile = readFileSync('./sample-files/La Sangre (The Blood).sbsong').toString(); + const testFile = readFileSync('./sample-files/La Sangre (The Blood).sbsong'); expect(sspParser.parse(testFile)).toEqual({ + songNumber: '0707', title: 'La Sangre (The Blood)', - keywords: ['Background Improv'], artist: '', copyright: '', ccli: '', - sections: [ + key: '', + keywords: [], + lyricSections: [ { title: 'Verse 1', lyrics: - 'La sangre de mi Cristo\r\nque el vertio por mi en la cruz\r\naun es eficaz, para limpiar tu ser\r\nporque Cristo nunca perdera su fuerza', + 'La sangre de mi Cristo\nque el vertio por mi en la cruz\naun es eficaz, para limpiar tu ser\nporque Cristo nunca perdera su fuerza', }, { title: 'Chorus', lyrics: - 'Oh, porque alcanza a limpiar nuestras manchas\r\ny alcanza a curar nuestras llagas\r\npecador ven al manantial \r\nque fluyendo esta\r\ny lavara tu 
ser\r\nporque Cristo nunca perdera su fuerza!!"', + 'Oh, porque alcanza a limpiar nuestras manchas\ny alcanza a curar nuestras llagas\npecador ven al manantial \nque fluyendo esta\ny lavara tu ser\nporque Cristo nunca perdera su fuerza!!"', }, ], - }); + } as ISongShowPlusSong); }); }); diff --git a/src/index.ts b/src/index.ts index 6b3ce2f..b70e3cf 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,282 +1,308 @@ -import { ISongShowPlusSection, ISongShowPlusSong } from './models'; -import { TextCleaner } from './text-cleaner'; +/* eslint-disable no-unused-vars */ +import { ISongShowPlusLyricSection, ISongShowPlusSong } from './models'; + +enum Block { + TITLE = 1, + AUTHOR = 2, + COPYRIGHT = 3, + CCLI_NO = 5, + KEY = 11, + VERSE = 12, + CHORUS = 20, + BRIDGE = 24, + TOPIC = 29, + COMMENTS = 30, + VERSE_ORDER = 31, + SONG_BOOK = 35, + SONG_NUMBER = 36, + CUSTOM_VERSE = 37, + FILE_START = 38, +} -export class SongShowPlus { - //Regex pattern AS A STRING to match invisible control characters - //Slashes are double escaped here so it can be in a string! 
/**
 * Header fields read from the start of one block in a SongShow Plus file.
 */
interface ISongSectionBufferInfo {
  /** Block key describing what the data is (see the Block enum) */
  type: Block;
  /** Number of bytes from the end of the 10-byte header to the start of the next block */
  nextBlockStart: number;
  /** Raw value of the "how many bytes follow" byte. It is read but not used for anything yet */
  numBytesFollows: number;
  /** Length in bytes of the string that directly follows the header */
  thisBlockLength: number;
  /** Absolute offset of the first byte after the header */
  newByteOffset: number;
}

/**
 * One decoded block of the file.
 */
interface ISongSection {
  type: Block;
  /** The block's string. For lyric blocks this is the section name (e.g. "Verse 1") */
  value: string;
  /** Only set for lyric blocks (verse, chorus, bridge, custom verse): the lyric text */
  lyricContent?: string;
}

/**
 * Parser for SongShow Plus song files.
 */
export class SongShowPlus {
  /**
   * Parses the raw bytes of a SongShow Plus song file.
   *
   * Scalar properties take the value of the last block of the matching type, every TOPIC block
   * becomes one keyword, and every lyric block becomes one entry in `lyricSections` (in file order,
   * including sections with empty lyrics). COMMENTS, VERSE_ORDER, SONG_BOOK and FILE_START blocks
   * are read but not exposed on the result.
   *
   * NOTE(review): `songNumber` is filled from SONG_NUMBER blocks. The spec expects values such as
   * '0707'; confirm those do not actually come from the FILE_START block.
   *
   * @param fileBuffer The file's bytes, e.g. the result of `readFileSync(path)` with no encoding
   * @returns The song. Any property with no matching block in the file is left as '' or []
   */
  parse(fileBuffer: Buffer): ISongShowPlusSong {
    const song: ISongShowPlusSong = {
      songNumber: '',
      title: '',
      artist: '',
      copyright: '',
      ccli: '',
      key: '',
      keywords: [],
      lyricSections: [],
    };

    for (const section of this.getSections(fileBuffer)) {
      if (this.isLyricBlock(section.type)) {
        song.lyricSections.push({ title: section.value, lyrics: section.lyricContent ?? '' });
        continue;
      }

      switch (section.type) {
        case Block.SONG_NUMBER:
          song.songNumber = section.value;
          break;
        case Block.TITLE:
          song.title = section.value;
          break;
        case Block.AUTHOR:
          song.artist = section.value;
          break;
        case Block.COPYRIGHT:
          song.copyright = section.value;
          break;
        case Block.CCLI_NO:
          song.ccli = section.value;
          break;
        case Block.KEY:
          song.key = section.value;
          break;
        case Block.TOPIC:
          //An empty topic block carries no keyword
          if (section.value !== '') {
            song.keywords.push(section.value);
          }
          break;
        default:
          //Block types with no matching property on the song are ignored
          break;
      }
    }

    return song;
  }

  //------------------------------------------
  //File Parsing
  /**
   * @description Walks the file block by block and returns the decoded string(s) of each block
   */
  private getSections(fileBuffer: Buffer): ISongSection[] {
    /*
    The SongShow Plus song file format is as follows:

    * Each piece of data in the song file has some information that precedes it.
    * The general format of this data is as follows:
        | 4 Bytes, forming a 32 bit number, a key if you will, this describes what the data is (see Block Enum)
        | 4 Bytes, forming a 32 bit number, which is the number of bytes until the next block starts
        | 1 Byte, which tells how many bytes follows
        | 1 or 4 Bytes, describes how long the string is, if its 1 byte, the string is less than 255
        | The next bytes are the actual data.
        | The next block of data follows on.
    */

    //4 (type) + 4 (bytes until next block) + 1 (bytes that follow) + 1 (string length)
    const headerLength = 10;
    const sections: ISongSection[] = [];

    //Copy the bytes into their own ArrayBuffer. A Node Buffer can be a view onto a larger
    //shared ArrayBuffer, so using `fileBuffer.buffer` directly could expose unrelated bytes
    const arrayBuffer = new ArrayBuffer(fileBuffer.byteLength);
    new Uint8Array(arrayBuffer).set(fileBuffer);

    let byteOffset = 0;
    //Stop as soon as fewer bytes remain than a header needs,
    //so trailing padding or a truncated file can never cause an out-of-range read
    while (byteOffset + headerLength <= arrayBuffer.byteLength) {
      const sectionInfo = this.getSectionInfo(arrayBuffer, byteOffset);
      const dataStart = sectionInfo.newByteOffset;
      const dataEnd = dataStart + sectionInfo.thisBlockLength;

      const thisSection: ISongSection = {
        type: sectionInfo.type,
        value: this.processCharsAsString(arrayBuffer.slice(dataStart, dataEnd)),
      };

      if (this.isLyricBlock(sectionInfo.type)) {
        //The lyric text is a second length-prefixed string.
        //One byte sits between the end of the section name and the lyric length byte
        thisSection.lyricContent = this.getLyrics(arrayBuffer, dataEnd + 1);
      }

      sections.push(thisSection);

      //nextBlockStart is relative to the end of the header, not to the end of the string
      byteOffset = dataStart + sectionInfo.nextBlockStart;
    }

    return sections;
  }

  /**
   * @description Reads a string prefixed by a single length byte, starting at `lyricStartOffset`
   * @returns The cleaned text, or '' when the offset lies outside the buffer
   */
  private getLyrics(buffer: ArrayBuffer, lyricStartOffset: number): string {
    //TODO: This might be 4 bytes in some cases! Lyrics of 255 bytes or more cannot be described by one byte
    const thisBlockLength = new Uint8Array(buffer)[lyricStartOffset];
    if (thisBlockLength === undefined) {
      return '';
    }

    const contentStart = lyricStartOffset + 1;
    return this.processCharsAsString(buffer.slice(contentStart, contentStart + thisBlockLength));
  }

  /**
   * @description Reads the 10-byte header of the block that starts at `byteOffset`
   * @throws RangeError when fewer than 10 bytes are available at `byteOffset`
   */
  private getSectionInfo(buffer: ArrayBuffer, byteOffset: number): ISongSectionBufferInfo {
    const blockLength = 4;
    //Read through a DataView with an explicit little-endian flag,
    //so the result does not depend on the byte order of the machine running the parser
    const view = new DataView(buffer);
    let offset = byteOffset;

    const type = view.getUint32(offset, true);
    offset += blockLength;

    //subtract 2 to get the next block start position relative to where the data ends, not relative to where the type byte and this byte begin
    const nextBlockStart = view.getUint32(offset, true) - 2;
    offset += blockLength;

    const numBytesFollows = view.getUint8(offset);
    offset++;

    //TODO: This might be 4 bytes in some cases!
    const thisBlockLength = view.getUint8(offset);
    offset++;

    return {
      type,
      nextBlockStart,
      numBytesFollows,
      thisBlockLength,
      newByteOffset: offset,
    };
  }

  /**
   * @description Decodes a run of bytes into text, dropping every byte that is neither printable nor a newline
   */
  private processCharsAsString(sectionCharArr: ArrayBuffer): string {
    let txt = '';
    for (const char of new Uint8Array(sectionCharArr)) {
      txt += this.charToAscii(char);
    }

    return this.cleanString(txt);
  }

  /**
   * @description Splits a run of character codes into a section title and lyrics using \x06 separators.
   * Text up to and including the first separator's segment is the title, everything after the second separator is lyrics.
   * Not called by the block-based parser at the moment.
   */
  private processCharsAsLyricSection(sectionCharArr: number[]): ISongShowPlusLyricSection {
    let title = '';
    let lyrics = '';
    let separatorCount = 0;

    for (let offset = 0; offset < sectionCharArr.length; offset++) {
      const char = sectionCharArr[offset];
      if (char === undefined) {
        continue;
      }

      if (char === 6) {
        //always a /x06 followed by another character, so skip that one too
        offset++;
        separatorCount++;
      } else if (separatorCount <= 1) {
        title += this.charToAscii(char);
      } else {
        lyrics += this.charToAscii(char);
      }
    }

    //Don't return anything if no separators were found
    if (separatorCount === 0) {
      return { title: '', lyrics: '' };
    }

    //Remove ending junk symbols and trim any surrounding whitespace/newlines
    return {
      title: this.cleanString(title),
      lyrics: this.cleanString(lyrics),
    };
  }

  //------------------------------------------
  //Helpers
  /**
   * @description True for the block types that carry a section name followed by lyric text
   */
  private isLyricBlock(type: Block): boolean {
    return (
      type === Block.BRIDGE ||
      type === Block.CHORUS ||
      type === Block.VERSE ||
      type === Block.CUSTOM_VERSE
    );
  }

  /**
   * @description Converts one byte to text: printable characters are kept, \n is kept, everything else becomes ''.
   * Carriage returns are dropped, so "\r\n" in the file comes out as "\n"
   */
  private charToAscii(char: number): string {
    if (this.charIsPrintableCharacter(char)) {
      // If the char code is a printable character, return it as-is
      return String.fromCharCode(char);
    } else if (char === 10) {
      // If the char code corresponds to a newline, return a newline character
      return '\n';
    }
    return '';
  }

  /**
   * @description Checks for the 7-byte pattern \x00\x00\x00\x??\x00\x00\x00 starting at `byteOffset`
   * @returns false when the pattern does not match or when 7 bytes are not available at `byteOffset`
   */
  private isSectionSeparator(
    dataView: DataView,
    byteOffset: number,
    totalByteLength: number
  ): boolean {
    //\x00\x00\x00\x??\x00\x00\x00
    //3 null-byte chars, then 1 char, then 3 null-byte chars will separate the sections
    //The last byte read is at byteOffset + 6, so that index must still be inside the data
    const readableLength = Math.min(totalByteLength, dataView.byteLength);
    if (byteOffset < 0 || byteOffset + 6 >= readableLength) return false;

    return (
      dataView.getInt8(byteOffset) === 0 &&
      dataView.getInt8(byteOffset + 1) === 0 &&
      dataView.getInt8(byteOffset + 2) === 0 &&
      // + 3 would be a random character
      dataView.getInt8(byteOffset + 4) === 0 &&
      dataView.getInt8(byteOffset + 5) === 0 &&
      dataView.getInt8(byteOffset + 6) === 0
    );
  }

  /**
   * @description Removes one trailing $, %, ' or " and then trims surrounding whitespace.
   * The symbol is removed before trimming, so a symbol that is followed by whitespace is kept
   */
  private cleanString(str: string): string {
    //Remove random ending characters and whitespace
    return str.replace(/[$%'"]$/, '').trim();
  }

  private charIsNumber(charCode: number): boolean {
    //0-9
    return charCode >= 48 && charCode <= 57;
  }

  private charIsEnglishLetter(charCode: number): boolean {
    //A-Za-z
    return (charCode >= 65 && charCode <= 90) || (charCode >= 97 && charCode <= 122);
  }

  private charIsLetter(charCode: number): boolean {
    //English letters or other non-symbol letter characters
    //192 is the first accented letter (À), so it has to be included
    return this.charIsEnglishLetter(charCode) || charCode >= 192;
  }

  private charIsPrintableCharacter(char: number): boolean {
    //letters, numbers, symbols. No control characters or newlines
    //NOTE(review): codes 127-191 (e.g. inverted punctuation, copyright sign) are still dropped - confirm that is wanted
    const isAsciiSymbol =
      (char >= 32 && char <= 47) ||
      (char >= 58 && char <= 64) ||
      (char >= 91 && char <= 96) ||
      (char >= 123 && char <= 126);

    return this.charIsLetter(char) || this.charIsNumber(char) || isAsciiSymbol;
  }
}

//------------------------------------------
//src/models.ts (the last file in this diff)

/**
 * A parsed SongShow Plus song. Properties that were not found in the file are '' or [].
 */
export interface ISongShowPlusSong {
  songNumber: string;
  title: string;
  artist: string;
  copyright: string;
  ccli: string;
  key: string;
  keywords: string[];
  lyricSections: ISongShowPlusLyricSection[];
}

/**
 * One named group of lyrics, e.g. a verse or chorus.
 */
export interface ISongShowPlusLyricSection {
  title: string;
  lyrics: string;
}