From aba7aa0531d89904daf71ea24f270cdfb0a7840f Mon Sep 17 00:00:00 2001 From: oozcitak Date: Fri, 30 Jul 2021 18:11:19 +0300 Subject: [PATCH] Decode numeric entities and quotes. Fixes #99 --- src/readers/BaseReader.ts | 23 ++++++++++++++++------- src/writers/BaseWriter.ts | 5 ++--- test/callback/basic.test.ts | 4 ++-- test/issues/issue-016.test.ts | 2 +- test/issues/issue-099.test.ts | 8 ++++++-- test/writers/XMLWriter.test.ts | 4 ++-- 6 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/readers/BaseReader.ts b/src/readers/BaseReader.ts index 77556f7..be447ef 100644 --- a/src/readers/BaseReader.ts +++ b/src/readers/BaseReader.ts @@ -2,11 +2,18 @@ import { XMLBuilderOptions, ExpandObject, XMLBuilder } from "../interfaces" import { sanitizeInput } from "../builder/dom" /** - * Pre-serializes XML nodes. + * Parses XML nodes. */ export abstract class BaseReader { protected _builderOptions: XMLBuilderOptions + private static _entityTable: { [key: string]: string } = { + "lt": "<", + "gt": ">", + "amp": "&", + "quot": '"', + "apos": "'", + } /** * Initializes a new instance of `BaseReader`. @@ -60,9 +67,13 @@ export abstract class BaseReader { * @param text - text value to serialize */ _decodeText(text: string): string { - return text == null ? text : text.replace(/</g, '<') - .replace(/>/g, '>') - .replace(/&/g, '&') + if (text == null) return text + + return text.replace(/&(quot|amp|apos|lt|gt);/g, (_match, tag) => + BaseReader._entityTable[tag] + ).replace(/&#(?:x([a-fA-F0-9]+)|([0-9]+));/g, (_match, hexStr, numStr) => + String.fromCodePoint(parseInt(hexStr || numStr, hexStr ? 16 : 10)) + ) } /** @@ -71,9 +82,7 @@ export abstract class BaseReader { * @param text - attribute value to serialize */ _decodeAttributeValue(text: string): string { - return text == null ? text : text.replace(/</g, '<') - .replace(/>/g, '>') - .replace(/&/g, '&') + return this._decodeText(text) } /** diff --git a/src/writers/BaseWriter.ts b/src/writers/BaseWriter.ts index cf78b1a..e4d1a63 100644 --- a/src/writers/BaseWriter.ts +++ b/src/writers/BaseWriter.ts @@ -5,7 +5,6 @@ import { } from "@oozcitak/dom/lib/dom/interfaces" import { LocalNameSet } from "@oozcitak/dom/lib/serializer/LocalNameSet" import { NamespacePrefixMap } from "@oozcitak/dom/lib/serializer/NamespacePrefixMap" -import { InvalidStateError } from "@oozcitak/dom/lib/dom/DOMException" import { namespace as infraNamespace } from "@oozcitak/infra" import { xml_isName, xml_isLegalChar, xml_isPubidChar } from "@oozcitak/dom/lib/algorithm" @@ -929,7 +928,7 @@ export abstract class BaseWriter" in markup by ">". * 6. Return the value of markup. */ - const markup = node.data.replace(/(?!&(lt|gt|amp|apos|quot);)&/g, '&') + const markup = node.data.replace(/(?!&([^&;]*);)&/g, '&') .replace(//g, '>') @@ -1598,7 +1597,7 @@ export abstract class BaseWriter" characters. */ - return value.replace(/(?!&(lt|gt|amp|apos|quot);)&/g, '&') + return value.replace(/(?!&([^&;]*);)&/g, '&') .replace(//g, '>') .replace(/"/g, '"') diff --git a/test/callback/basic.test.ts b/test/callback/basic.test.ts index c7d5c93..76bbc1b 100644 --- a/test/callback/basic.test.ts +++ b/test/callback/basic.test.ts @@ -173,8 +173,8 @@ describe('basic callback API tests', () => { xmlStream.ele(obj).end() $$.expectCBResult(xmlStream, - '' + - 'XML entities for ampersand are & and &#38;.' + + '' + + 'XML entities for ampersand are & and &.' + '', done) }) diff --git a/test/issues/issue-016.test.ts b/test/issues/issue-016.test.ts index 84183c4..08b81b0 100644 --- a/test/issues/issue-016.test.ts +++ b/test/issues/issue-016.test.ts @@ -9,7 +9,7 @@ describe("Replicate issue", () => { { format: "xml" } ) ).toBe( - '<p>Hello&nbsp;World</p>' + '<p>Hello World</p>' ); expect( $$.convert( diff --git a/test/issues/issue-099.test.ts b/test/issues/issue-099.test.ts index f9a0acd..e020aed 100644 --- a/test/issues/issue-099.test.ts +++ b/test/issues/issue-099.test.ts @@ -3,10 +3,14 @@ import $$ from "../TestHelpers"; describe("Replicate issue", () => { // https://github.com/oozcitak/xmlbuilder2/issues/99 test(`#99 - Most XML entities not being decoded by reader (with unknown entity)`, () => { - expect(() => $$.create('Me & Myself's WiFi ©𝌆')).toThrow() + const xmlResponse = $$.create('Me & Myself's WiFi ©𝌆'); + const networkResponse = xmlResponse.end({format: 'object'}); + expect(networkResponse).toEqual({ + ssid: `Me & Myself's WiFi ©𝌆` + }) }) - test(`#99 - Most XML entities not being decoded by reader`, () => { + test(`#99 - Most XML entities not being decoded by reader (without unknown entity)`, () => { const xmlResponse = $$.create('Me & Myself's WiFi 𝌆'); const networkResponse = xmlResponse.end({format: 'object'}); expect(networkResponse).toEqual({ diff --git a/test/writers/XMLWriter.test.ts b/test/writers/XMLWriter.test.ts index 3c43e07..c08673d 100644 --- a/test/writers/XMLWriter.test.ts +++ b/test/writers/XMLWriter.test.ts @@ -596,8 +596,8 @@ describe('XMLWriter', () => { expect($$.create(obj).end()).toBe( '' + - '' + - 'XML entities for ampersand are & and &#38;.' + + '' + + 'XML entities for ampersand are & and &.' + '') })