From f2b722bb390911d83ad83cbdb1a103fc80e91f47 Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Thu, 30 Jan 2025 11:28:07 -0500 Subject: [PATCH] Fixed logic for validating HTML Tag and Attribute names Apparently, < is a valid tag name character based on the HTML5 tokenizer specification. Fixes https://github.com/jstedfast/MimeKit/discussions/1136 --- MimeKit/Text/HtmlAttribute.cs | 2 +- MimeKit/Text/HtmlUtils.cs | 27 +++++++++++++++++++++------ MimeKit/Text/HtmlWriter.cs | 4 ++-- UnitTests/Text/HtmlUtilsTests.cs | 10 ++++++++-- 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/MimeKit/Text/HtmlAttribute.cs b/MimeKit/Text/HtmlAttribute.cs index 3c61f10a03..fa9eddcf64 100644 --- a/MimeKit/Text/HtmlAttribute.cs +++ b/MimeKit/Text/HtmlAttribute.cs @@ -80,7 +80,7 @@ public HtmlAttribute (string name, string value) if (name.Length == 0) throw new ArgumentException ("The attribute name cannot be empty.", nameof (name)); - if (!HtmlUtils.IsValidTokenName (name)) + if (!HtmlUtils.IsValidAttributeName (name)) throw new ArgumentException ("Invalid attribute name.", nameof (name)); Value = value; diff --git a/MimeKit/Text/HtmlUtils.cs b/MimeKit/Text/HtmlUtils.cs index 0516552fc0..836e628820 100644 --- a/MimeKit/Text/HtmlUtils.cs +++ b/MimeKit/Text/HtmlUtils.cs @@ -37,6 +37,12 @@ namespace MimeKit.Text { /// public static class HtmlUtils { + // https://dev.w3.org/html5/spec-LC/tokenization.html#attribute-name-state + static readonly string InvalidAttributeNameCharacters = "\0\t\r\n\f /=>\"\'<"; + + // https://dev.w3.org/html5/spec-LC/tokenization.html#tag-name-state + static readonly string InvalidTagNameCharacters = "\0\t\r\n\f />"; + #if NETSTANDARD2_0 || NETFRAMEWORK static void Write (this TextWriter writer, ReadOnlySpan<char> value) { @@ -51,18 +57,27 @@ static void Write (this TextWriter writer, ReadOnlySpan<char> value) } #endif - internal static bool IsValidTokenName (string name) + internal static bool IsValidAttributeName (string name) { if (string.IsNullOrEmpty 
(name)) return false; for (int i = 0; i < name.Length; i++) { - switch (name[i]) { - case '\t': case '\r': case '\n': case '\f': case ' ': - case '<': case '>': case '\'': case '"': - case '/': case '=': + if (InvalidAttributeNameCharacters.IndexOf (name[i]) != -1) + return false; + } + + return true; + } + + internal static bool IsValidTagName (string name) + { + if (string.IsNullOrEmpty (name)) + return false; + + for (int i = 0; i < name.Length; i++) { + if (InvalidTagNameCharacters.IndexOf (name[i]) != -1) return false; - } } return true; diff --git a/MimeKit/Text/HtmlWriter.cs b/MimeKit/Text/HtmlWriter.cs index 31a25840b2..13b0784a99 100644 --- a/MimeKit/Text/HtmlWriter.cs +++ b/MimeKit/Text/HtmlWriter.cs @@ -135,7 +135,7 @@ static void ValidateAttributeName (string name) if (name.Length == 0) throw new ArgumentException ("The attribute name cannot be empty.", nameof (name)); - if (!HtmlUtils.IsValidTokenName (name)) + if (!HtmlUtils.IsValidAttributeName (name)) throw new ArgumentException ($"Invalid attribute name: {name}", nameof (name)); } @@ -147,7 +147,7 @@ static void ValidateTagName (string name) if (name.Length == 0) throw new ArgumentException ("The tag name cannot be empty.", nameof (name)); - if (!HtmlUtils.IsValidTokenName (name)) + if (!HtmlUtils.IsValidTagName (name)) throw new ArgumentException ($"Invalid tag name: {name}", nameof (name)); } diff --git a/UnitTests/Text/HtmlUtilsTests.cs b/UnitTests/Text/HtmlUtilsTests.cs index c66c36588c..6ce21df718 100644 --- a/UnitTests/Text/HtmlUtilsTests.cs +++ b/UnitTests/Text/HtmlUtilsTests.cs @@ -267,9 +267,15 @@ public void TestHtmlNamespaces () } [Test] - public void TestIsValidTokenName () + public void TestIsValidAttributeName () { - Assert.That (HtmlUtils.IsValidTokenName (string.Empty), Is.False, "string.Empty"); + Assert.That (HtmlUtils.IsValidAttributeName (string.Empty), Is.False, "string.Empty"); + } + + [Test] + public void TestIsValidTagName () + { + Assert.That (HtmlUtils.IsValidTagName 
(string.Empty), Is.False, "string.Empty"); } } }