From 8e5debfafeaec444afa89bfdd37e4ab17ae8a270 Mon Sep 17 00:00:00 2001
From: Danielle Smith
Date: Mon, 1 Nov 2021 13:51:11 +0200
Subject: [PATCH] add: email string parser

---
 lib/kdl/types.rb              |   1 +
 lib/kdl/types/email.rb        |  37 ++++++++
 lib/kdl/types/email/parser.rb | 155 ++++++++++++++++++++++++++++++++++
 test/types/email_test.rb      |  58 +++++++++++++
 test/types_test.rb            |   8 +-
 5 files changed, 256 insertions(+), 3 deletions(-)
 create mode 100644 lib/kdl/types/email.rb
 create mode 100644 lib/kdl/types/email/parser.rb
 create mode 100644 test/types/email_test.rb

diff --git a/lib/kdl/types.rb b/lib/kdl/types.rb
index 3a4ede3..112c6ee 100644
--- a/lib/kdl/types.rb
+++ b/lib/kdl/types.rb
@@ -15,5 +15,6 @@ module Types
 require 'kdl/types/base64'
 require 'kdl/types/decimal'
 require 'kdl/types/hostname'
+require 'kdl/types/email'
 
 KDL::Types::MAPPING.freeze
diff --git a/lib/kdl/types/email.rb b/lib/kdl/types/email.rb
new file mode 100644
index 0000000..e57b0bd
--- /dev/null
+++ b/lib/kdl/types/email.rb
@@ -0,0 +1,37 @@
+require_relative './email/parser'
+
+module KDL
+  module Types
+    # KDL value annotated with the `email` type. Keeps the original string and
+    # exposes the local part and domain produced by Email::Parser.
+    class Email < Value
+      attr_reader :local, :domain
+
+      # A single character allowed in an unquoted local part.
+      LOCAL_PART_CHARS = /[a-zA-Z0-9!#\$%&'*+\-\/=?\^_`{|}~]/
+      # A whole unquoted, dot-free local part of 1 to 64 characters. Anchored
+      # with \A and \z so that only the entire string can match; ^ and $ would
+      # also match a single line inside a multi-line string.
+      LOCAL_PART_RGX = /\A[a-zA-Z0-9!#\$%&'*+\-\/=?\^_`{|}~]{1,64}\z/
+
+      # value  - the full email address string
+      # local  - the parsed local part
+      # domain - the parsed domain
+      # kwargs - forwarded unchanged to Value#initialize
+      def initialize(value, local:, domain:, **kwargs)
+        super(value, **kwargs)
+        @local = local
+        @domain = domain
+      end
+
+      # Builds an Email from a value object whose #value is the address string.
+      # Raises ArgumentError (from the parser) when the address is invalid.
+      def self.call(value, type = 'email')
+        local, domain = Parser.new(value.value).parse
+
+        new(value.value, type: type, local: local, domain: domain)
+      end
+    end
+    MAPPING['email'] = Email
+  end
+end
diff --git a/lib/kdl/types/email/parser.rb b/lib/kdl/types/email/parser.rb
new file mode 100644
index 0000000..23fe967
--- /dev/null
+++ b/lib/kdl/types/email/parser.rb
@@ -0,0 +1,155 @@
+module KDL
+  module Types
+    class Email < Value
+      # State machine that consumes Tokenizer tokens and splits an email
+      # address into its local part and domain.
+      class Parser
+        def initialize(string)
+          @string = string
+          @tokenizer = Tokenizer.new(string)
+        end
+
+        # Returns [local, domain]. Raises ArgumentError when a token arrives
+        # in a state that does not allow it, when the domain is not a valid
+        # hostname, or when the local part is longer than 64 characters.
+        def parse
+          local = ''
+          domain = nil
+          # Parser state: what was accepted last; decides what may follow.
+          context = :start
+
+          loop do
+            type, value = @tokenizer.next_token
+
+            case type
+            when :part
+              case context
+              when :start, :after_dot
+                local += value
+                context = :after_part
+              else
+                raise ArgumentError, "invalid email #{@string} (unexpected part #{value} at #{context})"
+              end
+            when :dot
+              case context
+              when :after_part
+                local += value
+                context = :after_dot
+              else
+                raise ArgumentError, "invalid email #{@string} (unexpected dot at #{context})"
+              end
+            when :at
+              case context
+              when :after_part
+                context = :after_at
+              else
+                # Reject a misplaced @ right away, like every other token,
+                # instead of leaving it to a later token's branch to notice.
+                raise ArgumentError, "invalid email #{@string} (unexpected @ at #{context})"
+              end
+            when :domain
+              case context
+              when :after_at
+                raise ArgumentError, "invalid hostname #{value}" unless Hostname.valid_hostname?(value)
+                domain = value
+                context = :after_domain
+              else
+                raise ArgumentError, "invalid email #{@string} (unexpected domain at #{context})"
+              end
+            when :end
+              case context
+              when :after_domain
+                if local.length > 64
+                  raise ArgumentError, "invalid email #{@string} (local part length #{local.length} exceeds maximum of 64)"
+                end
+
+                return [local, domain]
+              else
+                raise ArgumentError, "invalid email #{@string} (unexpected end at #{context})"
+              end
+            end
+          end
+        end
+      end
+
+      # Splits an email string into tokens for Parser: :part, :dot and :at for
+      # the local part, then one :domain token holding everything after the
+      # first unquoted @, and finally :end.
+      class Tokenizer
+        def initialize(string)
+          @string = string
+          @index = 0
+          @after_at = false
+        end
+
+        # Returns the next [type, value] pair. Raises ArgumentError on a
+        # character that is not allowed at the current position.
+        def next_token
+          if @after_at
+            if @index < @string.length
+              # The rest of the string is the domain; Parser validates it.
+              domain_start = @index
+              @index = @string.length
+              return [:domain, @string[domain_start..-1]]
+            else
+              return [:end, nil]
+            end
+          end
+          @context = nil
+          @buffer = ''
+          loop do
+            c = @string[@index]
+            # End of input before any @; a partially read part is dropped.
+            return [:end, nil] if c.nil?
+
+            case @context
+            when nil
+              case c
+              when '.'
+                @index += 1
+                return [:dot, '.']
+              when '@'
+                @after_at = true
+                @index += 1
+                return [:at, '@']
+              when '"'
+                @context = :quote
+                @index += 1
+              when LOCAL_PART_CHARS
+                @context = :part
+                @buffer += c
+                @index += 1
+              else
+                raise ArgumentError, "invalid email #{@string} (unexpected #{c})"
+              end
+            when :part
+              case c
+              when LOCAL_PART_CHARS
+                @buffer += c
+                @index += 1
+              when '.', '@'
+                # Leave the delimiter unread so the next call emits it.
+                return [:part, @buffer]
+              else
+                raise ArgumentError, "invalid email #{@string} (unexpected #{c})"
+              end
+            when :quote
+              case c
+              when '"'
+                # A closing quote must be followed by a dot or the @.
+                n = @string[@index + 1]
+                raise ArgumentError, "invalid email #{@string} (unexpected #{c})" unless n == '.' || n == '@'
+
+                @index += 1
+                return [:part, @buffer]
+              else
+                @buffer += c
+                @index += 1
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/test/types/email_test.rb b/test/types/email_test.rb
new file mode 100644
index 0000000..88ff7e5
--- /dev/null
+++ b/test/types/email_test.rb
@@ -0,0 +1,58 @@
+require "test_helper"
+
+class EmailTest < Minitest::Test
+  def test_email
+    value = KDL::Types::Email.call(::KDL::Value::String.new('danielle@example.com'))
+    assert_equal 'danielle@example.com', value.value
+    assert_equal 'danielle', value.local
+    assert_equal 'example.com', value.domain
+
+    assert_raises { KDL::Types::Email.call(::KDL::Value::String.new('not an email')) }
+  end
+
+  VALID_EMAILS = [
+    ['simple@example.com', 'simple', 'example.com'],
+    ['very.common@example.com', 'very.common', 'example.com'],
+    ['disposable.style.email.with+symbol@example.com', 'disposable.style.email.with+symbol', 'example.com'],
+    ['other.email-with-hyphen@example.com', 'other.email-with-hyphen', 'example.com'],
+    ['fully-qualified-domain@example.com', 'fully-qualified-domain', 'example.com'],
+    ['user.name+tag+sorting@example.com', 'user.name+tag+sorting', 'example.com'],
+    ['x@example.com', 'x', 'example.com'],
+    ['example-indeed@strange-example.com', 'example-indeed', 'strange-example.com'],
+    ['test/test@test.com', 'test/test', 'test.com'],
+    ['admin@mailserver1', 'admin', 'mailserver1'],
+    ['example@s.example', 'example', 's.example'],
+    ['" "@example.org', ' ', 'example.org'],
+    ['"john..doe"@example.org', 'john..doe', 'example.org'],
+    ['mailhost!username@example.org', 'mailhost!username', 'example.org'],
+    ['user%example.com@example.org', 'user%example.com', 'example.org'],
+    ['user-@example.org', 'user-', 'example.org']
+  ]
+
+  def test_valid_emails
+    VALID_EMAILS.each do |email, local, domain|
+      value = KDL::Types::Email.call(::KDL::Value::String.new(email))
+      assert_equal email, value.value
+      assert_equal local, value.local
+      assert_equal domain, value.domain
+    end
+  end
+
+  INVALID_EMAILS = [
+    'Abc.example.com',
+    'A@b@c@example.com',
+    'a"b(c)d,e:f;gi[j\k]l@example.com',
+    'just"not"right@example.com',
+    'this is"not\allowed@example.com',
+    'this\ still\"not\\allowed@example.com',
+    '1234567890123456789012345678901234567890123456789012345678901234+x@example.com',
+    '-some-user-@-example-.com',
+    'QA🦄CHOCOLATE🌈@test.com'
+  ]
+
+  def test_invalid_emails
+    INVALID_EMAILS.each do |email|
+      assert_raises { KDL::Types::Email.call(::KDL::Value::String.new(email)) }
+    end
+  end
+end
diff --git a/test/types_test.rb b/test/types_test.rb
index bde8429..18b1616 100644
--- a/test/types_test.rb
+++ b/test/types_test.rb
@@ -16,9 +16,10 @@ def test_types
          (uuid)"f81d4fae-7dec-11d0-a765-00a0c91e6bf6" \\
          (regex)"asdf" \\
          (base64)"U2VuZCByZWluZm9yY2VtZW50cw==\n" \\
-         (decimal)"10000000000000\n" \\
-         (hostname)"www.example.com\n" \\
-         (idn-hostname)"xn--bcher-kva.example\n"
+         (decimal)"10000000000000" \\
+         (hostname)"www.example.com" \\
+         (idn-hostname)"xn--bcher-kva.example" \\
+         (email)"simple@example.com"
     KDL
 
     refute_nil doc
@@ -38,6 +39,7 @@ def test_types
     assert_kind_of ::KDL::Types::Decimal, doc.nodes.first.arguments[13]
     assert_kind_of ::KDL::Types::Hostname, doc.nodes.first.arguments[14]
     assert_kind_of ::KDL::Types::IDNHostname, doc.nodes.first.arguments[15]
+    assert_kind_of ::KDL::Types::Email, doc.nodes.first.arguments[16]
   end
 
   def test_custom_types