From: Georgios Kontaxis Date: Sun, 28 Mar 2021 21:29:40 +0000 (+0000) Subject: Parse RFC 5321 mailbox addresses. X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=5d78a6cd7b7ea52365c1abddfe4aaf946e563c7a;p=ismailaddr.git Parse RFC 5321 mailbox addresses. --- 5d78a6cd7b7ea52365c1abddfe4aaf946e563c7a diff --git a/MailAddr.pm b/MailAddr.pm new file mode 100644 index 0000000..3fb38fa --- /dev/null +++ b/MailAddr.pm @@ -0,0 +1,129 @@ +# kontaxis 2021-03-28 + +package MailAddr; + +use strict; +use warnings; + +# dcontent = %d33-90 / %d94-126 +my $dcontent = qr/(?:[\x21-\x5A]|(?# + )[\x5E-\x7E])/; + +my $digit = qr/[0-9]/; + +my $alpha = qr/[A-Z]/i; + +# Let-dig = = ALPHA / DIGIT +my $let_dig = qr/(?:$alpha|$digit)/; + +# Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig +my $ldh_str = qr/(?:$alpha|$digit|\-)*$let_dig/; + +# Standardized-tag = Ldh-str +my $standardized_tag = qr/$ldh_str/; + +# General-address-literal = Standardized-tag ":" 1*dcontent +my $general_address_literal = qr/$standardized_tag\:$dcontent+/; + +# Decimal integer value in the range 0 through 255. +my $snum = qr/(?:1?[0-9]{1,2}|2[0-4][0-9]|25[0-5])/; + +# IPv4-address-literal = Snum 3("." Snum) +my $ipv4_address_literal = qr/$snum(?:\.$snum){3}/; + +# HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" +my $hexdig = qr/(?:$digit|A|B|C|D|E|F)/; + +# IPv6-hex = 1*4HEXDIG +my $ipv6_hex = qr/$hexdig{1,4}/; + +# IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::" +# [IPv6-hex *3(":" IPv6-hex) ":"] +# IPv4-address-literal +my $ipv6v4_comp = qr/(?:$ipv6_hex(?:\:$ipv6_hex){0,3})?\:\:(?# + )(?:$ipv6_hex(?:\:$ipv6_hex){0,3}\:)?$ipv4_address_literal/; + +# IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal +my $ipv6v4_full = qr/$ipv6_hex(?:\:$ipv6_hex){0,5}\:$ipv4_address_literal/; + +# IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::" +# [IPv6-hex *5(":" IPv6-hex)] +my $ipv6_comp = qr/(?:$ipv6_hex(?:\:$ipv6_hex){0,5})?\:\:(?# + )(?:$ipv6_hex(?:\:$ipv6_hex){0,5})?/; + +# IPv6-full = IPv6-hex 7(":" IPv6-hex) +my $ipv6_full = qr/$ipv6_hex(?:\:$ipv6_hex){7}/; + +# IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp +my $ipv6_addr = qr/(?:$ipv6_full|$ipv6_comp|$ipv6v4_full|$ipv6v4_comp)/; + +# IPv6-address-literal = "IPv6:" IPv6-addr +my $ipv6_address_literal = qr/IPv6\:$ipv6_addr/; + +# address-literal = "[" ( IPv4-address-literal / +# IPv6-address-literal / +# General-address-literal ) "]" +my $address_literal = qr/\[(?:$ipv4_address_literal|(?# + )$ipv6_address_literal|(?# + )$general_address_literal)\]/; + + +# sub-domain = = Let-dig [Ldh-str] +my $sub_domain = qr/$let_dig(?:$ldh_str)?/; + +# Domain = sub-domain *("." sub-domain) +my $domain = qr/$sub_domain(?:\.$sub_domain)*/; + + +# quoted-pairSMTP = %d92 %d32-126 +my $quoted_pairSMTP = qr/\x5C[\x20-\x7E]/; + +# qtextSMTP = %d32-33 / %d35-91 / %d93-126 +my $qtextSMTP = qr/(?:[\x20-\x21]|(?# + )[\x23-\x5B]|(?# + )[\x5D-\x7E])/; + +# QcontentSMTP = qtextSMTP / quoted-pairSMTP +my $qcontentSMTP = qr/(?:$qtextSMTP|$quoted_pairSMTP)/; + +my $dquote = qr/\"/; + +# Quoted-string = DQUOTE *QcontentSMTP DQUOTE +my $quoted_string = qr/$dquote(?:$qcontentSMTP)*$dquote/; + +# atext = ALPHA / DIGIT / +# "!" / "#" / "$" / "%" / +# "&" / "'" / "*" / "+" / +# "-" / "/" / "=" / "?" / +# "^" / "_" / "`" / "{" / +# "|" / "}" / "~" +# See RFC 5322. +my $atext = qr/(?:$alpha|$digit|(?# + )\!|\#|\$|\%|(?# + )\&|\'|\*|\+|(?# + )\-|\/|\=|\?|(?# + )\^|\_|\`|\{|(?# + )\||\}|\~)/; + +# Atom = 1*atext +my $atom = qr/(?:$atext)+/; + +# Dot-string = Atom *("." Atom) +my $dot_string = qr/$atom(?:\.$atom)*/; + +# Local-part = Dot-string / Quoted-string +my $local_part = qr/(?:$dot_string|$quoted_string)/; + +# Mailbox = Local-part "@" ( Domain / address-literal ) +# See RFC 5321. +my $mailbox = qr/(?$local_part)@(?# + )(?$domain|$address_literal)/; + +sub mailaddr_parse +{ + return undef unless shift =~ m/(?:<$mailbox>|(?# + )\($mailbox\)|(?# + )^$mailbox$)/; + return $+{local_part}, $+{domain}; +} + diff --git a/is_mailaddr.pl b/is_mailaddr.pl new file mode 100755 index 0000000..dfa6d94 --- /dev/null +++ b/is_mailaddr.pl @@ -0,0 +1,15 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use MailAddr; + +foreach my $arg(@ARGV) { + my ($local, $domain) = MailAddr::mailaddr_parse $arg; + if (!$local || !$domain) { + next; + } + print "$arg\n"; +} + diff --git a/other/alt_is_mailaddr.pl b/other/alt_is_mailaddr.pl new file mode 100755 index 0000000..63a43e5 --- /dev/null +++ b/other/alt_is_mailaddr.pl @@ -0,0 +1,23 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +my $mailbox = qr/(?[^@>]+)@(?[^>]+)/; + +sub mailaddr_parse +{ + return undef unless shift =~ m/(?:<$mailbox>|(?# + )\($mailbox\)|(?# + )^$mailbox$)/; + return $+{local_part}, $+{domain}; +} + +foreach my $arg(@ARGV) { + my ($local, $domain) = mailaddr_parse $arg; + if (!$local || !$domain) { + next; + } + print "$arg\n"; +} + diff --git a/tests/test_data.tgz b/tests/test_data.tgz new file mode 100644 index 0000000..507a1b7 Binary files /dev/null and b/tests/test_data.tgz differ diff --git a/tests/test_results.tgz b/tests/test_results.tgz new file mode 100644 index 0000000..84c38be Binary files /dev/null and b/tests/test_results.tgz differ