Parse RFC 5321 mailbox addresses. master
authorGeorgios Kontaxis <redacted>
Sun, 28 Mar 2021 21:29:40 +0000 (21:29 +0000)
committerGeorgios Kontaxis <redacted>
Sun, 4 Apr 2021 21:46:33 +0000 (21:46 +0000)
MailAddr.pm [new file with mode: 0644]
is_mailaddr.pl [new file with mode: 0755]
other/alt_is_mailaddr.pl [new file with mode: 0755]
tests/test_data.tgz [new file with mode: 0644]
tests/test_results.tgz [new file with mode: 0644]

diff --git a/MailAddr.pm b/MailAddr.pm
new file mode 100644 (file)
index 0000000..3fb38fa
--- /dev/null
@@ -0,0 +1,129 @@
+# kontaxis 2021-03-28
+
+package MailAddr;
+
+use strict;
+use warnings;
+
+# dcontent = %d33-90 / %d94-126
+my $dcontent = qr/(?:[\x21-\x5A]|(?#
+                    )[\x5E-\x7E])/;
+
+my $digit = qr/[0-9]/;
+
+my $alpha = qr/[A-Z]/i;
+
+# Let-dig = = ALPHA / DIGIT
+my $let_dig = qr/(?:$alpha|$digit)/;
+
+# Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
+my $ldh_str = qr/(?:$alpha|$digit|\-)*$let_dig/;
+
+# Standardized-tag = Ldh-str
+my $standardized_tag = qr/$ldh_str/;
+
+# General-address-literal = Standardized-tag ":" 1*dcontent
+my $general_address_literal = qr/$standardized_tag\:$dcontent+/;
+
+# Decimal integer value in the range 0 through 255.
+my $snum = qr/(?:1?[0-9]{1,2}|2[0-4][0-9]|25[0-5])/;
+
+# IPv4-address-literal = Snum 3("."  Snum)
+my $ipv4_address_literal = qr/$snum(?:\.$snum){3}/;
+
+# HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
+my $hexdig = qr/(?:$digit|A|B|C|D|E|F)/;
+
+# IPv6-hex = 1*4HEXDIG
+my $ipv6_hex = qr/$hexdig{1,4}/;
+
+# IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::"
+#               [IPv6-hex *3(":" IPv6-hex) ":"]
+#               IPv4-address-literal
+my $ipv6v4_comp = qr/(?:$ipv6_hex(?:\:$ipv6_hex){0,3})?\:\:(?#
+                    )(?:$ipv6_hex(?:\:$ipv6_hex){0,3}\:)?$ipv4_address_literal/;
+
+# IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal
+my $ipv6v4_full = qr/$ipv6_hex(?:\:$ipv6_hex){0,5}\:$ipv4_address_literal/;
+
+# IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::"
+#             [IPv6-hex *5(":" IPv6-hex)]
+my $ipv6_comp = qr/(?:$ipv6_hex(?:\:$ipv6_hex){0,5})?\:\:(?#
+                  )(?:$ipv6_hex(?:\:$ipv6_hex){0,5})?/;
+
+# IPv6-full = IPv6-hex 7(":" IPv6-hex)
+my $ipv6_full = qr/$ipv6_hex(?:\:$ipv6_hex){7}/;
+
+# IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp
+my $ipv6_addr = qr/(?:$ipv6_full|$ipv6_comp|$ipv6v4_full|$ipv6v4_comp)/;
+
+# IPv6-address-literal = "IPv6:" IPv6-addr
+my $ipv6_address_literal = qr/IPv6\:$ipv6_addr/;
+
+# address-literal = "[" ( IPv4-address-literal /
+#                   IPv6-address-literal /
+#                   General-address-literal ) "]"
+my $address_literal = qr/\[(?:$ipv4_address_literal|(?#
+                            )$ipv6_address_literal|(?#
+                            )$general_address_literal)\]/;
+
+
+# sub-domain = = Let-dig [Ldh-str]
+my $sub_domain = qr/$let_dig(?:$ldh_str)?/;
+
+# Domain = sub-domain *("." sub-domain)
+my $domain = qr/$sub_domain(?:\.$sub_domain)*/;
+
+
+# quoted-pairSMTP = %d92 %d32-126
+my $quoted_pairSMTP = qr/\x5C[\x20-\x7E]/;
+
+# qtextSMTP = %d32-33 / %d35-91 / %d93-126
+my $qtextSMTP = qr/(?:[\x20-\x21]|(?#
+                     )[\x23-\x5B]|(?#
+                     )[\x5D-\x7E])/;
+
+# QcontentSMTP = qtextSMTP / quoted-pairSMTP
+my $qcontentSMTP = qr/(?:$qtextSMTP|$quoted_pairSMTP)/;
+
+my $dquote = qr/\"/;
+
+# Quoted-string = DQUOTE *QcontentSMTP DQUOTE
+my $quoted_string = qr/$dquote(?:$qcontentSMTP)*$dquote/;
+
+# atext = ALPHA / DIGIT /
+#         "!" / "#" / "$" / "%" /
+#         "&" / "'" / "*" / "+" /
+#         "-" / "/" / "=" / "?" /
+#         "^" / "_" / "`" / "{" /
+#         "|" / "}" / "~"
+# See RFC 5322.
+my $atext = qr/(?:$alpha|$digit|(?#
+                )\!|\#|\$|\%|(?#
+                )\&|\'|\*|\+|(?#
+                )\-|\/|\=|\?|(?#
+                )\^|\_|\`|\{|(?#
+                )\||\}|\~)/;
+
+# Atom = 1*atext
+my $atom = qr/(?:$atext)+/;
+
+# Dot-string = Atom *("." Atom)
+my $dot_string =  qr/$atom(?:\.$atom)*/;
+
+# Local-part = Dot-string / Quoted-string
+my $local_part = qr/(?:$dot_string|$quoted_string)/;
+
+# Mailbox = Local-part "@" ( Domain / address-literal )
+# See RFC 5321.
+my $mailbox = qr/(?<local_part>$local_part)@(?#
+                )(?<domain>$domain|$address_literal)/;
+
+sub mailaddr_parse
+{
+    return undef unless shift =~ m/(?:<$mailbox>|(?#
+                                    )\($mailbox\)|(?#
+                                    )^$mailbox$)/;
+    return $+{local_part}, $+{domain};
+}
+
diff --git a/is_mailaddr.pl b/is_mailaddr.pl
new file mode 100755 (executable)
index 0000000..dfa6d94
--- /dev/null
@@ -0,0 +1,15 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use MailAddr;
+
+foreach my $arg(@ARGV) {
+    my ($local, $domain) = MailAddr::mailaddr_parse $arg;
+    if (!$local || !$domain) {
+        next;
+    }
+    print "$arg\n";
+}
+
diff --git a/other/alt_is_mailaddr.pl b/other/alt_is_mailaddr.pl
new file mode 100755 (executable)
index 0000000..63a43e5
--- /dev/null
@@ -0,0 +1,23 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+my $mailbox = qr/(?<local_part>[^@>]+)@(?<domain>[^>]+)/;
+
+sub mailaddr_parse
+{
+    return undef unless shift =~ m/(?:<$mailbox>|(?#
+                                    )\($mailbox\)|(?#
+                                    )^$mailbox$)/;
+    return $+{local_part}, $+{domain};
+}
+
+foreach my $arg(@ARGV) {
+    my ($local, $domain) = mailaddr_parse $arg;
+    if (!$local || !$domain) {
+        next;
+    }
+    print "$arg\n";
+}
+
diff --git a/tests/test_data.tgz b/tests/test_data.tgz
new file mode 100644 (file)
index 0000000..507a1b7
Binary files /dev/null and b/tests/test_data.tgz differ
diff --git a/tests/test_results.tgz b/tests/test_results.tgz
new file mode 100644 (file)
index 0000000..84c38be
Binary files /dev/null and b/tests/test_results.tgz differ
git clone https://git.99rst.org/PROJECT