From: funilrys <redacted>
Date: Thu, 10 Aug 2023 14:33:53 +0000 (+0200)
Subject: Add support for exotic TLDs.
X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=560615dd7fcb0a73f9ac05c033e62ada6b31b8ad;p=stevenblack-hosts.git

Add support for exotic TLDs.

Before this patch, we did not support TLDs that contain digits
or dashes (-), as is the case for Punycode-encoded TLDs (e.g., xn--p1ai).
---

diff --git a/testUpdateHostsFile.py b/testUpdateHostsFile.py
index 59bae8e61..0c971478a 100644
--- a/testUpdateHostsFile.py
+++ b/testUpdateHostsFile.py
@@ -840,9 +840,12 @@ class TestNormalizeRule(BaseStdout):
         # Note: "Bare"- Domains are accepted. IP are excluded.
         for rule in [
             "128.0.0.1",
+            "::1",
+            "0.0.0.0 128.0.0.2",
             "0.0.0 google",
             "0.1.2.3.4 foo/bar",
             "0.0.0.0 https",
+            "0.0.0.0 https..",
         ]:
             self.assertEqual(normalize_rule(rule, **kwargs), (None, None))
 
@@ -905,7 +908,7 @@ class TestNormalizeRule(BaseStdout):
             sys.stdout = StringIO()
 
     def test_no_comment_raw(self):
-        for rule in ("twitter.com", "google.com", "foo.bar.edu", "www.example-foo.bar.edu", "www.example-3045.foobar.com"):
+        for rule in ("twitter.com", "google.com", "foo.bar.edu", "www.example-foo.bar.edu", "www.example-3045.foobar.com", "www.example.xn--p1ai"):
             expected = (rule, "0.0.0.0 " + rule + "\n")
 
             actual = normalize_rule(
diff --git a/updateHostsFile.py b/updateHostsFile.py
index 6a96878e9..511565118 100755
--- a/updateHostsFile.py
+++ b/updateHostsFile.py
@@ -1061,7 +1061,10 @@ def normalize_rule(rule, target_ip, keep_domain_comments):
     """
     first try: IP followed by domain
     """
-    regex = r"^\s*(\d{1,3}\.){3}\d{1,3}\s+((?:\w+\.)+[a-zA-Z\.-]+)(.*)"
+
+    # NOTE:
+    #   The TLD part [a-zA-Z0-9\-]+ is intentional: punycode TLDs (e.g., xn--p1ai) contain digits and dashes.
+    regex = r"^\s*(\d{1,3}\.){3}\d{1,3}\s+((?:[\w\-\.]+\.)+[a-zA-Z0-9\-]+)(.*)"
     result = re.search(regex, rule)
 
     if result:
@@ -1090,7 +1093,9 @@ def normalize_rule(rule, target_ip, keep_domain_comments):
     """
     # deny any potential IPv6 address here.
     if ":" not in rule:
-        regex = r"^\s*((?:\w+\.)+[a-zA-Z\.-]+)(.*)"
+        # NOTE:
+        #   The TLD part [a-zA-Z0-9\-]+ is intentional: punycode TLDs (e.g., xn--p1ai) contain digits and dashes.
+        regex = r"^\s*((?:[\w\-\.]+\.)+[a-zA-Z0-9\-]+)(.*)"
         result = re.search(regex, rule)
 
         if result: