Add support for exotic TLDs.
author funilrys <redacted>
Thu, 10 Aug 2023 14:33:53 +0000 (16:33 +0200)
committer funilrys <redacted>
Thu, 10 Aug 2023 14:35:38 +0000 (16:35 +0200)
Before this patch, we did not support TLDs that contain digits
and dashes (-), as punycode-encoded TLDs do (e.g., xn--p1ai).

testUpdateHostsFile.py
updateHostsFile.py

index 59bae8e61a19d0273d9c21f78f73de962fd61e5e..0c971478a1e6803e9c0de63f3aac37739b08400e 100644 (file)
@@ -840,9 +840,12 @@ class TestNormalizeRule(BaseStdout):
         # Note: "Bare"- Domains are accepted. IP are excluded.
         for rule in [
             "128.0.0.1",
+            "::1",
+            "0.0.0.0 128.0.0.2",
             "0.0.0 google",
             "0.1.2.3.4 foo/bar",
             "0.0.0.0 https",
+            "0.0.0.0 https..",
         ]:
             self.assertEqual(normalize_rule(rule, **kwargs), (None, None))
 
@@ -905,7 +908,7 @@ class TestNormalizeRule(BaseStdout):
             sys.stdout = StringIO()
 
     def test_no_comment_raw(self):
-        for rule in ("twitter.com", "google.com", "foo.bar.edu", "www.example-foo.bar.edu", "www.example-3045.foobar.com"):
+        for rule in ("twitter.com", "google.com", "foo.bar.edu", "www.example-foo.bar.edu", "www.example-3045.foobar.com", "www.example.xn--p1ai"):
             expected = (rule, "0.0.0.0 " + rule + "\n")
 
             actual = normalize_rule(
index 6a96878e92aaa97b49f44e8b8ef3e7e3d89529f7..511565118ad49f07feb3438fef38f9191f5acfb8 100755 (executable)
@@ -1061,7 +1061,10 @@ def normalize_rule(rule, target_ip, keep_domain_comments):
     """
     first try: IP followed by domain
     """
-    regex = r"^\s*(\d{1,3}\.){3}\d{1,3}\s+((?:\w+\.)+[a-zA-Z\.-]+)(.*)"
+
+    # WARNING:
+    #   [a-zA-Z0-9\-]+ is NOT an issue. (e.g., xn--p1ai TLD - and others).
+    regex = r"^\s*(\d{1,3}\.){3}\d{1,3}\s+((?:[\w\-\.]+\.)+[a-zA-Z0-9\-]+)(.*)"
     result = re.search(regex, rule)
 
     if result:
@@ -1090,7 +1093,9 @@ def normalize_rule(rule, target_ip, keep_domain_comments):
     """
     # deny any potential IPv6 address here.
     if ":" not in rule:
-        regex = r"^\s*((?:\w+\.)+[a-zA-Z\.-]+)(.*)"
+        # WARNING:
+        #   [a-zA-Z0-9\-]+ is NOT an issue. (e.g., xn--p1ai TLD - and others).
+        regex = r"^\s*((?:[\w\-\.]+\.)+[a-zA-Z0-9\-]+)(.*)"
         result = re.search(regex, rule)
 
         if result:
git clone https://git.99rst.org/PROJECT