Add support for exotic TLDs.

author funilrys <redacted>

Thu, 10 Aug 2023 14:33:53 +0000 (16:33 +0200)

committer funilrys <redacted>

Thu, 10 Aug 2023 14:35:38 +0000 (16:35 +0200)
author funilrys <redacted>
Thu, 10 Aug 2023 14:33:53 +0000 (16:33 +0200)
committer funilrys <redacted>
Thu, 10 Aug 2023 14:35:38 +0000 (16:35 +0200)
diff --git a/testUpdateHostsFile.py b/testUpdateHostsFile.py

index 59bae8e61a19d0273d9c21f78f73de962fd61e5e..0c971478a1e6803e9c0de63f3aac37739b08400e 100644 (file)
--- a/testUpdateHostsFile.py
+++ b/testUpdateHostsFile.py
@@ -840,9 +840,12 @@ class TestNormalizeRule(BaseStdout):
          # Note: "Bare"- Domains are accepted. IP are excluded.
          for rule in [
              "128.0.0.1",
+            "::1",
+            "0.0.0.0 128.0.0.2",
              "0.0.0 google",
              "0.1.2.3.4 foo/bar",
              "0.0.0.0 https",
+            "0.0.0.0 https..",
          ]:
              self.assertEqual(normalize_rule(rule, **kwargs), (None, None))
  
@@ -905,7 +908,7 @@ class TestNormalizeRule(BaseStdout):
              sys.stdout = StringIO()
  
      def test_no_comment_raw(self):
-        for rule in ("twitter.com", "google.com", "foo.bar.edu", "www.example-foo.bar.edu", "www.example-3045.foobar.com"):
+        for rule in ("twitter.com", "google.com", "foo.bar.edu", "www.example-foo.bar.edu", "www.example-3045.foobar.com", "www.example.xn--p1ai"):
              expected = (rule, "0.0.0.0 " + rule + "\n")
  
              actual = normalize_rule(
diff --git a/updateHostsFile.py b/updateHostsFile.py

index 6a96878e92aaa97b49f44e8b8ef3e7e3d89529f7..511565118ad49f07feb3438fef38f9191f5acfb8 100755 (executable)
--- a/updateHostsFile.py
+++ b/updateHostsFile.py
@@ -1061,7 +1061,10 @@ def normalize_rule(rule, target_ip, keep_domain_comments):
      """
      first try: IP followed by domain
      """
-    regex = r"^\s*(\d{1,3}\.){3}\d{1,3}\s+((?:\w+\.)+[a-zA-Z\.-]+)(.*)"
+
+    # NOTE: digits and hyphens in the TLD class [a-zA-Z0-9\-]+ are intentional;
+    #   punycode (IDN) TLDs such as xn--p1ai contain both.
+    regex = r"^\s*(\d{1,3}\.){3}\d{1,3}\s+((?:[\w\-\.]+\.)+[a-zA-Z0-9\-]+)(.*)"
      result = re.search(regex, rule)
  
      if result:
@@ -1090,7 +1093,9 @@ def normalize_rule(rule, target_ip, keep_domain_comments):
      """
      # deny any potential IPv6 address here.
      if ":" not in rule:
-        regex = r"^\s*((?:\w+\.)+[a-zA-Z\.-]+)(.*)"
+        # NOTE: digits and hyphens in the TLD class [a-zA-Z0-9\-]+ are intentional;
+        #   punycode (IDN) TLDs such as xn--p1ai contain both.
+        regex = r"^\s*((?:[\w\-\.]+\.)+[a-zA-Z0-9\-]+)(.*)"
          result = re.search(regex, rule)
  
          if result:
author	funilrys <redacted>
	Thu, 10 Aug 2023 14:33:53 +0000 (16:33 +0200)
committer	funilrys <redacted>
	Thu, 10 Aug 2023 14:35:38 +0000 (16:35 +0200)
testUpdateHostsFile.py		patch \| blob \| history
updateHostsFile.py		patch \| blob \| history