From: funilrys Date: Thu, 10 Aug 2023 14:33:53 +0000 (+0200) Subject: Add support for exotic TLDs. X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=560615dd7fcb0a73f9ac05c033e62ada6b31b8ad;p=stevenblack-hosts.git Add support for exotic TLDs. Before this patch, TLDs containing digits or dashes (-) were not supported; this affected Punycode-encoded TLDs such as "xn--p1ai". --- diff --git a/testUpdateHostsFile.py b/testUpdateHostsFile.py index 59bae8e61..0c971478a 100644 --- a/testUpdateHostsFile.py +++ b/testUpdateHostsFile.py @@ -840,9 +840,12 @@ class TestNormalizeRule(BaseStdout): # Note: "Bare"- Domains are accepted. IP are excluded. for rule in [ "128.0.0.1", + "::1", + "0.0.0.0 128.0.0.2", "0.0.0 google", "0.1.2.3.4 foo/bar", "0.0.0.0 https", + "0.0.0.0 https..", ]: self.assertEqual(normalize_rule(rule, **kwargs), (None, None)) @@ -905,7 +908,7 @@ class TestNormalizeRule(BaseStdout): sys.stdout = StringIO() def test_no_comment_raw(self): - for rule in ("twitter.com", "google.com", "foo.bar.edu", "www.example-foo.bar.edu", "www.example-3045.foobar.com"): + for rule in ("twitter.com", "google.com", "foo.bar.edu", "www.example-foo.bar.edu", "www.example-3045.foobar.com", "www.example.xn--p1ai"): expected = (rule, "0.0.0.0 " + rule + "\n") actual = normalize_rule( diff --git a/updateHostsFile.py b/updateHostsFile.py index 6a96878e9..511565118 100755 --- a/updateHostsFile.py +++ b/updateHostsFile.py @@ -1061,7 +1061,10 @@ def normalize_rule(rule, target_ip, keep_domain_comments): """ first try: IP followed by domain """ - regex = r"^\s*(\d{1,3}\.){3}\d{1,3}\s+((?:\w+\.)+[a-zA-Z\.-]+)(.*)" + + # WARNING: + # [a-zA-Z0-9\-]+ is NOT an issue. (e.g., xn--p1ai TLD - and others). + regex = r"^\s*(\d{1,3}\.){3}\d{1,3}\s+((?:[\w\-\.]+\.)+[a-zA-Z0-9\-]+)(.*)" result = re.search(regex, rule) if result: @@ -1090,7 +1093,9 @@ def normalize_rule(rule, target_ip, keep_domain_comments): """ # deny any potential IPv6 address here. 
if ":" not in rule: - regex = r"^\s*((?:\w+\.)+[a-zA-Z\.-]+)(.*)" + # WARNING: + # [a-zA-Z0-9\-]+ is NOT an issue. (e.g., xn--p1ai TLD - and others). + regex = r"^\s*((?:[\w\-\.]+\.)+[a-zA-Z0-9\-]+)(.*)" result = re.search(regex, rule) if result: