From: Dennis van de Hoef Date: Tue, 23 May 2023 18:03:43 +0000 (+0200) Subject: Add versions that only render the extensions X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=6d48930c6f4374508cc9b6466e65a003a12bfa91;p=stevenblack-hosts.git Add versions that only render the extensions --- diff --git a/makeHosts.py b/makeHosts.py index e18a0b37a..6c7b5a21c 100644 --- a/makeHosts.py +++ b/makeHosts.py @@ -52,6 +52,7 @@ def update_readme_file(): if subprocess.call([sys.executable, "updateReadme.py"]): print_failure("Failed to update readme file") + def recursively_loop_extensions(extension, extensions, current_extensions): """ Helper function that recursively calls itself to prevent manually creating @@ -59,6 +60,7 @@ def recursively_loop_extensions(extension, extensions, current_extensions): Will call update_hosts_file for all combinations of extensions """ + c_extensions = extensions.copy() c_current_extensions = current_extensions.copy() c_current_extensions.append(extension) @@ -68,6 +70,9 @@ def recursively_loop_extensions(extension, extensions, current_extensions): params = ("-a", "-n", "-o", "alternates/"+name, "-e") + tuple(c_current_extensions) update_hosts_file(*params) + params = ("-a", "-n", "-s", "--nounifiedhosts", "-o", "alternates/"+name+"-only", "-e") + tuple(c_current_extensions) + update_hosts_file(*params) + while len(c_extensions) > 0: recursively_loop_extensions(c_extensions.pop(0), c_extensions, c_current_extensions) diff --git a/readme_template.md b/readme_template.md index 0fa19cd5c..f5c04b706 100644 --- a/readme_template.md +++ b/readme_template.md @@ -20,7 +20,7 @@ sources can be found in the `hosts/data/` directory. 
[![last commit](https://img.shields.io/github/last-commit/StevenBlack/hosts.svg)](https://github.com/StevenBlack/hosts/commits/master) [![commit activity](https://img.shields.io/github/commit-activity/y/StevenBlack/hosts.svg)](https://github.com/StevenBlack/hosts/commits/master) -# Unified hosts file @EXTENSIONS_HEADER@ +# @EXTENSIONS_HEADER@ This repository consolidates several reputable `hosts` files, and merges them into a unified hosts file with duplicates removed. A variety of tailored hosts @@ -41,7 +41,7 @@ files are provided. This repository offers [15 different host file variants](https://github.com/StevenBlack/hosts/tree/master/alternates), -in addition to the base variant. +in addition to the base variant, with and without the unified hosts included. The **Non GitHub mirror** is the link to use for some hosts file managers like [Hostsman for Windows](https://www.abelhadigital.com/hostsman/) that don't work @@ -213,6 +213,9 @@ readmeData.json file used for generating readme.md files. This is useful if you are generating host files with additional whitelists or blacklists and want to keep your local checkout of this repo unmodified. +`--nounifiedhosts`: `false` (default) or `true`, do not include the unified hosts +file in the final hosts file. Usually used together with `--extensions`. + `--compress`, or `-c`: `false` (default) or `true`, _Compress_ the hosts file ignoring non-necessary lines (empty lines and comments) and putting multiple domains in each line. 
Reducing the number of lines of the hosts file improves diff --git a/testUpdateHostsFile.py b/testUpdateHostsFile.py index 182122be8..12fe695fb 100644 --- a/testUpdateHostsFile.py +++ b/testUpdateHostsFile.py @@ -113,6 +113,7 @@ class TestGetDefaults(Base): "keepdomaincomments": True, "extensionspath": "foo" + self.sep + "extensions", "extensions": [], + "nounifiedhosts": False, "compress": False, "minimise": False, "outputsubfolder": "", @@ -679,6 +680,7 @@ class TestUpdateSourcesData(Base): datapath=self.data_path, extensionspath=self.extensions_path, sourcedatafilename=self.source_data_filename, + nounifiedhosts=False, ) def update_sources_data(self, sources_data, extensions): @@ -990,7 +992,7 @@ class TestWriteOpeningHeader(BaseMockDir): def test_missing_keyword(self): kwargs = dict( - extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False + extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False, nounifiedhosts=False ) for k in kwargs.keys(): @@ -1003,7 +1005,7 @@ class TestWriteOpeningHeader(BaseMockDir): def test_basic(self): kwargs = dict( - extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=True + extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=True, nounifiedhosts=False ) write_opening_header(self.final_file, **kwargs) @@ -1032,7 +1034,7 @@ class TestWriteOpeningHeader(BaseMockDir): def test_basic_include_static_hosts(self): kwargs = dict( - extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False + extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False, nounifiedhosts=False ) with self.mock_property("platform.system") as obj: obj.return_value = "Windows" @@ -1059,7 +1061,7 @@ class TestWriteOpeningHeader(BaseMockDir): def test_basic_include_static_hosts_linux(self): kwargs = dict( - extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False + extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False, 
nounifiedhosts=False ) with self.mock_property("platform.system") as system: system.return_value = "Linux" @@ -1096,6 +1098,7 @@ class TestWriteOpeningHeader(BaseMockDir): outputsubfolder="", numberofrules=5, skipstatichosts=True, + nounifiedhosts=False, ) write_opening_header(self.final_file, **kwargs) @@ -1123,6 +1126,41 @@ class TestWriteOpeningHeader(BaseMockDir): ): self.assertNotIn(expected, contents) + def test_no_unified_hosts(self): + kwargs = dict( + extensions=["epsilon", "gamma"], + outputsubfolder="", + numberofrules=5, + skipstatichosts=True, + nounifiedhosts=True, + ) + write_opening_header(self.final_file, **kwargs) + + contents = self.final_file.getvalue() + contents = contents.decode("UTF-8") + + # Expected contents. + for expected in ( + ", ".join(kwargs["extensions"]), + "# The unified hosts file was not used while generating this file.", + "# Extensions used to generate this file:", + "# This hosts file is a merged collection", + "# with a dash of crowd sourcing via GitHub", + "# Number of unique domains: {count}".format(count=kwargs["numberofrules"]), + "Fetch the latest version of this file:", + "Project home page: https://github.com/StevenBlack/hosts", + ): + self.assertIn(expected, contents) + + # Expected non-contents. 
+ for expected in ( + "127.0.0.1 localhost", + "127.0.0.1 local", + "127.0.0.53", + "127.0.1.1", + ): + self.assertNotIn(expected, contents) + def _check_preamble(self, check_copy): hosts_file = os.path.join(self.test_dir, "myhosts") hosts_file += ".example" if check_copy else "" @@ -1131,7 +1169,7 @@ class TestWriteOpeningHeader(BaseMockDir): f.write("peter-piper-picked-a-pepper") kwargs = dict( - extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=True + extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=True, nounifiedhosts=False ) with self.mock_property("updateHostsFile.BASEDIR_PATH"): @@ -1180,7 +1218,7 @@ class TestUpdateReadmeData(BaseMockDir): def test_missing_keyword(self): kwargs = dict( - extensions="", outputsubfolder="", numberofrules="", sourcesdata="" + extensions="", outputsubfolder="", numberofrules="", sourcesdata="", nounifiedhosts=False ) for k in kwargs.keys(): @@ -1196,7 +1234,7 @@ class TestUpdateReadmeData(BaseMockDir): json.dump({"foo": "bar"}, f) kwargs = dict( - extensions=None, outputsubfolder="foo", numberofrules=5, sourcesdata="hosts" + extensions=None, outputsubfolder="foo", numberofrules=5, sourcesdata="hosts", nounifiedhosts=False ) update_readme_data(self.readme_file, **kwargs) @@ -1206,7 +1244,7 @@ class TestUpdateReadmeData(BaseMockDir): sep = self.sep expected = { - "base": {"location": "foo" + sep, "sourcesdata": "hosts", "entries": 5}, + "base": {"location": "foo" + sep, 'no_unified_hosts': False, "sourcesdata": "hosts", "entries": 5}, "foo": "bar", } @@ -1219,7 +1257,7 @@ class TestUpdateReadmeData(BaseMockDir): json.dump({"base": "soprano"}, f) kwargs = dict( - extensions=None, outputsubfolder="foo", numberofrules=5, sourcesdata="hosts" + extensions=None, outputsubfolder="foo", numberofrules=5, sourcesdata="hosts", nounifiedhosts=False ) update_readme_data(self.readme_file, **kwargs) @@ -1229,7 +1267,7 @@ class TestUpdateReadmeData(BaseMockDir): sep = self.sep expected = { - "base": 
{"location": "foo" + sep, "sourcesdata": "hosts", "entries": 5} + "base": {"location": "foo" + sep, 'no_unified_hosts': False, "sourcesdata": "hosts", "entries": 5}, } with open(self.readme_file, "r") as f: @@ -1245,6 +1283,33 @@ class TestUpdateReadmeData(BaseMockDir): outputsubfolder="foo", numberofrules=5, sourcesdata="hosts", + nounifiedhosts=False, + ) + update_readme_data(self.readme_file, **kwargs) + + if platform.system().lower() == "windows": + sep = "/" + else: + sep = self.sep + + expected = { + "com-org": {"location": "foo" + sep, 'no_unified_hosts': False, "sourcesdata": "hosts", "entries": 5} + } + + with open(self.readme_file, "r") as f: + actual = json.load(f) + self.assertEqual(actual, expected) + + def test_set_no_unified_hosts(self): + with open(self.readme_file, "w") as f: + json.dump({}, f) + + kwargs = dict( + extensions=["com", "org"], + outputsubfolder="foo", + numberofrules=5, + sourcesdata="hosts", + nounifiedhosts=True, ) update_readme_data(self.readme_file, **kwargs) @@ -1254,7 +1319,7 @@ class TestUpdateReadmeData(BaseMockDir): sep = self.sep expected = { - "com-org": {"location": "foo" + sep, "sourcesdata": "hosts", "entries": 5} + "com-org-only": {"location": "foo" + sep, 'no_unified_hosts': True, "sourcesdata": "hosts", "entries": 5} } with open(self.readme_file, "r") as f: @@ -1424,52 +1489,53 @@ class TestFlushDnsCache(BaseStdout): class TestRemoveOldHostsFile(BaseMockDir): def setUp(self): super(TestRemoveOldHostsFile, self).setUp() - self.hosts_file = os.path.join(self.test_dir, "hosts") + self.hosts_file = "hosts" + self.full_hosts_path = os.path.join(self.test_dir, "hosts") def test_remove_hosts_file(self): old_dir_count = self.dir_count - remove_old_hosts_file(self.hosts_file, backup=False) + remove_old_hosts_file(self.test_dir, self.hosts_file, backup=False) new_dir_count = old_dir_count + 1 self.assertEqual(self.dir_count, new_dir_count) - with open(self.hosts_file, "r") as f: + with open(self.full_hosts_path, "r") as f: 
contents = f.read() self.assertEqual(contents, "") def test_remove_hosts_file_exists(self): - with open(self.hosts_file, "w") as f: + with open(self.full_hosts_path, "w") as f: f.write("foo") old_dir_count = self.dir_count - remove_old_hosts_file(self.hosts_file, backup=False) + remove_old_hosts_file(self.test_dir, self.hosts_file, backup=False) new_dir_count = old_dir_count self.assertEqual(self.dir_count, new_dir_count) - with open(self.hosts_file, "r") as f: + with open(self.full_hosts_path, "r") as f: contents = f.read() self.assertEqual(contents, "") @mock.patch("time.strftime", return_value="new") def test_remove_hosts_file_backup(self, _): - with open(self.hosts_file, "w") as f: + with open(self.full_hosts_path, "w") as f: f.write("foo") old_dir_count = self.dir_count - remove_old_hosts_file(self.hosts_file, backup=True) + remove_old_hosts_file(self.test_dir, self.hosts_file, backup=True) new_dir_count = old_dir_count + 1 self.assertEqual(self.dir_count, new_dir_count) - with open(self.hosts_file, "r") as f: + with open(self.full_hosts_path, "r") as f: contents = f.read() self.assertEqual(contents, "") - new_hosts_file = self.hosts_file + "-new" + new_hosts_file = self.full_hosts_path + "-new" with open(new_hosts_file, "r") as f: contents = f.read() diff --git a/updateHostsFile.py b/updateHostsFile.py index 57a80eb58..fc6bf1915 100755 --- a/updateHostsFile.py +++ b/updateHostsFile.py @@ -72,6 +72,7 @@ def get_defaults(): "keepdomaincomments": True, "extensionspath": path_join_robust(BASEDIR_PATH, "extensions"), "extensions": [], + "nounifiedhosts": False, "compress": False, "minimise": False, "outputsubfolder": "", @@ -124,6 +125,13 @@ def main(): nargs="*", help="Host extensions to include in the final hosts file.", ) + parser.add_argument( + "--nounifiedhosts", + dest="nounifiedhosts", + default=False, + action="store_true", + help="Do not include the unified hosts file in the final hosts file. 
Usually used together with `--extensions`.", + ) parser.add_argument( "--ip", "-i", @@ -248,6 +256,7 @@ def main(): auto = settings["auto"] exclusion_regexes = settings["exclusionregexes"] source_data_filename = settings["sourcedatafilename"] + no_unified_hosts = settings["nounifiedhosts"] update_sources = prompt_for_update(freshen=settings["freshen"], update_auto=auto) if update_sources: @@ -271,9 +280,12 @@ def main(): extensions=extensions, extensionspath=extensions_path, sourcedatafilename=source_data_filename, + nounifiedhosts=no_unified_hosts, ) - merge_file = create_initial_file() + merge_file = create_initial_file( + nounifiedhosts=no_unified_hosts, + ) remove_old_hosts_file(settings["outputpath"], "hosts", settings["backup"]) if settings["compress"]: final_file = open(path_join_robust(settings["outputpath"], "hosts"), "w+b") @@ -298,6 +310,7 @@ def main(): numberofrules=number_of_rules, outputsubfolder=output_subfolder, skipstatichosts=skip_static_hosts, + nounifiedhosts=no_unified_hosts, ) final_file.close() @@ -308,6 +321,7 @@ def main(): numberofrules=number_of_rules, outputsubfolder=output_subfolder, sourcesdata=sources_data, + nounifiedhosts=no_unified_hosts, ) print_success( @@ -666,6 +680,7 @@ def update_sources_data(sources_data, **sources_params): 2) extensions 3) extensionspath 4) sourcedatafilename + 5) nounifiedhosts Returns ------- @@ -675,13 +690,14 @@ def update_sources_data(sources_data, **sources_params): source_data_filename = sources_params["sourcedatafilename"] - for source in sort_sources( - recursive_glob(sources_params["datapath"], source_data_filename) - ): - update_file = open(source, "r", encoding="UTF-8") - update_data = json.load(update_file) - sources_data.append(update_data) - update_file.close() + if not sources_params["nounifiedhosts"]: + for source in sort_sources( + recursive_glob(sources_params["datapath"], source_data_filename) + ): + update_file = open(source, "r", encoding="UTF-8") + update_data = 
json.load(update_file) + sources_data.append(update_data) + update_file.close() for source in sources_params["extensions"]: source_dir = path_join_robust(sources_params["extensionspath"], source) @@ -776,23 +792,32 @@ def update_all_sources(source_data_filename, host_filename): # File Logic -def create_initial_file(): +def create_initial_file(**initial_file_params): """ Initialize the file in which we merge all host files for later pruning. + + Parameters + ---------- + initial_file_params : kwargs + Dictionary providing additional parameters for populating the initial file + information. Currently, those fields are: + + 1) nounifiedhosts """ merge_file = tempfile.NamedTemporaryFile() - # spin the sources for the base file - for source in sort_sources( - recursive_glob(settings["datapath"], settings["hostfilename"]) - ): + if not initial_file_params["nounifiedhosts"]: + # spin the sources for the base file + for source in sort_sources( + recursive_glob(settings["datapath"], settings["hostfilename"]) + ): - start = "# Start {}\n\n".format(os.path.basename(os.path.dirname(source))) - end = "\n# End {}\n\n".format(os.path.basename(os.path.dirname(source))) + start = "# Start {}\n\n".format(os.path.basename(os.path.dirname(source))) + end = "\n# End {}\n\n".format(os.path.basename(os.path.dirname(source))) - with open(source, "r", encoding="UTF-8") as curFile: - write_data(merge_file, start + curFile.read() + end) + with open(source, "r", encoding="UTF-8") as curFile: + write_data(merge_file, start + curFile.read() + end) # spin the sources for extensions to the base file for source in settings["extensions"]: @@ -1113,6 +1138,7 @@ def write_opening_header(final_file, **header_params): 2) numberofrules 3) outputsubfolder 4) skipstatichosts + 5) nounifiedhosts """ final_file.seek(0) # Reset file pointer. @@ -1120,22 +1146,41 @@ def write_opening_header(final_file, **header_params): final_file.seek(0) # Write at the top. 
+ no_unified_hosts = header_params["nounifiedhosts"] + if header_params["extensions"]: - if len(header_params["extensions"]) > 1: - write_data( - final_file, - "# Title: StevenBlack/hosts with the {0} and {1} extensions\n#\n".format( - ", ".join(header_params["extensions"][:-1]), - header_params["extensions"][-1], - ), - ) + if no_unified_hosts: + if len(header_params["extensions"]) > 1: + write_data( + final_file, + "# Title: StevenBlack/hosts extensions {0} and {1} \n#\n".format( + ", ".join(header_params["extensions"][:-1]), + header_params["extensions"][-1], + ), + ) + else: + write_data( + final_file, + "# Title: StevenBlack/hosts extension {0}\n#\n".format( + ", ".join(header_params["extensions"]) + ), + ) else: - write_data( - final_file, - "# Title: StevenBlack/hosts with the {0} extension\n#\n".format( - ", ".join(header_params["extensions"]) - ), - ) + if len(header_params["extensions"]) > 1: + write_data( + final_file, + "# Title: StevenBlack/hosts with the {0} and {1} extensions\n#\n".format( + ", ".join(header_params["extensions"][:-1]), + header_params["extensions"][-1], + ), + ) + else: + write_data( + final_file, + "# Title: StevenBlack/hosts with the {0} extension\n#\n".format( + ", ".join(header_params["extensions"]) + ), + ) else: write_data(final_file, "# Title: StevenBlack/hosts\n#\n") @@ -1151,12 +1196,21 @@ def write_opening_header(final_file, **header_params): ) if header_params["extensions"]: - write_data( - final_file, - "# Extensions added to this file: " - + ", ".join(header_params["extensions"]) - + "\n", - ) + if header_params["nounifiedhosts"]: + write_data( + final_file, + "# The unified hosts file was not used while generating this file.\n" + "# Extensions used to generate this file: " + + ", ".join(header_params["extensions"]) + + "\n", + ) + else: + write_data( + final_file, + "# Extensions added to this file: " + + ", ".join(header_params["extensions"]) + + "\n", + ) write_data( final_file, @@ -1234,17 +1288,22 @@ def 
update_readme_data(readme_file, **readme_updates): 2) sourcesdata 3) numberofrules 4) outputsubfolder + 5) nounifiedhosts """ extensions_key = "base" extensions = readme_updates["extensions"] + no_unified_hosts = readme_updates["nounifiedhosts"] if extensions: extensions_key = "-".join(extensions) + if no_unified_hosts: + extensions_key = extensions_key + "-only" output_folder = readme_updates["outputsubfolder"] generation_data = { "location": path_join_robust(output_folder, ""), + "no_unified_hosts": no_unified_hosts, "entries": readme_updates["numberofrules"], "sourcesdata": readme_updates["sourcesdata"], } diff --git a/updateReadme.py b/updateReadme.py index 1267b704d..4ab581ac3 100755 --- a/updateReadme.py +++ b/updateReadme.py @@ -32,7 +32,7 @@ def main(): keys = list(data.keys()) # Sort by the number of en-dashes in the key # and then by the key string itself. - keys.sort(key=lambda item: (item.count("-"), item)) + keys.sort(key=lambda item: (item.replace("-only", "").count("-"), item.replace("-only", ""))) toc_rows = "" for key in keys: @@ -40,9 +40,14 @@ def main(): if key == "base": data[key]["description"] = "Unified hosts = **(adware + malware)**" else: - data[key]["description"] = ( - "Unified hosts **+ " + key.replace("-", " + ") + "**" - ) + if data[key]["no_unified_hosts"]: + data[key]["description"] = ( + "**" + key.replace("-only", "").replace("-", " + ") + "**" + ) + else: + data[key]["description"] = ( + "Unified hosts **+ " + key.replace("-", " + ") + "**" + ) if "\\" in data[key]["location"]: data[key]["location"] = data[key]["location"].replace("\\", "/") @@ -64,9 +69,12 @@ def main(): ) size_history_graph = "![Size history](https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts_file_size_history.png)" for key in keys: - extensions = key.replace("-", ", ") + extensions = key.replace("-only", "").replace("-", ", ") extensions_str = "* Extensions: **" + extensions + "**." 
- extensions_header = "with " + extensions + " extensions" + if data[key]["no_unified_hosts"]: + extensions_header = "Limited to the extensions: " + extensions + else: + extensions_header = "Unified hosts file with " + extensions + " extensions" source_rows = "" source_list = data[key]["sourcesdata"]