Add versions that only render the extensions
author     Dennis van de Hoef <redacted>   Tue, 23 May 2023 18:03:43 +0000 (20:03 +0200)
committer  Steven Black <redacted>         Mon, 12 Jun 2023 02:32:05 +0000 (22:32 -0400)
makeHosts.py
readme_template.md
testUpdateHostsFile.py
updateHostsFile.py
updateReadme.py

diff --git a/makeHosts.py b/makeHosts.py
index e18a0b37ad39a07400285c28fa4aa37073b0824d..6c7b5a21cfb49b0416104e6f0f581cd302eddfbd 100644
@@ -52,6 +52,7 @@ def update_readme_file():
     if subprocess.call([sys.executable, "updateReadme.py"]):
         print_failure("Failed to update readme file")
 
+
 def recursively_loop_extensions(extension, extensions, current_extensions):
     """
     Helper function that recursively calls itself to prevent manually creating
@@ -59,6 +60,7 @@ def recursively_loop_extensions(extension, extensions, current_extensions):
 
     Will call update_hosts_file for all combinations of extensions
     """
+
     c_extensions = extensions.copy()
     c_current_extensions = current_extensions.copy()
     c_current_extensions.append(extension)
@@ -68,6 +70,9 @@ def recursively_loop_extensions(extension, extensions, current_extensions):
     params = ("-a", "-n", "-o", "alternates/"+name, "-e") + tuple(c_current_extensions)
     update_hosts_file(*params)
 
+    params = ("-a", "-n", "-s", "--nounifiedhosts", "-o", "alternates/"+name+"-only", "-e") + tuple(c_current_extensions)
+    update_hosts_file(*params)
+
     while len(c_extensions) > 0:
         recursively_loop_extensions(c_extensions.pop(0), c_extensions, c_current_extensions)
 
diff --git a/readme_template.md b/readme_template.md
index 0fa19cd5c47dd77c44a8a419493bd331a9889bd2..f5c04b706e29279319ddd4e1bb56cd293ea02608 100644
@@ -20,7 +20,7 @@ sources can be found in the `hosts/data/` directory.
 [![last commit](https://img.shields.io/github/last-commit/StevenBlack/hosts.svg)](https://github.com/StevenBlack/hosts/commits/master)
 [![commit activity](https://img.shields.io/github/commit-activity/y/StevenBlack/hosts.svg)](https://github.com/StevenBlack/hosts/commits/master)
 
-# Unified hosts file @EXTENSIONS_HEADER@
+# @EXTENSIONS_HEADER@
 
 This repository consolidates several reputable `hosts` files, and merges them
 into a unified hosts file with duplicates removed. A variety of tailored hosts
@@ -41,7 +41,7 @@ files are provided.
 
 This repository offers
 [15 different host file variants](https://github.com/StevenBlack/hosts/tree/master/alternates),
-in addition to the base variant.
+in addition to the base variant, with and without the unified hosts included.
 
 The **Non GitHub mirror** is the link to use for some hosts file managers like
 [Hostsman for Windows](https://www.abelhadigital.com/hostsman/) that don't work
@@ -213,6 +213,9 @@ readmeData.json file used for generating readme.md files. This is useful if you
 are generating host files with additional whitelists or blacklists and want to
 keep your local checkout of this repo unmodified.
 
+`--nounifiedhosts`: `false` (default) or `true`, do not include the unified hosts
+file in the final hosts file. Usually used together with `--extensions`.
+
 `--compress`, or `-c`: `false` (default) or `true`, _Compress_ the hosts file
 ignoring non-necessary lines (empty lines and comments) and putting multiple
 domains in each line. Reducing the number of lines of the hosts file improves
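
For reference, a usage sketch of the new --nounifiedhosts flag documented in this hunk, driving updateHostsFile.py the same way the makeHosts.py change above does (the output folder and extension names are illustrative):

    import subprocess
    import sys

    # Build an extensions-only variant: -a, -n and -s mirror the makeHosts.py
    # invocation in this commit, --nounifiedhosts skips the unified hosts sources,
    # and -e lists the extensions to render.
    subprocess.check_call(
        [
            sys.executable,
            "updateHostsFile.py",
            "-a", "-n", "-s",
            "--nounifiedhosts",
            "-o", "alternates/fakenews-gambling-only",
            "-e", "fakenews", "gambling",
        ]
    )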
diff --git a/testUpdateHostsFile.py b/testUpdateHostsFile.py
index 182122be8234b3c1e4d8844581bfb2b442ef5d19..12fe695fb964e9814d7bbd4960bfd56ce6121291 100644
@@ -113,6 +113,7 @@ class TestGetDefaults(Base):
                 "keepdomaincomments": True,
                 "extensionspath": "foo" + self.sep + "extensions",
                 "extensions": [],
+                "nounifiedhosts": False,
                 "compress": False,
                 "minimise": False,
                 "outputsubfolder": "",
@@ -679,6 +680,7 @@ class TestUpdateSourcesData(Base):
             datapath=self.data_path,
             extensionspath=self.extensions_path,
             sourcedatafilename=self.source_data_filename,
+            nounifiedhosts=False,
         )
 
     def update_sources_data(self, sources_data, extensions):
@@ -990,7 +992,7 @@ class TestWriteOpeningHeader(BaseMockDir):
 
     def test_missing_keyword(self):
         kwargs = dict(
-            extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False
+            extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False, nounifiedhosts=False
         )
 
         for k in kwargs.keys():
@@ -1003,7 +1005,7 @@ class TestWriteOpeningHeader(BaseMockDir):
 
     def test_basic(self):
         kwargs = dict(
-            extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=True
+            extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=True, nounifiedhosts=False
         )
         write_opening_header(self.final_file, **kwargs)
 
@@ -1032,7 +1034,7 @@ class TestWriteOpeningHeader(BaseMockDir):
 
     def test_basic_include_static_hosts(self):
         kwargs = dict(
-            extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False
+            extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False, nounifiedhosts=False
         )
         with self.mock_property("platform.system") as obj:
             obj.return_value = "Windows"
@@ -1059,7 +1061,7 @@ class TestWriteOpeningHeader(BaseMockDir):
 
     def test_basic_include_static_hosts_linux(self):
         kwargs = dict(
-            extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False
+            extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=False, nounifiedhosts=False
         )
         with self.mock_property("platform.system") as system:
             system.return_value = "Linux"
@@ -1096,6 +1098,7 @@ class TestWriteOpeningHeader(BaseMockDir):
             outputsubfolder="",
             numberofrules=5,
             skipstatichosts=True,
+            nounifiedhosts=False,
         )
         write_opening_header(self.final_file, **kwargs)
 
@@ -1123,6 +1126,41 @@ class TestWriteOpeningHeader(BaseMockDir):
         ):
             self.assertNotIn(expected, contents)
 
+    def test_no_unified_hosts(self):
+        kwargs = dict(
+            extensions=["epsilon", "gamma"],
+            outputsubfolder="",
+            numberofrules=5,
+            skipstatichosts=True,
+            nounifiedhosts=True,
+        )
+        write_opening_header(self.final_file, **kwargs)
+
+        contents = self.final_file.getvalue()
+        contents = contents.decode("UTF-8")
+
+        # Expected contents.
+        for expected in (
+            ", ".join(kwargs["extensions"]),
+            "# The unified hosts file was not used while generating this file.",
+            "# Extensions used to generate this file:",
+            "# This hosts file is a merged collection",
+            "# with a dash of crowd sourcing via GitHub",
+            "# Number of unique domains: {count}".format(count=kwargs["numberofrules"]),
+            "Fetch the latest version of this file:",
+            "Project home page: https://github.com/StevenBlack/hosts",
+        ):
+            self.assertIn(expected, contents)
+
+        # Expected non-contents.
+        for expected in (
+            "127.0.0.1 localhost",
+            "127.0.0.1 local",
+            "127.0.0.53",
+            "127.0.1.1",
+        ):
+            self.assertNotIn(expected, contents)
+
     def _check_preamble(self, check_copy):
         hosts_file = os.path.join(self.test_dir, "myhosts")
         hosts_file += ".example" if check_copy else ""
@@ -1131,7 +1169,7 @@ class TestWriteOpeningHeader(BaseMockDir):
             f.write("peter-piper-picked-a-pepper")
 
         kwargs = dict(
-            extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=True
+            extensions="", outputsubfolder="", numberofrules=5, skipstatichosts=True, nounifiedhosts=False
         )
 
         with self.mock_property("updateHostsFile.BASEDIR_PATH"):
@@ -1180,7 +1218,7 @@ class TestUpdateReadmeData(BaseMockDir):
 
     def test_missing_keyword(self):
         kwargs = dict(
-            extensions="", outputsubfolder="", numberofrules="", sourcesdata=""
+            extensions="", outputsubfolder="", numberofrules="", sourcesdata="", nounifiedhosts=False
         )
 
         for k in kwargs.keys():
@@ -1196,7 +1234,7 @@ class TestUpdateReadmeData(BaseMockDir):
             json.dump({"foo": "bar"}, f)
 
         kwargs = dict(
-            extensions=None, outputsubfolder="foo", numberofrules=5, sourcesdata="hosts"
+            extensions=None, outputsubfolder="foo", numberofrules=5, sourcesdata="hosts", nounifiedhosts=False
         )
         update_readme_data(self.readme_file, **kwargs)
 
@@ -1206,7 +1244,7 @@ class TestUpdateReadmeData(BaseMockDir):
             sep = self.sep
 
         expected = {
-            "base": {"location": "foo" + sep, "sourcesdata": "hosts", "entries": 5},
+            "base": {"location": "foo" + sep, 'no_unified_hosts': False, "sourcesdata": "hosts", "entries": 5},
             "foo": "bar",
         }
 
@@ -1219,7 +1257,7 @@ class TestUpdateReadmeData(BaseMockDir):
             json.dump({"base": "soprano"}, f)
 
         kwargs = dict(
-            extensions=None, outputsubfolder="foo", numberofrules=5, sourcesdata="hosts"
+            extensions=None, outputsubfolder="foo", numberofrules=5, sourcesdata="hosts", nounifiedhosts=False
         )
         update_readme_data(self.readme_file, **kwargs)
 
@@ -1229,7 +1267,7 @@ class TestUpdateReadmeData(BaseMockDir):
             sep = self.sep
 
         expected = {
-            "base": {"location": "foo" + sep, "sourcesdata": "hosts", "entries": 5}
+            "base": {"location": "foo" + sep, 'no_unified_hosts': False, "sourcesdata": "hosts", "entries": 5},
         }
 
         with open(self.readme_file, "r") as f:
@@ -1245,6 +1283,33 @@ class TestUpdateReadmeData(BaseMockDir):
             outputsubfolder="foo",
             numberofrules=5,
             sourcesdata="hosts",
+            nounifiedhosts=False,
+        )
+        update_readme_data(self.readme_file, **kwargs)
+
+        if platform.system().lower() == "windows":
+            sep = "/"
+        else:
+            sep = self.sep
+
+        expected = {
+            "com-org": {"location": "foo" + sep, 'no_unified_hosts': False, "sourcesdata": "hosts", "entries": 5}
+        }
+
+        with open(self.readme_file, "r") as f:
+            actual = json.load(f)
+            self.assertEqual(actual, expected)
+
+    def test_set_no_unified_hosts(self):
+        with open(self.readme_file, "w") as f:
+            json.dump({}, f)
+
+        kwargs = dict(
+            extensions=["com", "org"],
+            outputsubfolder="foo",
+            numberofrules=5,
+            sourcesdata="hosts",
+            nounifiedhosts=True,
         )
         update_readme_data(self.readme_file, **kwargs)
 
@@ -1254,7 +1319,7 @@ class TestUpdateReadmeData(BaseMockDir):
             sep = self.sep
 
         expected = {
-            "com-org": {"location": "foo" + sep, "sourcesdata": "hosts", "entries": 5}
+            "com-org-only": {"location": "foo" + sep, 'no_unified_hosts': True, "sourcesdata": "hosts", "entries": 5}
         }
 
         with open(self.readme_file, "r") as f:
@@ -1424,52 +1489,53 @@ class TestFlushDnsCache(BaseStdout):
 class TestRemoveOldHostsFile(BaseMockDir):
     def setUp(self):
         super(TestRemoveOldHostsFile, self).setUp()
-        self.hosts_file = os.path.join(self.test_dir, "hosts")
+        self.hosts_file = "hosts"
+        self.full_hosts_path = os.path.join(self.test_dir, "hosts")
 
     def test_remove_hosts_file(self):
         old_dir_count = self.dir_count
 
-        remove_old_hosts_file(self.hosts_file, backup=False)
+        remove_old_hosts_file(self.test_dir, self.hosts_file, backup=False)
 
         new_dir_count = old_dir_count + 1
         self.assertEqual(self.dir_count, new_dir_count)
 
-        with open(self.hosts_file, "r") as f:
+        with open(self.full_hosts_path, "r") as f:
             contents = f.read()
             self.assertEqual(contents, "")
 
     def test_remove_hosts_file_exists(self):
-        with open(self.hosts_file, "w") as f:
+        with open(self.full_hosts_path, "w") as f:
             f.write("foo")
 
         old_dir_count = self.dir_count
 
-        remove_old_hosts_file(self.hosts_file, backup=False)
+        remove_old_hosts_file(self.test_dir, self.hosts_file, backup=False)
 
         new_dir_count = old_dir_count
         self.assertEqual(self.dir_count, new_dir_count)
 
-        with open(self.hosts_file, "r") as f:
+        with open(self.full_hosts_path, "r") as f:
             contents = f.read()
             self.assertEqual(contents, "")
 
     @mock.patch("time.strftime", return_value="new")
     def test_remove_hosts_file_backup(self, _):
-        with open(self.hosts_file, "w") as f:
+        with open(self.full_hosts_path, "w") as f:
             f.write("foo")
 
         old_dir_count = self.dir_count
 
-        remove_old_hosts_file(self.hosts_file, backup=True)
+        remove_old_hosts_file(self.test_dir, self.hosts_file, backup=True)
 
         new_dir_count = old_dir_count + 1
         self.assertEqual(self.dir_count, new_dir_count)
 
-        with open(self.hosts_file, "r") as f:
+        with open(self.full_hosts_path, "r") as f:
             contents = f.read()
             self.assertEqual(contents, "")
 
-        new_hosts_file = self.hosts_file + "-new"
+        new_hosts_file = self.full_hosts_path + "-new"
 
         with open(new_hosts_file, "r") as f:
             contents = f.read()
diff --git a/updateHostsFile.py b/updateHostsFile.py
index 57a80eb5813e135c5975b812c8a31f012fc03944..fc6bf19155266fa1e920b9e79566e6a770507469 100755
@@ -72,6 +72,7 @@ def get_defaults():
         "keepdomaincomments": True,
         "extensionspath": path_join_robust(BASEDIR_PATH, "extensions"),
         "extensions": [],
+        "nounifiedhosts": False,
         "compress": False,
         "minimise": False,
         "outputsubfolder": "",
@@ -124,6 +125,13 @@ def main():
         nargs="*",
         help="Host extensions to include in the final hosts file.",
     )
+    parser.add_argument(
+        "--nounifiedhosts",
+        dest="nounifiedhosts",
+        default=False,
+        action="store_true",
+        help="Do not include the unified hosts file in the final hosts file. Usually used together with `--extensions`.",
+    )
     parser.add_argument(
         "--ip",
         "-i",
@@ -248,6 +256,7 @@ def main():
     auto = settings["auto"]
     exclusion_regexes = settings["exclusionregexes"]
     source_data_filename = settings["sourcedatafilename"]
+    no_unified_hosts = settings["nounifiedhosts"]
 
     update_sources = prompt_for_update(freshen=settings["freshen"], update_auto=auto)
     if update_sources:
@@ -271,9 +280,12 @@ def main():
         extensions=extensions,
         extensionspath=extensions_path,
         sourcedatafilename=source_data_filename,
+        nounifiedhosts=no_unified_hosts,
     )
 
-    merge_file = create_initial_file()
+    merge_file = create_initial_file(
+        nounifiedhosts=no_unified_hosts,
+    )
     remove_old_hosts_file(settings["outputpath"], "hosts", settings["backup"])
     if settings["compress"]:
         final_file = open(path_join_robust(settings["outputpath"], "hosts"), "w+b")
@@ -298,6 +310,7 @@ def main():
         numberofrules=number_of_rules,
         outputsubfolder=output_subfolder,
         skipstatichosts=skip_static_hosts,
+        nounifiedhosts=no_unified_hosts,
     )
     final_file.close()
 
@@ -308,6 +321,7 @@ def main():
             numberofrules=number_of_rules,
             outputsubfolder=output_subfolder,
             sourcesdata=sources_data,
+            nounifiedhosts=no_unified_hosts,
         )
 
     print_success(
@@ -666,6 +680,7 @@ def update_sources_data(sources_data, **sources_params):
         2) extensions
         3) extensionspath
         4) sourcedatafilename
+        5) nounifiedhosts
 
     Returns
     -------
@@ -675,13 +690,14 @@ def update_sources_data(sources_data, **sources_params):
 
     source_data_filename = sources_params["sourcedatafilename"]
 
-    for source in sort_sources(
-        recursive_glob(sources_params["datapath"], source_data_filename)
-    ):
-        update_file = open(source, "r", encoding="UTF-8")
-        update_data = json.load(update_file)
-        sources_data.append(update_data)
-        update_file.close()
+    if not sources_params["nounifiedhosts"]:
+        for source in sort_sources(
+            recursive_glob(sources_params["datapath"], source_data_filename)
+        ):
+            update_file = open(source, "r", encoding="UTF-8")
+            update_data = json.load(update_file)
+            sources_data.append(update_data)
+            update_file.close()
 
     for source in sources_params["extensions"]:
         source_dir = path_join_robust(sources_params["extensionspath"], source)
@@ -776,23 +792,32 @@ def update_all_sources(source_data_filename, host_filename):
 
 
 # File Logic
-def create_initial_file():
+def create_initial_file(**initial_file_params):
     """
     Initialize the file in which we merge all host files for later pruning.
+
+    Parameters
+    ----------
+    initial_file_params : kwargs
+        Dictionary providing additional parameters for populating the initial file
+        information. Currently, those fields are:
+
+        1) nounifiedhosts
     """
 
     merge_file = tempfile.NamedTemporaryFile()
 
-    # spin the sources for the base file
-    for source in sort_sources(
-        recursive_glob(settings["datapath"], settings["hostfilename"])
-    ):
+    if not initial_file_params["nounifiedhosts"]:
+        # spin the sources for the base file
+        for source in sort_sources(
+            recursive_glob(settings["datapath"], settings["hostfilename"])
+        ):
 
-        start = "# Start {}\n\n".format(os.path.basename(os.path.dirname(source)))
-        end = "\n# End {}\n\n".format(os.path.basename(os.path.dirname(source)))
+            start = "# Start {}\n\n".format(os.path.basename(os.path.dirname(source)))
+            end = "\n# End {}\n\n".format(os.path.basename(os.path.dirname(source)))
 
-        with open(source, "r", encoding="UTF-8") as curFile:
-            write_data(merge_file, start + curFile.read() + end)
+            with open(source, "r", encoding="UTF-8") as curFile:
+                write_data(merge_file, start + curFile.read() + end)
 
     # spin the sources for extensions to the base file
     for source in settings["extensions"]:
@@ -1113,6 +1138,7 @@ def write_opening_header(final_file, **header_params):
         2) numberofrules
         3) outputsubfolder
         4) skipstatichosts
+        5) nounifiedhosts
     """
 
     final_file.seek(0)  # Reset file pointer.
@@ -1120,22 +1146,41 @@ def write_opening_header(final_file, **header_params):
 
     final_file.seek(0)  # Write at the top.
 
+    no_unified_hosts = header_params["nounifiedhosts"]
+
     if header_params["extensions"]:
-        if len(header_params["extensions"]) > 1:
-            write_data(
-                final_file,
-                "# Title: StevenBlack/hosts with the {0} and {1} extensions\n#\n".format(
-                    ", ".join(header_params["extensions"][:-1]),
-                    header_params["extensions"][-1],
-                ),
-            )
+        if no_unified_hosts:
+            if len(header_params["extensions"]) > 1:
+                write_data(
+                    final_file,
+                    "# Title: StevenBlack/hosts extensions {0} and {1} \n#\n".format(
+                        ", ".join(header_params["extensions"][:-1]),
+                        header_params["extensions"][-1],
+                    ),
+                )
+            else:
+                write_data(
+                    final_file,
+                    "# Title: StevenBlack/hosts extension {0}\n#\n".format(
+                        ", ".join(header_params["extensions"])
+                    ),
+                )
         else:
-            write_data(
-                final_file,
-                "# Title: StevenBlack/hosts with the {0} extension\n#\n".format(
-                    ", ".join(header_params["extensions"])
-                ),
-            )
+            if len(header_params["extensions"]) > 1:
+                write_data(
+                    final_file,
+                    "# Title: StevenBlack/hosts with the {0} and {1} extensions\n#\n".format(
+                        ", ".join(header_params["extensions"][:-1]),
+                        header_params["extensions"][-1],
+                    ),
+                )
+            else:
+                write_data(
+                    final_file,
+                    "# Title: StevenBlack/hosts with the {0} extension\n#\n".format(
+                        ", ".join(header_params["extensions"])
+                    ),
+                )
     else:
         write_data(final_file, "# Title: StevenBlack/hosts\n#\n")
 
@@ -1151,12 +1196,21 @@ def write_opening_header(final_file, **header_params):
     )
 
     if header_params["extensions"]:
-        write_data(
-            final_file,
-            "# Extensions added to this file: "
-            + ", ".join(header_params["extensions"])
-            + "\n",
-        )
+        if header_params["nounifiedhosts"]:
+            write_data(
+                final_file,
+                "# The unified hosts file was not used while generating this file.\n"
+                "# Extensions used to generate this file: "
+                + ", ".join(header_params["extensions"])
+                + "\n",
+            )
+        else:
+            write_data(
+                final_file,
+                "# Extensions added to this file: "
+                + ", ".join(header_params["extensions"])
+                + "\n",
+            )
 
     write_data(
         final_file,
@@ -1234,17 +1288,22 @@ def update_readme_data(readme_file, **readme_updates):
         2) sourcesdata
         3) numberofrules
         4) outputsubfolder
+        5) nounifiedhosts
     """
 
     extensions_key = "base"
     extensions = readme_updates["extensions"]
+    no_unified_hosts = readme_updates["nounifiedhosts"]
 
     if extensions:
         extensions_key = "-".join(extensions)
+        if no_unified_hosts:
+            extensions_key = extensions_key + "-only"
 
     output_folder = readme_updates["outputsubfolder"]
     generation_data = {
         "location": path_join_robust(output_folder, ""),
+        "no_unified_hosts": no_unified_hosts,
         "entries": readme_updates["numberofrules"],
         "sourcesdata": readme_updates["sourcesdata"],
     }
diff --git a/updateReadme.py b/updateReadme.py
index 1267b704d8b6cd2346ff1964a2f2b4f3d38c476a..4ab581ac32963c91b3bfa75f0e4f45a1c0c2b91b 100755
@@ -32,7 +32,7 @@ def main():
     keys = list(data.keys())
     # Sort by the number of en-dashes in the key
     # and then by the key string itself.
-    keys.sort(key=lambda item: (item.count("-"), item))
+    keys.sort(key=lambda item: (item.replace("-only", "").count("-"), item.replace("-only", "")))
 
     toc_rows = ""
     for key in keys:
@@ -40,9 +40,14 @@ def main():
         if key == "base":
             data[key]["description"] = "Unified hosts = **(adware + malware)**"
         else:
-            data[key]["description"] = (
-                "Unified hosts **+ " + key.replace("-", " + ") + "**"
-            )
+            if data[key]["no_unified_hosts"]:
+                data[key]["description"] = (
+                    "**" + key.replace("-only", "").replace("-", " + ") + "**"
+                )
+            else:
+                data[key]["description"] = (
+                    "Unified hosts **+ " + key.replace("-", " + ") + "**"
+                )
 
         if "\\" in data[key]["location"]:
             data[key]["location"] = data[key]["location"].replace("\\", "/")
@@ -64,9 +69,12 @@ def main():
     )
     size_history_graph = "![Size history](https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts_file_size_history.png)"
     for key in keys:
-        extensions = key.replace("-", ", ")
+        extensions = key.replace("-only", "").replace("-", ", ")
         extensions_str = "* Extensions: **" + extensions + "**."
-        extensions_header = "with " + extensions + " extensions"
+        if data[key]["no_unified_hosts"]:
+            extensions_header = "Limited to the extensions: " + extensions
+        else:
+            extensions_header = "Unified hosts file with " + extensions + " extensions"
 
         source_rows = ""
         source_list = data[key]["sourcesdata"]
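
The adjusted sort key strips the "-only" suffix before counting hyphens and comparing, so each extensions-only key lands in the same group as its base key in the generated table of contents. A small sketch of the effect (key names are illustrative):

    keys = ["fakenews-gambling-only", "fakenews-only", "fakenews-gambling", "fakenews"]

    # Same key function as updateReadme.py: hyphen count and key string, both with
    # the "-only" suffix stripped, so a variant and its "-only" counterpart compare
    # equal and stay adjacent (ties keep their incoming order; the sort is stable).
    keys.sort(key=lambda item: (item.replace("-only", "").count("-"), item.replace("-only", "")))

    print(keys)
    # ['fakenews-only', 'fakenews', 'fakenews-gambling-only', 'fakenews-gambling']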