Unification of the sorting of the sources.
authorfunilrys <redacted>
Tue, 21 Apr 2020 20:37:37 +0000 (22:37 +0200)
committerfunilrys <redacted>
Tue, 21 Apr 2020 20:54:25 +0000 (22:54 +0200)
Before this patch, there was no proper sorting of the sources.

As @XhmikosR mentioned in StevenBlack/hosts#1166, without this patch,
the output is totally different in Windows. But let's be honest, if it
is like that under Windows, chances are that the same behavior happens
across different OSes or machines around the globe.

Another reason behind this patch is that - despite the fact that we
certainly trust @StevenBlack - the integrity of the generated files
could not be guaranteed because of the sorting, which may be completely
different from one OS to another and from one machine to another.

For those reasons, this patch introduces a unification of the sorting of
all sources.

The idea behind this patch is to have @StevenBlack's ad-hoc hosts
file always on top (1st) and the rest of the sources sorted
alphabetically based on the name of the folder inside the `data`
or `extensions` directory.

This will ensure that we get the same result everywhere.

Concretely speaking, I just added the function (`sort_sources`) which
sorts a given list of source files, and then called the new function
everywhere it was necessary. Tests of the newly introduced function
are also included.

Contributors:
  * @ScriptTiger
  * @XhmikosR

Notes:
  * This patch fixes (completely?) ScriptTiger/hosts#1
  * This patch fixes https://github.com/StevenBlack/hosts/issues/1166#issuecomment-590511086

testUpdateHostsFile.py
updateHostsFile.py

index 0bb92d8a661ec6b1c258954b60bfef8f469f3c5e..bfbf06461d473d58be40d7d00d54a6b9eb851eff 100644 (file)
@@ -41,6 +41,7 @@ from updateHostsFile import (
     query_yes_no,
     recursive_glob,
     remove_old_hosts_file,
+    sort_sources,
     strip_rule,
     supports_color,
     update_all_sources,
@@ -131,6 +132,80 @@ class TestGetDefaults(Base):
 # End Project Settings
 
 
+class TestSortSources(Base):
+    def test_sort_sources_simple(self):
+        given = [
+            "sbc.io",
+            "example.com",
+            "github.com",
+        ]
+
+        expected = ["example.com", "github.com", "sbc.io"]
+
+        actual = sort_sources(given)
+
+        self.assertEqual(actual, expected)
+
+    def test_live_data(self):
+        given = [
+            "data/KADhosts/update.json",
+            "data/someonewhocares.org/update.json",
+            "data/StevenBlack/update.json",
+            "data/adaway.org/update.json",
+            "data/URLHaus/update.json",
+            "data/UncheckyAds/update.json",
+            "data/add.2o7Net/update.json",
+            "data/mvps.org/update.json",
+            "data/add.Spam/update.json",
+            "data/add.Dead/update.json",
+            "data/malwaredomainlist.com/update.json",
+            "data/Badd-Boyz-Hosts/update.json",
+            "data/hostsVN/update.json",
+            "data/yoyo.org/update.json",
+            "data/add.Risk/update.json",
+            "data/tiuxo/update.json",
+            "extensions/gambling/update.json",
+            "extensions/porn/clefspeare13/update.json",
+            "extensions/porn/sinfonietta-snuff/update.json",
+            "extensions/porn/tiuxo/update.json",
+            "extensions/porn/sinfonietta/update.json",
+            "extensions/fakenews/update.json",
+            "extensions/social/tiuxo/update.json",
+            "extensions/social/sinfonietta/update.json",
+        ]
+
+        expected = [
+            "data/StevenBlack/update.json",
+            "data/adaway.org/update.json",
+            "data/add.2o7Net/update.json",
+            "data/add.Dead/update.json",
+            "data/add.Risk/update.json",
+            "data/add.Spam/update.json",
+            "data/Badd-Boyz-Hosts/update.json",
+            "data/hostsVN/update.json",
+            "data/KADhosts/update.json",
+            "data/malwaredomainlist.com/update.json",
+            "data/mvps.org/update.json",
+            "data/someonewhocares.org/update.json",
+            "data/tiuxo/update.json",
+            "data/UncheckyAds/update.json",
+            "data/URLHaus/update.json",
+            "data/yoyo.org/update.json",
+            "extensions/fakenews/update.json",
+            "extensions/gambling/update.json",
+            "extensions/porn/clefspeare13/update.json",
+            "extensions/porn/sinfonietta/update.json",
+            "extensions/porn/sinfonietta-snuff/update.json",
+            "extensions/porn/tiuxo/update.json",
+            "extensions/social/sinfonietta/update.json",
+            "extensions/social/tiuxo/update.json",
+        ]
+
+        actual = sort_sources(given)
+
+        self.assertEqual(actual, expected)
+
+
 # Prompt the User
 class TestPromptForUpdate(BaseStdout, BaseMockDir):
     def setUp(self):
index d7ba9ef26f460f07741e2822d438383f2592b13e..6c417436b296bf85cb53990f1a00617603df9131 100644 (file)
@@ -474,6 +474,34 @@ def prompt_for_move(final_file, **move_params):
 # End Prompt the User
 
 
+def sort_sources(sources):
+    """
+    Sorts the sources.
+    The idea is that all Steven Black's list, file or entries
+    get on top and the rest sorted alphabetically.
+
+    Parameters
+    ----------
+    sources: list
+        The sources to sort.
+    """
+
+    result = sorted(
+        sources.copy(),
+        key=lambda x: x.lower().replace("-", "").replace("_", "").replace(" ", ""),
+    )
+
+    # Steven Black's repositories/files/lists should be on top!
+    steven_black_positions = [
+        x for x, y in enumerate(result) if "stevenblack" in y.lower()
+    ]
+
+    for index in steven_black_positions:
+        result.insert(0, result.pop(index))
+
+    return result
+
+
 # Exclusion logic
 def display_exclusion_options(common_exclusions, exclusion_pattern, exclusion_regexes):
     """
@@ -641,7 +669,9 @@ def update_sources_data(sources_data, **sources_params):
 
     source_data_filename = sources_params["sourcedatafilename"]
 
-    for source in recursive_glob(sources_params["datapath"], source_data_filename):
+    for source in sort_sources(
+        recursive_glob(sources_params["datapath"], source_data_filename)
+    ):
         update_file = open(source, "r", encoding="UTF-8")
         update_data = json.load(update_file)
         sources_data.append(update_data)
@@ -649,7 +679,9 @@ def update_sources_data(sources_data, **sources_params):
 
     for source in sources_params["extensions"]:
         source_dir = path_join_robust(sources_params["extensionspath"], source)
-        for update_file_path in recursive_glob(source_dir, source_data_filename):
+        for update_file_path in sort_sources(
+            recursive_glob(source_dir, source_data_filename)
+        ):
             update_file = open(update_file_path, "r")
             update_data = json.load(update_file)
 
@@ -695,7 +727,7 @@ def update_all_sources(source_data_filename, host_filename):
     # The transforms we support
     transform_methods = {"jsonarray": jsonarray}
 
-    all_sources = recursive_glob("*", source_data_filename)
+    all_sources = sort_sources(recursive_glob("*", source_data_filename))
 
     for source in all_sources:
         update_file = open(source, "r", encoding="UTF-8")
git clone https://git.99rst.org/PROJECT