From: funilrys Date: Tue, 21 Apr 2020 20:37:37 +0000 (+0200) Subject: Unification of the sorting of the sources. X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=665dc981919f640c77bb3292961589a1f40515dd;p=stevenblack-hosts.git Unification of the sorting of the sources. Before this patch, there was no proper sorting of the sources. As @XhmikosR mentioned in StevenBlack/hosts#1166, without this patch, the output is totally different in Windows. But let's be honest, if it is like that under Windows, chances are that the same behavior happens across different OSes or machines around the globe. Another reason behind this patch is that - despite the fact that we certainly trust @StevenBlack - the integrity of the generated files could not be guaranteed because of the sorting which may be completely different from one OS to another and from one machine to another. For those reasons, this patch introduces a unification of the sorting of all sources. The idea behind this patch is to have @StevenBlack's ad-hoc hosts file always on top (1st) and the rest of the sources sorted alphabetically based on the name of the folder inside the `data` or `extensions` directory. This will ensure that we get the same result everywhere. Concretely speaking, I just added the function (`sort_sources`) which sorts a given list of source files. Later on, the new function is called everywhere it was necessary. Tests of the newly introduced function are also included. Contributors: * @ScriptTiger * @XhmikosR Notes: * This patch fixes (completely?) 
ScriptTiger/hosts#1 * This patch fix https://github.com/StevenBlack/hosts/issues/1166#issuecomment-590511086 --- diff --git a/testUpdateHostsFile.py b/testUpdateHostsFile.py index 0bb92d8a6..bfbf06461 100644 --- a/testUpdateHostsFile.py +++ b/testUpdateHostsFile.py @@ -41,6 +41,7 @@ from updateHostsFile import ( query_yes_no, recursive_glob, remove_old_hosts_file, + sort_sources, strip_rule, supports_color, update_all_sources, @@ -131,6 +132,80 @@ class TestGetDefaults(Base): # End Project Settings +class TestSortSources(Base): + def test_sort_sources_simple(self): + given = [ + "sbc.io", + "example.com", + "github.com", + ] + + expected = ["example.com", "github.com", "sbc.io"] + + actual = sort_sources(given) + + self.assertEqual(actual, expected) + + def test_live_data(self): + given = [ + "data/KADhosts/update.json", + "data/someonewhocares.org/update.json", + "data/StevenBlack/update.json", + "data/adaway.org/update.json", + "data/URLHaus/update.json", + "data/UncheckyAds/update.json", + "data/add.2o7Net/update.json", + "data/mvps.org/update.json", + "data/add.Spam/update.json", + "data/add.Dead/update.json", + "data/malwaredomainlist.com/update.json", + "data/Badd-Boyz-Hosts/update.json", + "data/hostsVN/update.json", + "data/yoyo.org/update.json", + "data/add.Risk/update.json", + "data/tiuxo/update.json", + "extensions/gambling/update.json", + "extensions/porn/clefspeare13/update.json", + "extensions/porn/sinfonietta-snuff/update.json", + "extensions/porn/tiuxo/update.json", + "extensions/porn/sinfonietta/update.json", + "extensions/fakenews/update.json", + "extensions/social/tiuxo/update.json", + "extensions/social/sinfonietta/update.json", + ] + + expected = [ + "data/StevenBlack/update.json", + "data/adaway.org/update.json", + "data/add.2o7Net/update.json", + "data/add.Dead/update.json", + "data/add.Risk/update.json", + "data/add.Spam/update.json", + "data/Badd-Boyz-Hosts/update.json", + "data/hostsVN/update.json", + "data/KADhosts/update.json", + 
"data/malwaredomainlist.com/update.json", + "data/mvps.org/update.json", + "data/someonewhocares.org/update.json", + "data/tiuxo/update.json", + "data/UncheckyAds/update.json", + "data/URLHaus/update.json", + "data/yoyo.org/update.json", + "extensions/fakenews/update.json", + "extensions/gambling/update.json", + "extensions/porn/clefspeare13/update.json", + "extensions/porn/sinfonietta/update.json", + "extensions/porn/sinfonietta-snuff/update.json", + "extensions/porn/tiuxo/update.json", + "extensions/social/sinfonietta/update.json", + "extensions/social/tiuxo/update.json", + ] + + actual = sort_sources(given) + + self.assertEqual(actual, expected) + + # Prompt the User class TestPromptForUpdate(BaseStdout, BaseMockDir): def setUp(self): diff --git a/updateHostsFile.py b/updateHostsFile.py index d7ba9ef26..6c417436b 100644 --- a/updateHostsFile.py +++ b/updateHostsFile.py @@ -474,6 +474,34 @@ def prompt_for_move(final_file, **move_params): # End Prompt the User +def sort_sources(sources): + """ + Sorts the sources. + The idea is that all Steven Black's list, file or entries + get on top and the rest sorted alphabetically. + + Parameters + ---------- + sources: list + The sources to sort. + """ + + result = sorted( + sources.copy(), + key=lambda x: x.lower().replace("-", "").replace("_", "").replace(" ", ""), + ) + + # Steven Black's repositories/files/lists should be on top! 
+ steven_black_positions = [ + x for x, y in enumerate(result) if "stevenblack" in y.lower() + ] + + for index in steven_black_positions: + result.insert(0, result.pop(index)) + + return result + + # Exclusion logic def display_exclusion_options(common_exclusions, exclusion_pattern, exclusion_regexes): """ @@ -641,7 +669,9 @@ def update_sources_data(sources_data, **sources_params): source_data_filename = sources_params["sourcedatafilename"] - for source in recursive_glob(sources_params["datapath"], source_data_filename): + for source in sort_sources( + recursive_glob(sources_params["datapath"], source_data_filename) + ): update_file = open(source, "r", encoding="UTF-8") update_data = json.load(update_file) sources_data.append(update_data) @@ -649,7 +679,9 @@ def update_sources_data(sources_data, **sources_params): for source in sources_params["extensions"]: source_dir = path_join_robust(sources_params["extensionspath"], source) - for update_file_path in recursive_glob(source_dir, source_data_filename): + for update_file_path in sort_sources( + recursive_glob(source_dir, source_data_filename) + ): update_file = open(update_file_path, "r") update_data = json.load(update_file) @@ -695,7 +727,7 @@ def update_all_sources(source_data_filename, host_filename): # The transforms we support transform_methods = {"jsonarray": jsonarray} - all_sources = recursive_glob("*", source_data_filename) + all_sources = sort_sources(recursive_glob("*", source_data_filename)) for source in all_sources: update_file = open(source, "r", encoding="UTF-8")