From: Stefano Date: Sat, 30 Dec 2017 16:12:04 +0000 (+0100) Subject: Added an option to compress the created host file. X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=ce8d3abf7e1819f81a8a037183c7fa5d7c7bd854;p=stevenblack-hosts.git Added an option to compress the created host file. In particular, the compression option removes unnecessary lines (empty lines and comments) and puts multiple domains in each line. This option should solve issue #411 regarding the DNS client service of Windows. --- diff --git a/testUpdateHostsFile.py b/testUpdateHostsFile.py index 836e949c4..c5564c4ed 100644 --- a/testUpdateHostsFile.py +++ b/testUpdateHostsFile.py @@ -100,6 +100,7 @@ class TestGetDefaults(Base): "keepdomaincomments": False, "extensionspath": "foo" + self.sep + "extensions", "extensions": [], + "compress": False, "outputsubfolder": "", "hostfilename": "hosts", "targetip": "0.0.0.0", @@ -249,7 +250,7 @@ class TestPromptForUpdate(BaseStdout, BaseMockDir): def tearDown(self): BaseStdout.tearDown(self) - BaseStdout.tearDown(self) + # BaseStdout.tearDown(self) class TestPromptForExclusions(BaseStdout): diff --git a/updateHostsFile.py b/updateHostsFile.py index 9b6302ec9..3264d3e74 100644 --- a/updateHostsFile.py +++ b/updateHostsFile.py @@ -62,6 +62,7 @@ def get_defaults(): "keepdomaincomments": False, "extensionspath": path_join_robust(BASEDIR_PATH, "extensions"), "extensions": [], + "compress": False, "outputsubfolder": "", "hostfilename": "hosts", "targetip": "0.0.0.0", @@ -117,6 +118,13 @@ def main(): default=False, action="store_true", help="Attempt to flush DNS cache " "after replacing the hosts file.") + parser.add_argument("--compress", "-c", dest="compress", + default=False, action="store_true", + help="Compress the hosts file " + "ignoring non-necessary lines " + "(empty lines and comments)." 
+ "Improve the performances " + "under Windows.") global settings @@ -170,7 +178,15 @@ def main(): merge_file = create_initial_file() remove_old_hosts_file(settings["backup"]) - final_file = remove_dups_and_excl(merge_file, exclusion_regexes) + if settings["compress"]: + # Another mode is required to read and write the file in Python 3 + final_file = open(path_join_robust(settings["outputpath"], "hosts"), + "w+b" if PY3 else "w+") + compressed_file = tempfile.NamedTemporaryFile() + remove_dups_and_excl(merge_file, exclusion_regexes, compressed_file) + compress_file(compressed_file, settings["targetip"], final_file) + else: + final_file = remove_dups_and_excl(merge_file, exclusion_regexes) number_of_rules = settings["numberofrules"] output_subfolder = settings["outputsubfolder"] @@ -630,7 +646,47 @@ def create_initial_file(): return merge_file -def remove_dups_and_excl(merge_file, exclusion_regexes): +def compress_file(input_file, target_ip, output_file): + """ + Reduce the file dimension removing non-necessary lines (empty lines and comments) and putting multiple domains in each line. + Reducing the number of lines of the file, the parsing under Microsoft Windows is much faster. + + Parameters + ---------- + input_file : file + The file object that contains the hostnames that we are reducing. + target_ip : str + The target IP address. + output_file : file + The file object that will contain the reduced hostnames. 
+ """ + + input_file.seek(0) # reset file pointer + write_data(output_file, '\n') + + lines = [target_ip] + lines_index = 0 + for line in input_file.readlines(): + line = line.decode("UTF-8") + if line.startswith('#') or line.startswith('\n'): + continue + + if line.startswith(target_ip): + l = len(lines[lines_index]) + if l < 128 and (l + len(line[7:])) < 192: + lines[lines_index] += line[7:-1] + else: + lines[lines_index] += '\n' + lines.append(line[:-1]) + lines_index += 1 + + for line in lines: + write_data(output_file, line) + + input_file.close() + + +def remove_dups_and_excl(merge_file, exclusion_regexes, output_file=None): """ Remove duplicates and remove hosts that we are excluding. @@ -643,6 +699,8 @@ def remove_dups_and_excl(merge_file, exclusion_regexes): The file object that contains the hostnames that we are pruning. exclusion_regexes : list The list of regex patterns used to exclude domains. + output_file : file + The file object in which the result is written. If None, the file 'settings["outputpath"]' will be created. 
""" number_of_rules = settings["numberofrules"] @@ -656,9 +714,12 @@ def remove_dups_and_excl(merge_file, exclusion_regexes): if not os.path.exists(settings["outputpath"]): os.makedirs(settings["outputpath"]) - # Another mode is required to read and write the file in Python 3 - final_file = open(path_join_robust(settings["outputpath"], "hosts"), - "w+b" if PY3 else "w+") + if output_file is None: + # Another mode is required to read and write the file in Python 3 + final_file = open(path_join_robust(settings["outputpath"], "hosts"), + "w+b" if PY3 else "w+") + else: + final_file = output_file merge_file.seek(0) # reset file pointer hostnames = {"localhost", "localhost.localdomain", @@ -707,7 +768,8 @@ def remove_dups_and_excl(merge_file, exclusion_regexes): settings["numberofrules"] = number_of_rules merge_file.close() - return final_file + if output_file is None: + return final_file def normalize_rule(rule, target_ip, keep_domain_comments):