Added an option to compress the generated hosts file.
author Stefano <redacted>
Sat, 30 Dec 2017 16:12:04 +0000 (17:12 +0100)
committer Stefano <redacted>
Sat, 30 Dec 2017 16:12:04 +0000 (17:12 +0100)
In particular, the compression option removes unnecessary lines (empty lines and comments) and puts multiple domains on each line.
This option should resolve issue #411, which concerns the DNS Client service on Windows.

testUpdateHostsFile.py
updateHostsFile.py

index 836e949c43858e5ec9a02665fedfdf59e8f86144..c5564c4edce9ab14c702ce890d4840ef4682b3b1 100644 (file)
@@ -100,6 +100,7 @@ class TestGetDefaults(Base):
                         "keepdomaincomments": False,
                         "extensionspath": "foo" + self.sep + "extensions",
                         "extensions": [],
+                        "compress": False,
                         "outputsubfolder": "",
                         "hostfilename": "hosts",
                         "targetip": "0.0.0.0",
@@ -249,7 +250,7 @@ class TestPromptForUpdate(BaseStdout, BaseMockDir):
 
     def tearDown(self):
         BaseStdout.tearDown(self)
-        BaseStdout.tearDown(self)
+        BaseStdout.tearDown(self)
 
 
 class TestPromptForExclusions(BaseStdout):
index 9b6302ec9da2f4e2c2f400d429bf8b1d325aafef..3264d3e741f0a4f60ac50ff600d4488590dfa927 100644 (file)
@@ -62,6 +62,7 @@ def get_defaults():
         "keepdomaincomments": False,
         "extensionspath": path_join_robust(BASEDIR_PATH, "extensions"),
         "extensions": [],
+        "compress": False,
         "outputsubfolder": "",
         "hostfilename": "hosts",
         "targetip": "0.0.0.0",
@@ -117,6 +118,13 @@ def main():
                         default=False, action="store_true",
                         help="Attempt to flush DNS cache "
                              "after replacing the hosts file.")
+    parser.add_argument("--compress", "-c", dest="compress",
+                        default=False, action="store_true",
+                        help="Compress the hosts file "
+                             "ignoring non-necessary lines "
+                             "(empty lines and comments)."
+                             "Improve the performances "
+                             "under Windows.")
 
     global settings
 
@@ -170,7 +178,15 @@ def main():
 
     merge_file = create_initial_file()
     remove_old_hosts_file(settings["backup"])
-    final_file = remove_dups_and_excl(merge_file, exclusion_regexes)
+    if settings["compress"]:
+        # Another mode is required to read and write the file in Python 3
+        final_file = open(path_join_robust(settings["outputpath"], "hosts"),
+                          "w+b" if PY3 else "w+")
+        compressed_file = tempfile.NamedTemporaryFile()
+        remove_dups_and_excl(merge_file, exclusion_regexes, compressed_file)
+        compress_file(compressed_file, settings["targetip"], final_file)
+    else:
+        final_file = remove_dups_and_excl(merge_file, exclusion_regexes)
 
     number_of_rules = settings["numberofrules"]
     output_subfolder = settings["outputsubfolder"]
@@ -630,7 +646,47 @@ def create_initial_file():
     return merge_file
 
 
-def remove_dups_and_excl(merge_file, exclusion_regexes):
+def compress_file(input_file, target_ip, output_file):
+    """
+    Reduce the file dimension removing non-necessary lines (empty lines and comments) and putting multiple domains in each line.
+    Reducing the number of lines of the file, the parsing under Microsoft Windows is much faster.
+
+    Parameters
+    ----------
+    input_file : file
+        The file object that contains the hostnames that we are reducing.
+    target_ip : str
+        The target IP address.
+    output_file : file
+        The file object that will contain the reduced hostnames.
+    """
+
+    input_file.seek(0)  # reset file pointer
+    write_data(output_file, '\n')
+
+    lines = [target_ip]
+    lines_index = 0
+    for line in input_file.readlines():
+        line = line.decode("UTF-8")
+        if line.startswith('#') or line.startswith('\n'):
+            continue
+
+        if line.startswith(target_ip):
+            l = len(lines[lines_index])
+            if l < 128 and (l + len(line[7:])) < 192:
+                lines[lines_index] += line[7:-1]
+            else:
+                lines[lines_index] += '\n'
+                lines.append(line[:-1])
+                lines_index += 1
+
+    for line in lines:
+        write_data(output_file, line)
+
+    input_file.close()
+
+
+def remove_dups_and_excl(merge_file, exclusion_regexes, output_file=None):
     """
     Remove duplicates and remove hosts that we are excluding.
 
@@ -643,6 +699,8 @@ def remove_dups_and_excl(merge_file, exclusion_regexes):
         The file object that contains the hostnames that we are pruning.
     exclusion_regexes : list
         The list of regex patterns used to exclude domains.
+    output_file : file
+        The file object in which the result is written. If None, the file 'settings["outputpath"]' will be created.
     """
 
     number_of_rules = settings["numberofrules"]
@@ -656,9 +714,12 @@ def remove_dups_and_excl(merge_file, exclusion_regexes):
     if not os.path.exists(settings["outputpath"]):
         os.makedirs(settings["outputpath"])
 
-    # Another mode is required to read and write the file in Python 3
-    final_file = open(path_join_robust(settings["outputpath"], "hosts"),
-                      "w+b" if PY3 else "w+")
+    if output_file is None:
+        # Another mode is required to read and write the file in Python 3
+        final_file = open(path_join_robust(settings["outputpath"], "hosts"),
+                          "w+b" if PY3 else "w+")
+    else:
+        final_file = output_file
 
     merge_file.seek(0)  # reset file pointer
     hostnames = {"localhost", "localhost.localdomain",
@@ -707,7 +768,8 @@ def remove_dups_and_excl(merge_file, exclusion_regexes):
     settings["numberofrules"] = number_of_rules
     merge_file.close()
 
-    return final_file
+    if output_file is None:
+        return final_file
 
 
 def normalize_rule(rule, target_ip, keep_domain_comments):
git clone https://git.99rst.org/PROJECT