From: funilrys Date: Sun, 4 Mar 2018 10:17:38 +0000 (+0100) Subject: Review of domain_to_idna() to support more test cases X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=8405f87edb6a1fb9346409daaf5aee39b565355f;p=stevenblack-hosts.git Review of domain_to_idna() to support more test cases Please note this patch comes after an issue reported by @FadeMind which I could reproduce on 1 of 3 computers. --- diff --git a/updateHostsFile.py b/updateHostsFile.py index d7c41b65f..33ea85958 100644 --- a/updateHostsFile.py +++ b/updateHostsFile.py @@ -1159,26 +1159,47 @@ def domain_to_idna(line): """ if not line.startswith('#'): - for separator in ['\t', ' ']: - comment = '' - - if separator in line: - splited_line = line.split(separator) - if '#' in splited_line[1]: - index_comment = splited_line[1].find('#') - - if index_comment > -1: - comment = splited_line[1][index_comment:] - - splited_line[1] = splited_line[1] \ - .split(comment)[0] \ - .encode("IDNA").decode("UTF-8") + \ - comment - - splited_line[1] = splited_line[1] \ - .encode("IDNA") \ - .decode("UTF-8") - return separator.join(splited_line) + tabs = '\t' + space = ' ' + + tabs_position, space_position = (line.find(tabs), line.find(space)) + + if tabs_position > -1 and space_position > -1: + if space_position < tabs_position: + separator = space + else: + separator = tabs + elif not tabs_position == -1: + separator = tabs + elif not space_position == -1: + separator = space + else: + separator = '' + + if separator: + splited_line = line.split(separator) + + index = 1 + while index < len(splited_line): + if splited_line[index]: + break + index += 1 + + if '#' in splited_line[index]: + index_comment = splited_line[index].find('#') + + if index_comment > -1: + comment = splited_line[index][index_comment:] + + splited_line[index] = splited_line[index] \ + .split(comment)[0] \ + .encode("IDNA").decode("UTF-8") + \ + comment + + splited_line[index] = splited_line[index] \ + .encode("IDNA") \ + .decode("UTF-8") + 
return separator.join(splited_line) return line.encode("IDNA").decode("UTF-8") return line.encode("UTF-8").decode("UTF-8")