# Script by Ben Limmer
# https://github.com/l1m5
#
-# This simple Python script will combine all the host files you provide
+# This Python script will combine all the host files you provide
# as sources into one, unique host file to keep you internet browsing happy.
# Making Python 2 compatible with Python 3
def getFileByUrl(url):
try:
f = urlopen(url)
- return f.read().decode("UTF-8")
+ return f.read().decode( "UTF-8" )
except:
- print ("Problem getting file: ", url);
+ print ( "Problem getting file: ", url );
# raise
-
# In Python 3 "print" is a function, braces are added everywhere
# Detecting Python 3 for version-dependent implementations
-Python3=False;
+Python3 = False;
cur_version = sys.version_info
-if cur_version >= (3, 0):
- Python3=True;
+if cur_version >= ( 3, 0 ):
+ Python3 = True;
# This function works in both Python 2 and Python 3
-def myInput(msg=""):
+def myInput( msg = "" ):
if Python3:
- return input(msg);
+ return input( msg );
else:
- return raw_input(msg);
+ return raw_input( msg );
# Cross-python writing function
-def writeData(f, data):
+def writeData( f, data ):
if Python3:
- f.write(bytes(data, 'UTF-8'))
+ f.write( bytes( data, 'UTF-8' ))
else:
- f.write(str(data).encode('UTF-8'))
+ f.write( str( data ).encode( 'UTF-8' ))
# This function doesn't list hidden files
-def listdir_nohidden(path):
- return glob.glob(os.path.join(path, '*'))
+def listdir_nohidden( path ):
+ return glob.glob( os.path.join( path, '*' ))
# Project Settings
-BASEDIR_PATH = os.path.dirname(os.path.realpath(__file__))
-DATA_PATH = os.path.join(BASEDIR_PATH, 'data')
-DATA_FILENAMES = 'hosts'
+BASEDIR_PATH = os.path.dirname( os.path.realpath( __file__ ))
+DATA_PATH = os.path.join( BASEDIR_PATH, 'data' )
+DATA_FILENAMES = 'hosts'
UPDATE_URL_FILENAME = 'update.info'
-SOURCES = listdir_nohidden(DATA_PATH)
-README_TEMPLATE = os.path.join(BASEDIR_PATH, 'readme_template.md')
-README_FILE = os.path.join(BASEDIR_PATH, 'readme.md')
-TARGET_HOST = '0.0.0.0'
-WHITELIST_FILE = os.path.join(BASEDIR_PATH, 'whitelist')
+SOURCES = listdir_nohidden( DATA_PATH )
+README_TEMPLATE = os.path.join( BASEDIR_PATH, 'readme_template.md' )
+README_FILE = os.path.join( BASEDIR_PATH, 'readme.md' )
+TARGET_HOST = '0.0.0.0'
+WHITELIST_FILE = os.path.join( BASEDIR_PATH, 'whitelist' )
# Exclusions
EXCLUSION_PATTERN = '([a-zA-Z\d-]+\.){0,}' #append domain the end
-EXCLUSIONS = []
+EXCLUSIONS = []
# Common domains to exclude
COMMON_EXCLUSIONS = ['hulu.com']
# Global vars
exclusionRegexs = []
-numberOfRules = 0
+numberOfRules = 0
def main():
promptForUpdate()
promptForExclusions()
mergeFile = createInitialFile()
removeOldHostsFile()
- finalFile = removeDupsAndExcl(mergeFile)
- finalizeFile(finalFile)
- updateReadme(numberOfRules)
- printSuccess('Success! Your shiny new hosts file has been prepared.\nIt contains ' + "{:,}".format( numberOfRules ) + ' unique entries.')
+ finalFile = removeDupsAndExcl( mergeFile )
+ finalizeFile( finalFile )
+ updateReadme( numberOfRules )
+ printSuccess( 'Success! Your new hosts file has been prepared.\nIt contains ' + "{:,}".format( numberOfRules ) + ' unique entries.' )
- promptForMove(finalFile)
+ promptForMove( finalFile )
# Prompt the User
def promptForUpdate():
-
# Create hosts file if it doesn't exists
- if not os.path.isfile(os.path.join(BASEDIR_PATH, 'hosts')):
+ if not os.path.isfile( os.path.join(BASEDIR_PATH, 'hosts' )):
try:
- file = open(os.path.join(BASEDIR_PATH, 'hosts'), 'w+').close()
+ file = open( os.path.join( BASEDIR_PATH, 'hosts' ), 'w+' ).close()
except:
- printFailure("ERROR: No 'hosts' file in the folder, try creating one manually")
-
- response = query_yes_no("Do you want to update all data sources?")
- if (response == "yes"):
+ printFailure( "ERROR: No 'hosts' file in the folder, try creating one manually" )
+
+ response = query_yes_no( "Do you want to update all data sources?" )
+ if ( response == "yes" ):
updateAllSources()
else:
- print ('OK, we\'ll stick with what we\'ve got locally.')
+ print ( 'OK, we\'ll stick with what we\'ve got locally.' )
def promptForExclusions():
- response = query_yes_no("Do you want to exclude any domains?\n" +
+ response = query_yes_no( "Do you want to exclude any domains?\n" +
"For example, hulu.com video streaming must be able to access " +
- "its tracking and ad servers in order to play video.")
- if (response == "yes"):
+ "its tracking and ad servers in order to play video." )
+ if ( response == "yes" ):
displayExclusionOptions()
else:
- print ('OK, we\'ll only exclude domains in the whitelist.')
+ print ( 'OK, we\'ll only exclude domains in the whitelist.' )
def promptForMoreCustomExclusions():
- response = query_yes_no("Do you have more domains you want to enter?")
- if (response == "yes"):
+ response = query_yes_no( "Do you have more domains you want to enter?" )
+ if ( response == "yes" ):
return True
else:
return False
-def promptForMove(finalFile):
- response = query_yes_no("Do you want to replace your existing hosts file with the newly generated file?")
- if (response == "yes"):
- moveHostsFileIntoPlace(finalFile)
+def promptForMove( finalFile ):
+ response = query_yes_no( "Do you want to replace your existing hosts file with the newly generated file?" )
+ if ( response == "yes" ):
+ moveHostsFileIntoPlace( finalFile )
else:
return False
# End Prompt the User
# Exclusion logic
def displayExclusionOptions():
for exclusionOption in COMMON_EXCLUSIONS:
- response = query_yes_no("Do you want to exclude the domain " + exclusionOption + " ?")
- if (response == "yes"):
+ response = query_yes_no( "Do you want to exclude the domain " + exclusionOption + " ?" )
+ if ( response == "yes" ):
excludeDomain(exclusionOption)
else:
continue
- response = query_yes_no("Do you want to exclude any other domains?")
- if (response == "yes"):
+ response = query_yes_no( "Do you want to exclude any other domains?" )
+ if ( response == "yes" ):
gatherCustomExclusions()
def gatherCustomExclusions():
while True:
# Cross-python Input
- domainFromUser = myInput("Enter the domain you want to exclude (e.g. facebook.com): ")
- if (isValidDomainFormat(domainFromUser)):
- excludeDomain(domainFromUser)
- if (promptForMoreCustomExclusions() == False):
+ domainFromUser = myInput( "Enter the domain you want to exclude (e.g. facebook.com): " )
+ if (isValidDomainFormat( domainFromUser )):
+ excludeDomain( domainFromUser )
+ if ( promptForMoreCustomExclusions() == False ):
return
-def excludeDomain(domain):
- exclusionRegexs.append(re.compile(EXCLUSION_PATTERN + domain))
+def excludeDomain( domain ):
+ exclusionRegexs.append( re.compile( EXCLUSION_PATTERN + domain ))
-def matchesExclusions(strippedRule):
+def matchesExclusions( strippedRule ):
strippedDomain = strippedRule.split()[1]
for exclusionRegex in exclusionRegexs:
- if exclusionRegex.search(strippedDomain):
+ if exclusionRegex.search( strippedDomain ):
return True
return False
# End Exclusion Logic
# Update Logic
def updateAllSources():
for source in SOURCES:
- updateURL = getUpdateURLFromFile(source)
- if (updateURL == None):
+ updateURL = getUpdateURLFromFile( source )
+ if ( updateURL == None ):
continue;
- print ('Updating source ' + source + ' from ' + updateURL)
+ print ( 'Updating source ' + source + ' from ' + updateURL )
# Cross-python call
- updatedFile = getFileByUrl(updateURL);
- updatedFile = updatedFile.replace('\r', '') #get rid of carriage-return symbols
+ updatedFile = getFileByUrl( updateURL );
+ updatedFile = updatedFile.replace( '\r', '' ) #get rid of carriage-return symbols
# This is cross-python code
- dataFile = open(os.path.join(DATA_PATH, source, DATA_FILENAMES), 'wb')
- writeData(dataFile, updatedFile);
+ dataFile = open( os.path.join( DATA_PATH, source, DATA_FILENAMES ), 'wb' )
+ writeData( dataFile, updatedFile );
dataFile.close()
-def getUpdateURLFromFile(source):
- pathToUpdateFile = os.path.join(DATA_PATH, source, UPDATE_URL_FILENAME)
- if os.path.exists(pathToUpdateFile):
- updateFile = open(pathToUpdateFile, 'r')
- retURL = updateFile.readline().strip()
+def getUpdateURLFromFile( source ):
+ pathToUpdateFile = os.path.join( DATA_PATH, source, UPDATE_URL_FILENAME )
+ if os.path.exists( pathToUpdateFile ):
+ updateFile = open( pathToUpdateFile, 'r' )
+ retURL = updateFile.readline().strip()
updateFile.close()
else:
retURL = None
- printFailure('Warning: Can\'t find the update file for source ' + source + '\n' +
- 'Make sure that there\'s a file at ' + pathToUpdateFile)
+ printFailure( 'Warning: Can\'t find the update file for source ' + source + '\n' +
+ 'Make sure that there\'s a file at ' + pathToUpdateFile )
return retURL
# End Update Logic
def createInitialFile():
mergeFile = tempfile.NamedTemporaryFile()
for source in SOURCES:
- curFile = open(os.path.join(DATA_PATH, source, DATA_FILENAMES), 'r')
+ curFile = open( os.path.join( DATA_PATH, source, DATA_FILENAMES ), 'r' )
#Done in a cross-python way
- writeData(mergeFile, curFile.read())
+ writeData( mergeFile, curFile.read() )
return mergeFile
-def removeDupsAndExcl(mergeFile):
+def removeDupsAndExcl( mergeFile ):
global numberOfRules
- if os.path.isfile(WHITELIST_FILE):
- with open(WHITELIST_FILE, "r") as ins:
+ if os.path.isfile( WHITELIST_FILE ):
+ with open( WHITELIST_FILE, "r" ) as ins:
for line in ins:
- EXCLUSIONS.append(line)
+ EXCLUSIONS.append( line )
# Another mode is required to read and write the file in Python 3
- finalFile = open(os.path.join(BASEDIR_PATH, 'hosts'), 'r+b')
- mergeFile.seek(0) # reset file pointer
+ finalFile = open( os.path.join( BASEDIR_PATH, 'hosts' ), 'r+b' )
+ mergeFile.seek( 0 ) # reset file pointer
hostnames = set()
- hostnames.add("localhost")
+ hostnames.add( "localhost" )
for line in mergeFile.readlines():
write = 'true'
# Explicit encoding
- line = line.decode("UTF-8")
+ line = line.decode( "UTF-8" )
# Testing the first character doesn't require startswith
if line[0] == '#' or re.match(r'^\s*$', line[0]):
# Cross-python write
- writeData(finalFile, line)
+ writeData( finalFile, line )
continue
- strippedRule = stripRule(line) #strip comments
- if len(strippedRule) == 0:
+ strippedRule = stripRule( line ) #strip comments
+ if len( strippedRule ) == 0:
continue
- if matchesExclusions(strippedRule):
+ if matchesExclusions( strippedRule ):
continue
- hostname, normalizedRule = normalizeRule(strippedRule) # normalize rule
+ hostname, normalizedRule = normalizeRule( strippedRule ) # normalize rule
for exclude in EXCLUSIONS:
- if (exclude in line):
+ if ( exclude in line ):
write = 'false'
break
- if normalizedRule and (hostname not in hostnames) and (write == 'true'):
- writeData(finalFile, normalizedRule)
- hostnames.add(hostname)
+ if normalizedRule and ( hostname not in hostnames ) and ( write == 'true' ):
+ writeData( finalFile, normalizedRule )
+ hostnames.add( hostname )
numberOfRules += 1
mergeFile.close()
return finalFile
def normalizeRule(rule):
- result = re.search(r'^[ \t]*(\d+\.\d+\.\d+\.\d+)\s+([\w\.-]+)(.*)',rule)
+ result = re.search(r'^[ \t]*(\d+\.\d+\.\d+\.\d+)\s+([\w\.-]+)(.*)', rule )
if result:
target, hostname, suffix = result.groups()
hostname = hostname.lower() # explicitly lowercase hostname
if suffix is not '':
# add suffix as comment only, not as a separate host
- return hostname, "%s %s #%s\n" % (TARGET_HOST, hostname, suffix)
+ return hostname, "%s %s #%s\n" % ( TARGET_HOST, hostname, suffix )
else:
- return hostname, "%s %s\n" % (TARGET_HOST, hostname)
- print ('==>%s<==' % rule)
+ return hostname, "%s %s\n" % ( TARGET_HOST, hostname )
+ print ( '==>%s<==' % rule )
return None, None
-def finalizeFile(finalFile):
- writeOpeningHeader(finalFile)
+def finalizeFile( finalFile ):
+ writeOpeningHeader( finalFile )
finalFile.close()
# Some sources put comments around their rules, for accuracy we need to strip them
# the comments are preserved in the output hosts file
-def stripRule(line):
+def stripRule( line ):
splitLine = line.split()
- if (len(splitLine) < 2) :
+ if ( len( splitLine ) < 2 ) :
# just return blank
return ''
else:
def writeOpeningHeader(finalFile):
global numberOfRules
- finalFile.seek(0) #reset file pointer
+ finalFile.seek( 0 ) #reset file pointer
fileContents = finalFile.read(); #save content
- finalFile.seek(0) #write at the top
- writeData(finalFile, '# This file is a merged collection of hosts from reputable sources,\n')
- writeData(finalFile, '# with a dash of crowd sourcing via Github\n#\n')
- writeData(finalFile, '# Project home page: https://github.com/StevenBlack/hosts\n#\n')
- writeData(finalFile, '# ===============================================================\n')
- writeData(finalFile, '\n')
- writeData(finalFile, '127.0.0.1 localhost\n')
- writeData(finalFile, '\n')
-
- preamble = os.path.join(BASEDIR_PATH, "myhosts");
- if os.path.isfile(preamble):
- with open(preamble, "r") as f:
- writeData(finalFile, f.read());
-
- finalFile.write(fileContents)
-
-def updateReadme(numberOfRules):
- with open(README_FILE, "wt") as out:
- for line in open(README_TEMPLATE):
- out.write(line.replace('@NUM_ENTRIES@', "{:,}".format( numberOfRules )))
-
-def moveHostsFileIntoPlace(finalFile):
- if (os.name == 'posix'):
- print ('Moving the file requires administrative privileges. You might need to enter your password.')
- if(subprocess.call(["/usr/bin/sudo", "cp", os.path.abspath(finalFile.name), "/etc/hosts"])):
- printFailure("Moving the file failed.")
- print ('Flushing the DNS Cache to utilize new hosts file...')
- if (platform.system() == 'Darwin'):
- if(subprocess.call(["/usr/bin/sudo", "killall", "-HUP", "mDNSResponder"])):
- printFailure("Flushing the DNS Cache failed.")
+ finalFile.seek( 0 ) #write at the top
+ writeData( finalFile, '# This file is a merged collection of hosts from reputable sources,\n' )
+ writeData( finalFile, '# with a dash of crowd sourcing via Github\n#\n' )
+ writeData( finalFile, '# Project home page: https://github.com/StevenBlack/hosts\n#\n' )
+ writeData( finalFile, '# ===============================================================\n' )
+ writeData( finalFile, '\n' )
+ writeData( finalFile, '127.0.0.1 localhost\n' )
+ writeData( finalFile, '\n' )
+
+ preamble = os.path.join( BASEDIR_PATH, "myhosts" );
+ if os.path.isfile( preamble ):
+ with open( preamble, "r" ) as f:
+ writeData( finalFile, f.read() );
+
+ finalFile.write( fileContents )
+
+def updateReadme( numberOfRules ):
+ with open( README_FILE, "wt" ) as out:
+ for line in open( README_TEMPLATE ):
+ out.write( line.replace( '@NUM_ENTRIES@', "{:,}".format( numberOfRules )))
+
+def moveHostsFileIntoPlace( finalFile ):
+ if ( os.name == 'posix' ):
+ print ( 'Moving the file requires administrative privileges. You might need to enter your password.' )
+ if(subprocess.call( ["/usr/bin/sudo", "cp", os.path.abspath( finalFile.name ), "/etc/hosts"] )):
+ printFailure( "Moving the file failed." )
+ print ('Flushing the DNS Cache to utilize new hosts file...' )
+ if ( platform.system() == 'Darwin' ):
+ if( subprocess.call( ["/usr/bin/sudo", "killall", "-HUP", "mDNSResponder"] )):
+ printFailure( "Flushing the DNS Cache failed." )
else:
- if os.path.isfile("/etc/rc.d/init.d/nscd"):
- if(subprocess.call(["/usr/bin/sudo", "/etc/rc.d/init.d/nscd", "restart"])):
- printFailure("Flushing the DNS Cache failed.")
- elif (os.name == 'nt'):
- print ('Automatically moving the hosts file in place is not yet supported.')
- print ('Please move the generated file to %SystemRoot%\system32\drivers\etc\hosts')
-
+ if os.path.isfile( "/etc/rc.d/init.d/nscd" ):
+ if( subprocess.call(["/usr/bin/sudo", "/etc/rc.d/init.d/nscd", "restart"] )):
+ printFailure( "Flushing the DNS Cache failed." )
+ elif ( os.name == 'nt' ):
+ print ( 'Automatically moving the hosts file in place is not yet supported.' )
+ print ( 'Please move the generated file to %SystemRoot%\system32\drivers\etc\hosts' )
+
def removeOldHostsFile(): # hotfix since merging with an already existing hosts file leads to artefacts and duplicates
- oldFilePath=os.path.join(BASEDIR_PATH,'hosts')
- open(oldFilePath, 'a').close() # create if already removed, so remove wont raise an error
+ oldFilePath = os.path.join( BASEDIR_PATH, 'hosts' )
+ open( oldFilePath, 'a' ).close() # create if already removed, so remove wont raise an error
os.remove(oldFilePath);
- open(oldFilePath, 'a').close() # create new empty hostsfile
+ open( oldFilePath, 'a' ).close() # create new empty hostsfile
# End File Logic
# Helper Functions
## {{{ http://code.activestate.com/recipes/577058/ (r2)
-def query_yes_no(question, default="yes"):
+def query_yes_no( question, default = "yes" ):
"""Ask a yes/no question via raw_input() and return their answer.
"question" is a string that is presented to the user.
elif default == "no":
prompt = " [y/N] "
else:
- raise ValueError("invalid default answer: '%s'" % default)
+ raise ValueError( "invalid default answer: '%s'" % default )
while 1:
- sys.stdout.write(colorize(question, colors.PROMPT) + prompt)
+ sys.stdout.write( colorize( question, colors.PROMPT ) + prompt )
# Changed to be cross-python
choice = myInput().lower()
if default is not None and choice == '':
elif choice in valid.keys():
return valid[choice]
else:
- printFailure("Please respond with 'yes' or 'no' "\
- "(or 'y' or 'n').\n")
+ printFailure( "Please respond with 'yes' or 'no' "\
+ "(or 'y' or 'n').\n" )
## end of http://code.activestate.com/recipes/577058/ }}}
-def isValidDomainFormat(domain):
- if (domain == ''):
- print ("You didn\'t enter a domain. Try again.")
+def isValidDomainFormat( domain ):
+ if ( domain == '' ):
+ print ( "You didn\'t enter a domain. Try again." )
return False
- domainRegex = re.compile("www\d{0,3}[.]|https?")
- if (domainRegex.match(domain)):
- print ("The domain " + domain + " is not valid. Do not include www.domain.com or http(s)://domain.com. Try again.")
+ domainRegex = re.compile( "www\d{0,3}[.]|https?" )
+ if ( domainRegex.match( domain )):
+ print ( "The domain " + domain + " is not valid. Do not include www.domain.com or http(s)://domain.com. Try again." )
return False
else:
return True
FAIL = '\033[91m'
ENDC = '\033[0m'
-def colorize(text, color):
+def colorize( text, color ):
return color + text + colors.ENDC
-def printSuccess(text):
- print (colorize(text, colors.SUCCESS))
+def printSuccess( text ):
+ print ( colorize(text, colors.SUCCESS ))
-def printFailure(text):
- print (colorize(text, colors.FAIL))
+def printFailure( text ):
+ print ( colorize( text, colors.FAIL ))
# End Helper Functions
if __name__ == "__main__":