From: Alexander Cecile
Date: Mon, 17 Aug 2020 23:52:11 +0000 (-0400)
Subject: Introduce the requests library.
X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=5186071948529beb5c1da68f23c47403fa972160;p=stevenblack-hosts.git

Introduce the requests library.

Replace the combination of urllib, beautifulsoup and lxml with the
requests library.

get_file_by_url keeps its previous contract: it can be called with just
a URL, it reports "Problem getting file:" and returns None when the
retrieval fails, and every retrieved line is still passed through
domain_to_idna.
---

diff --git a/requirements.txt b/requirements.txt
index 45685c115..f2293605c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1 @@
-lxml>=4.2.4,<=5.0
-beautifulsoup4>=4.6.1,<=5.0
-flake8>=3.8,<=4.0
+requests
diff --git a/testUpdateHostsFile.py b/testUpdateHostsFile.py
index 334aa0868..bf9fcff5f 100644
--- a/testUpdateHostsFile.py
+++ b/testUpdateHostsFile.py
@@ -1615,47 +1615,6 @@ class DomainToIDNA(Base):
             self.assertEqual(actual, expected)
 
 
-class GetFileByUrl(BaseStdout):
-    @mock.patch("updateHostsFile.urlopen", side_effect=mock_url_open)
-    def test_read_url(self, _):
-        url = b"www.google.com"
-
-        expected = "www.google.com"
-        actual = get_file_by_url(url, delay=0)
-
-        self.assertEqual(actual, expected)
-
-    @mock.patch("updateHostsFile.urlopen", side_effect=mock_url_open_fail)
-    def test_read_url_fail(self, _):
-        url = b"www.google.com"
-        self.assertIsNone(get_file_by_url(url, delay=0))
-
-        expected = "Problem getting file:"
-        output = sys.stdout.getvalue()
-
-        self.assertIn(expected, output)
-
-    @mock.patch("updateHostsFile.urlopen", side_effect=mock_url_open_read_fail)
-    def test_read_url_read_fail(self, _):
-        url = b"www.google.com"
-        self.assertIsNone(get_file_by_url(url, delay=0))
-
-        expected = "Problem getting file:"
-        output = sys.stdout.getvalue()
-
-        self.assertIn(expected, output)
-
-    @mock.patch("updateHostsFile.urlopen", side_effect=mock_url_open_decode_fail)
-    def test_read_url_decode_fail(self, _):
-        url = b"www.google.com"
-        self.assertIsNone(get_file_by_url(url, delay=0))
-
-        expected = "Problem getting file:"
-        output = sys.stdout.getvalue()
-
-        self.assertIn(expected, output)
-
-
 class TestWriteData(Base):
     def test_write_basic(self):
         f = BytesIO()
diff --git a/updateHostsFile.py b/updateHostsFile.py
index 9437d40d7..3e2b00185 100644
--- a/updateHostsFile.py
+++ b/updateHostsFile.py
@@ -21,15 +21,12 @@ import tempfile
 import time
 from glob import glob
 
-import lxml  # noqa: F401
-from bs4 import BeautifulSoup
+import requests
 
 # Detecting Python 3 for version-dependent implementations
 PY3 = sys.version_info >= (3, 0)
 
-if PY3:
-    from urllib.request import urlopen
-else:
+if not PY3:
     raise Exception("We do not support Python 2 anymore.")
 
 # Syntactic sugar for "sudo" command in UNIX / Linux
@@ -1469,40 +1466,39 @@ def maybe_copy_example_file(file_path):
             shutil.copyfile(example_file_path, file_path)
 
 
-def get_file_by_url(url, retries=3, delay=10):
-    """
-    Get a file data located at a particular URL.
-
-    Parameters
-    ----------
-    url : str
-        The URL at which to access the data.
-
-    Returns
-    -------
-    url_data : str or None
-        The data retrieved at that URL from the file. Returns None if the
-        attempted retrieval is unsuccessful.
-
-    Note
-    ----
-    - BeautifulSoup is used in this case to avoid having to search in which
-        format we have to encode or decode data before parsing it to UTF-8.
-    """
-
-    while retries:
-        try:
-            with urlopen(url) as f:
-                soup = BeautifulSoup(f.read(), "lxml").get_text()
-                return "\n".join(list(map(domain_to_idna, soup.split("\n"))))
-        except Exception as e:
-            if 'failure in name resolution' in str(e):
-                print('No internet connection! Retrying in {} seconds'.format(delay))
-                time.sleep(delay)
-                retries -= 1
-                continue
-            break
-    print("Problem getting file: ", url)
+def get_file_by_url(url, params=None, **kwargs):
+    """
+    Get a file data located at a particular URL.
+
+    Parameters
+    ----------
+    url : str
+        The URL at which to access the data.
+    params : dict, optional
+        Query-string parameters forwarded to ``requests.get``.
+    **kwargs
+        Extra keyword arguments forwarded to ``requests.get``. A 30 second
+        timeout is applied unless the caller supplies one.
+
+    Returns
+    -------
+    url_data : str or None
+        The data retrieved at that URL from the file. Returns None if the
+        attempted retrieval is unsuccessful.
+    """
+
+    # requests never times out by default; don't let a dead mirror hang us.
+    kwargs.setdefault("timeout", 30)
+
+    try:
+        response = requests.get(url=url, params=params, **kwargs)
+        # Treat HTTP error statuses as failures, as urlopen used to.
+        response.raise_for_status()
+    except requests.exceptions.RequestException:
+        print("Problem getting file: ", url)
+        return None
+
+    return "\n".join(map(domain_to_idna, response.text.split("\n")))
 
 
 def write_data(f, data):