diff --git a/htmlark.py b/htmlark.py
index 02c2918..e7bbf21 100755
--- a/htmlark.py
+++ b/htmlark.py
@@ -13,6 +13,7 @@
from urllib.parse import urlparse
import bs4
+
# Import requests if available, dummy it if not
try:
from requests import get as requests_get
@@ -20,6 +21,7 @@
except ImportError:
requests_get = None
+
class RequestException(Exception): # NOQA make flake8 shut up
"""Dummy stand-in exception, defined only when importing Requests fails."""
pass
@@ -67,7 +69,9 @@ def _get_resource(resource_url: str) -> (str, bytes):
elif url_parsed.scheme == '':
# '' is local file
with open(resource_url, 'rb') as f:
- data = f.read()
+ data = b'\xef\xbb\xbf'
+ data += f.read()
+
mimetype, _ = mimetypes.guess_type(resource_url)
elif url_parsed.scheme == 'data':
raise ValueError("Resource path is a data URI", url_parsed.scheme)
@@ -98,14 +102,14 @@ def make_data_uri(mimetype: str, data: bytes) -> str:
return "data:{},{}".format(mimetype, encoded_data)
-def convert_page(page_path: str, parser: str='auto',
- callback: Callable[[str, str, str], None]=lambda *_: None,
- ignore_errors: bool=False, ignore_images: bool=False,
- ignore_css: bool=False, ignore_js: bool=False) -> str:
+def convert_page(page_path: str, parser: str = 'auto',
+ callback: Callable[[str, str, str], None] = lambda *_: None,
+ ignore_errors: bool = False, ignore_images: bool = False,
+ ignore_css: bool = False, ignore_js: bool = False) -> str:
"""Take an HTML file or URL and outputs new HTML with resources as data URIs.
Parameters:
- pageurl (str): URL or path of web page to convert.
+ page_path (str): URL or path of web page to convert.
Keyword Arguments:
parser (str): HTML Parser for Beautiful Soup 4 to use. See
`BS4's docs. <https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser>`_
@@ -296,6 +300,7 @@ def _main():
# All further messages should use print_verbose() or print_error()
def print_error(m):
print(m, file=sys.stderr)
+
# print_error = lambda m: print(m, file=sys.stderr)
if options.verbose:
print_verbose = print_error