From d1fae02088d7ca43a74849b1617fd33a6aa5e124 Mon Sep 17 00:00:00 2001
From: Abhishek Kumar <abhishek.kasyap09@gmail.com>
Date: Wed, 19 Aug 2020 04:15:46 +0530
Subject: [PATCH 1/5] Drop Python 2 support in scancode-toolkit

Signed-off-by: Abhishek Kumar <abhishek.kasyap09@gmail.com>
---
 src/formattedcode/output_html.py              | 14 +---
 src/formattedcode/output_json.py              | 20 ++----
 src/formattedcode/output_jsonlines.py         | 23 ++----
 src/licensedcode/cache.py                     |  9 +--
 src/licensedcode/match_hash.py                |  7 +-
 src/licensedcode/tokenize.py                  |  5 +-
 src/packagedcode/jar_manifest.py              |  6 +-
 src/packagedcode/pypi.py                      | 11 +--
 src/packagedcode/recognize.py                 |  7 --
 src/scancode/cli.py                           | 11 +--
 src/scancode/cli_test_utils.py                | 12 +---
 src/scancode/resource.py                      | 30 +++-----
 src/summarycode/plugin_consolidate.py         |  4 +-
 src/textcode/analysis.py                      |  5 +-
 tests/cluecode/cluecode_test_utils.py         |  6 --
 tests/cluecode/test_copyrights_fosso.py       |  6 +-
 tests/cluecode/test_finder.py                 | 15 ----
 .../test_reuse_output_plugins.py              |  7 +-
 tests/licensedcode/licensedcode_test_utils.py |  6 +-
 tests/licensedcode/test_detection_validate.py |  6 +-
 tests/licensedcode/test_match_spdx_lid.py     |  7 +-
 tests/licensedcode/test_models.py             |  7 +-
 tests/licensedcode/test_query.py              | 71 ++++++-------------
 tests/licensedcode/test_tokenize.py           |  7 +-
 tests/packagedcode/packages_test_utils.py     | 12 +---
 tests/packagedcode/test_gemfile_lock.py       |  8 +--
 tests/packagedcode/test_godeps.py             |  7 +-
 tests/packagedcode/test_jar_manifest.py       | 11 +--
 tests/packagedcode/test_maven.py              | 11 +--
 tests/packagedcode/test_npm.py                |  7 +-
 tests/packagedcode/test_plugin.py             |  7 +-
 tests/packagedcode/test_rpm.py                |  9 +--
 tests/packagedcode/test_rubygems.py           | 16 +----
 tests/packagedcode/test_win_pe.py             |  7 +-
 tests/scancode/test_scancode_checks.py        |  3 -
 tests/summarycode/test_score.py               |  6 +-
 36 files changed, 80 insertions(+), 326 deletions(-)

diff --git a/src/formattedcode/output_html.py b/src/formattedcode/output_html.py
index 83c5589c35..5815147cd1 100644
--- a/src/formattedcode/output_html.py
+++ b/src/formattedcode/output_html.py
@@ -49,8 +49,6 @@
 from commoncode.fileutils import fsencode
 from commoncode.fileutils import parent_directory
 from commoncode.system import on_linux
-from commoncode.system import py2
-from commoncode.system import py3
 
 from plugincode.output import output_impl
 from plugincode.output import OutputPlugin
@@ -122,10 +120,6 @@ def is_enabled(self, custom_output, custom_template, **kwargs):
     def process_codebase(self, codebase, custom_output, custom_template, **kwargs):
         results = self.get_files(codebase, **kwargs)
         version = codebase.get_or_create_current_header().tool_version
-
-        if on_linux and py2:
-            custom_template = fsencode(custom_template)
-
         template_loc = custom_template
         output_file = custom_output
         write_templated(output_file, results, version, template_loc)
@@ -329,12 +323,8 @@ def create_html_app(output_file, results, version, scanned_path):  # NOQA
 
         # write json data
         # FIXME: this should a regular JSON scan format
-        if py2:
-            mode = 'wb'
-            prefix = b'data='
-        if py3:
-            mode = 'w'
-            prefix = u'data='
+        mode = 'w'
+        prefix = u'data='
         with io.open(join(target_assets_dir, 'data.js'), mode) as f:
             f.write(prefix)
             simplejson.dump(results, f, iterable_as_array=True)
diff --git a/src/formattedcode/output_json.py b/src/formattedcode/output_json.py
index 1949c749ac..ac84e904b1 100644
--- a/src/formattedcode/output_json.py
+++ b/src/formattedcode/output_json.py
@@ -31,8 +31,6 @@
 from six import string_types
 
 from commoncode import compat
-from commoncode.system import py2
-from commoncode.system import py3
 from plugincode.output import output_impl
 from plugincode.output import OutputPlugin
 from scancode import CommandLineOption
@@ -65,19 +63,11 @@ def logger_debug(*args):
                                      and a or repr(a) for a in args))
 
 
-if py2:
-    mode = 'wb'
-    space = b' '
-    comma = b','
-    colon = b':'
-    eol = b'\n'
-
-if py3:
-    mode = 'w'
-    space = u' '
-    comma = u','
-    colon = u':'
-    eol = u'\n'
+mode = 'w'
+space = u' '
+comma = u','
+colon = u':'
+eol = u'\n'
 
 
 @output_impl
diff --git a/src/formattedcode/output_jsonlines.py b/src/formattedcode/output_jsonlines.py
index 53d6afd198..57e824fe9b 100644
--- a/src/formattedcode/output_jsonlines.py
+++ b/src/formattedcode/output_jsonlines.py
@@ -29,8 +29,6 @@
 
 import simplejson
 
-from commoncode.system import py2
-from commoncode.system import py3
 from plugincode.output import output_impl
 from plugincode.output import OutputPlugin
 from scancode import CommandLineOption
@@ -42,21 +40,12 @@
 """
 
 
-if py2:
-    mode = 'wb'
-    space = b' '
-    comma = b','
-    colon = b':'
-    eol = b'\n'
-    file_key = b'files'
-
-if py3:
-    mode = 'w'
-    space = u' '
-    comma = u','
-    colon = u':'
-    eol = u'\n'
-    file_key = u'files'
+mode = 'w'
+space = u' '
+comma = u','
+colon = u':'
+eol = u'\n'
+file_key = u'files'
 
 
 @output_impl
diff --git a/src/licensedcode/cache.py b/src/licensedcode/cache.py
index 4702bc5f02..ff11b0cffa 100644
--- a/src/licensedcode/cache.py
+++ b/src/licensedcode/cache.py
@@ -38,7 +38,6 @@
 from commoncode.fileutils import resource_iter
 from commoncode.fileutils import create_dir
 from commoncode import ignore
-from commoncode.system import py3
 
 from scancode_config import scancode_cache_dir
 from scancode_config import scancode_src_dir
@@ -285,10 +284,7 @@ def load_index(cache_file, use_loads=False):
                 'Please delete "{cache_file}" and retry.\n'
                 'If the problem persists, copy this error message '
                 'and submit a bug report.\n'.format(**locals()))
-            if py3:
-                raise ex_type(message).with_traceback(ex_traceback)
-            else:
-                six.reraise(ex_type, message, ex_traceback)
+            raise ex_type(message).with_traceback(ex_traceback)
 
 
 _ignored_from_hash = partial(
@@ -314,8 +310,7 @@ def tree_checksum(tree_base_dir=scancode_src_dir, _ignored=_ignored_from_hash):
     resources = resource_iter(tree_base_dir, ignored=_ignored, with_dirs=False)
     hashable = (pth + str(getmtime(pth)) + str(getsize(pth)) for pth in resources)
     hashable = ''.join(sorted(hashable))
-    if py3:
-        hashable=hashable.encode('utf-8')
+    hashable=hashable.encode('utf-8')
     return md5(hashable).hexdigest()
 
 
diff --git a/src/licensedcode/match_hash.py b/src/licensedcode/match_hash.py
index 742428d689..b2b19a15f3 100644
--- a/src/licensedcode/match_hash.py
+++ b/src/licensedcode/match_hash.py
@@ -29,8 +29,6 @@
 
 from six import string_types
 
-from commoncode.system import py2
-from commoncode.system import py3
 from licensedcode.match import LicenseMatch
 from licensedcode.spans import Span
 
@@ -65,10 +63,7 @@ def tokens_hash(tokens):
     """
     Return a digest binary string computed from a sequence of numeric token ids.
     """
-    if py2:
-        as_bytes = array('h', tokens).tostring()
-    if py3:
-        as_bytes = array('h', tokens).tobytes()
+    as_bytes = array('h', tokens).tobytes()
     return md5(as_bytes).digest()
 
 
diff --git a/src/licensedcode/tokenize.py b/src/licensedcode/tokenize.py
index f117ae0d9b..a75d2ac02a 100644
--- a/src/licensedcode/tokenize.py
+++ b/src/licensedcode/tokenize.py
@@ -40,8 +40,6 @@
 from binascii import crc32
 import re
 
-from commoncode.system import py2
-from commoncode.system import py3
 from licensedcode.stopwords import STOPWORDS
 from textcode.analysis import numbered_text_lines
 
@@ -229,8 +227,7 @@ def select_ngrams(ngrams, with_pos=False):
         # FIXME: use a proper hash
         nghs = []
         for ng in ngram:
-            if ((py2 and isinstance(ng, basestring))
-                    or (py3 and isinstance(ng, str))):
+            if isinstance(ng, str):
                 ng = bytearray(ng, encoding='utf-8')
             else:
                 ng = bytearray(str(ng).encode('utf-8'))
diff --git a/src/packagedcode/jar_manifest.py b/src/packagedcode/jar_manifest.py
index 8d4b49fb3e..29f126027c 100644
--- a/src/packagedcode/jar_manifest.py
+++ b/src/packagedcode/jar_manifest.py
@@ -32,7 +32,6 @@
 import attr
 
 from commoncode.fileutils import as_posixpath
-from commoncode.fileutils import py2
 from packagedcode.utils import normalize_vcs_url
 from packagedcode.maven import parse_scm_connection
 from packagedcode.models import Package
@@ -82,10 +81,7 @@ def parse_manifest(location):
     """
     Return a Manifest parsed from the file at `location` or None if this
     cannot be parsed.         """
-    if py2:
-        mode = 'rb'
-    else:
-        mode = 'r'
+    mode = 'r'
     with open(location, mode) as manifest:
         return parse_manifest_data(manifest.read())
 
diff --git a/src/packagedcode/pypi.py b/src/packagedcode/pypi.py
index d5bd4f7025..095aa67452 100644
--- a/src/packagedcode/pypi.py
+++ b/src/packagedcode/pypi.py
@@ -49,7 +49,6 @@
 
 from commoncode import filetype
 from commoncode import fileutils
-from commoncode.system import py2
 from packagedcode import models
 from packagedcode.utils import build_description
 from packagedcode.utils import combine_expressions
@@ -243,10 +242,7 @@ def parse_with_dparse(location):
                          filetypes.pipfile,
                          filetypes.pipfile_lock):
         return
-    if py2:
-        mode = 'rb'
-    else:
-        mode = 'r'
+    mode = 'r'
     with open(location, mode) as f:
         content = f.read()
 
@@ -329,10 +325,7 @@ def parse_setup_py(location):
         return
 
     # FIXME: what if this is unicode text?
-    if py2:
-        mode = 'rb'
-    else:
-        mode = 'r'
+    mode = 'r'
     with open(location, mode) as inp:
         setup_text = inp.read()
 
diff --git a/src/packagedcode/recognize.py b/src/packagedcode/recognize.py
index 2edcbdf9e1..45f33d4a4a 100644
--- a/src/packagedcode/recognize.py
+++ b/src/packagedcode/recognize.py
@@ -37,7 +37,6 @@
 from commoncode.fileutils import fsencode
 from commoncode.fileutils import splitext_name
 from commoncode.system import on_linux
-from commoncode.system import py2
 from packagedcode import PACKAGE_TYPES
 from typecode import contenttype
 
@@ -90,9 +89,6 @@ def recognize_packages(location):
     for package_type in PACKAGE_TYPES:
         # Note: default to True if there is nothing to match against
         metafiles = package_type.metafiles
-        if on_linux and py2:
-            metafiles = (fsencode(m) for m in metafiles)
-
         if any(fnmatch.fnmatchcase(filename, metaf) for metaf in metafiles):
             for recognized in package_type.recognize(location):
                 if TRACE:logger_debug('recognize_packages: metafile matching: recognized:', recognized)
@@ -114,9 +110,6 @@ def recognize_packages(location):
         extension_matched = False
         extensions = package_type.extensions
         if extensions:
-            if on_linux and py2:
-                extensions = (fsencode(e) for e in extensions)
-
             extensions = (e.lower() for e in extensions)
             extension_matched = any(fnmatch.fnmatchcase(extension, ext_pat)
                                     for ext_pat in extensions)
diff --git a/src/scancode/cli.py b/src/scancode/cli.py
index 8b48f65a39..83ba032a4f 100644
--- a/src/scancode/cli.py
+++ b/src/scancode/cli.py
@@ -68,7 +68,6 @@ class WindowsError(Exception):
 from commoncode.fileutils import PATH_TYPE
 from commoncode.fileutils import POSIX_PATH_SEP
 from commoncode.timeutils import time2tstamp
-from commoncode.system import py2
 from commoncode.system import on_windows
 from commoncode.system import on_linux
 
@@ -590,10 +589,7 @@ def echo_func(*_args, **_kwargs):
 
     if not isinstance(input, (list, tuple)):
         # nothing else todo
-        if on_linux and py2:
-            assert isinstance(input, bytes)
-        else:
-            assert isinstance(input, compat.unicode)
+        assert isinstance(input, compat.unicode)
 
     elif len(input) == 1:
         # we received a single input path, so we treat this as a single path
@@ -1206,10 +1202,7 @@ def scan_codebase(codebase, scanners, processes=1, timeout=DEFAULT_TIMEOUT,
 
         while True:
             try:
-                if py2:
-                    location, rid, scan_errors, scan_time, scan_result, scan_timings = scans.next()
-                else:
-                    location, rid, scan_errors, scan_time, scan_result, scan_timings = next(scans)
+                location, rid, scan_errors, scan_time, scan_result, scan_timings = next(scans)
 
                 if TRACE_DEEP:
                     logger_debug(
diff --git a/src/scancode/cli_test_utils.py b/src/scancode/cli_test_utils.py
index 45c370bc80..65b474fa58 100644
--- a/src/scancode/cli_test_utils.py
+++ b/src/scancode/cli_test_utils.py
@@ -35,15 +35,10 @@
 
 from commoncode.system import on_linux
 from commoncode.system import on_windows
-from commoncode.system import py2
-from commoncode.system import py3
 from scancode_config import scancode_root_dir
 
 
-if py2:
-    mode = 'wb'
-if py3:
-    mode = 'w'
+mode = 'w'
 
 
 def run_scan_plain(options, cwd=None, test_mode=True, expected_rc=0, env=None):
@@ -57,10 +52,7 @@ def run_scan_plain(options, cwd=None, test_mode=True, expected_rc=0, env=None):
     if test_mode and '--test-mode' not in options:
         options.append('--test-mode')
 
-    if on_linux and py2:
-        scmd = b'scancode'
-    else:
-        scmd = u'scancode'
+    scmd = u'scancode'
     scan_cmd = os.path.join(scancode_root_dir, scmd)
     rc, stdout, stderr = execute2(cmd_loc=scan_cmd, args=options, cwd=cwd, env=env)
 
diff --git a/src/scancode/resource.py b/src/scancode/resource.py
index d848d1471c..2d1aa996bd 100644
--- a/src/scancode/resource.py
+++ b/src/scancode/resource.py
@@ -75,8 +75,6 @@
 
 from commoncode import ignore
 from commoncode.system import on_linux
-from commoncode.system import py2
-from commoncode.system import py3
 
 
 """
@@ -314,10 +312,7 @@ def __init__(self, location,
 
         # setup location
         ########################################################################
-        if on_linux and py2:
-            location = fsencode(location)
-        else:
-            location = fsdecode(location)
+        location = fsdecode(location)
 
         location = abspath(normpath(expanduser(location)))
         location = location.rstrip(POSIX_PATH_SEP).rstrip(WIN_PATH_SEP)
@@ -415,7 +410,7 @@ def _get_resource_cache_location(self, rid, create=False):
         """
         if not self.cache_dir:
             return
-        resid = (b'%08x'if (py2 and on_linux) else '%08x') % rid
+        resid = ('%08x') % rid
         cache_sub_dir, cache_file_name = resid[-2:], resid
         parent = join(self.cache_dir, cache_sub_dir)
         if create and not exists(parent):
@@ -440,7 +435,7 @@ def _populate(self):
 
         # Resource sub-class to use. Configured with plugin attributes if present
         self.resource_class = attr.make_class(
-            name=b'ScannedResource' if py2 else 'ScannedResource',
+            name='ScannedResource',
             attrs=self.resource_attributes or {},
             slots=True,
             # frozen=True,
@@ -731,10 +726,7 @@ def _dump_resource(self, resource):
                             'in memory: %(resource)r' % resource)
 
         # TODO: consider messagepack or protobuf for compact/faster processing?
-        if py2:
-            mode = 'wb'
-        if py3:
-            mode = 'w'
+        mode = 'w'
         with open(cache_location , mode) as cached:
             cached.write(json.dumps(resource.serialize(), check_circular=False))
 
@@ -964,10 +956,7 @@ def to_native_path(path):
     """
     if not path:
         return path
-    if on_linux and py2:
-        return fsencode(path)
-    else:
-        return fsdecode(path)
+    return fsdecode(path)
 
 
 def to_decoded_posix_path(path):
@@ -1372,10 +1361,7 @@ def get_codebase_cache_dir(temp_dir):
 
     prefix = 'scancode-codebase-' + time2tstamp() + '-'
     cache_dir = get_temp_dir(base_dir=temp_dir, prefix=prefix)
-    if on_linux and py2:
-        cache_dir = fsencode(cache_dir)
-    else:
-        cache_dir = fsdecode(cache_dir)
+    cache_dir = fsdecode(cache_dir)
     return cache_dir
 
 
@@ -1387,7 +1373,7 @@ def to_dict(self):
 
 def get_codebase_attributes_class(attributes):
     return attr.make_class(
-        name=b'CodebaseAttributes' if py2 else u'CodebaseAttributes',
+        name=u'CodebaseAttributes',
         attrs=attributes or OrderedDict(),
         slots=True,
         bases=(_CodebaseAttributes,)
@@ -1598,7 +1584,7 @@ def _populate(self, scan_data):
 
         # Create the Resource class with the desired attributes
         self.resource_class = attr.make_class(
-            name=b'ScannedResource' if py2 else u'ScannedResource',
+            name=u'ScannedResource',
             attrs=all_res_attributes or OrderedDict(),
             slots=True,
             # frozen=True,
diff --git a/src/summarycode/plugin_consolidate.py b/src/summarycode/plugin_consolidate.py
index 37e7a4e580..26c34057f8 100644
--- a/src/summarycode/plugin_consolidate.py
+++ b/src/summarycode/plugin_consolidate.py
@@ -34,7 +34,6 @@
 import attr
 
 from cluecode.copyrights import CopyrightDetector
-from commoncode.system import py3
 from commoncode.text import python_safe_name
 from license_expression import Licensing
 from packagedcode import get_package_instance
@@ -47,8 +46,7 @@
 from summarycode import copyright_summary
 
 
-if py3:
-    unicode = str
+unicode = str
 
 
 # Tracing flags
diff --git a/src/textcode/analysis.py b/src/textcode/analysis.py
index 9c36122fbb..3699c89881 100644
--- a/src/textcode/analysis.py
+++ b/src/textcode/analysis.py
@@ -38,7 +38,6 @@
 
 from commoncode import compat
 from commoncode.system import on_linux
-from commoncode.system import py2
 from textcode import pdf
 from textcode import markup
 from textcode import sfdb
@@ -150,8 +149,8 @@ def numbered_text_lines(location, demarkup=False, plain_text=False):
     if T.is_text:
         numbered_lines = enumerate(unicode_text_lines(location), 1)
         # text with very long lines such minified JS, JS map files or large JSON
-        locale = b'locale' if on_linux and py2 else u'locale'
-        package_json = b'package.json' if on_linux and py2 else u'package.json'
+        locale = u'locale'
+        package_json = u'package.json'
 
         if (not location.endswith(package_json)
             and (T.is_text_with_long_lines or T.is_compact_js
diff --git a/tests/cluecode/cluecode_test_utils.py b/tests/cluecode/cluecode_test_utils.py
index 6ee0fc5f84..dd2410e3d6 100644
--- a/tests/cluecode/cluecode_test_utils.py
+++ b/tests/cluecode/cluecode_test_utils.py
@@ -37,7 +37,6 @@
 import cluecode.copyrights
 from commoncode import compat
 from commoncode import saneyaml
-from commoncode.system import py2
 from commoncode.testcase import FileDrivenTesting
 from commoncode.testcase import get_test_file_pairs
 from commoncode.text import python_safe_name
@@ -248,11 +247,6 @@ def closure_test_function(*args, **kwargs):
     whats = '_'.join(what)
     test_name = 'test_%(tfn)s_%(index)s' % locals()
     test_name = python_safe_name(test_name)
-
-    # onPython2 we need a plain non-unicode string
-    if py2 and isinstance(test_name, compat.unicode):
-        test_name = test_name.encode('utf-8')
-
     closure_test_function.__name__ = test_name
 
     if test.expected_failures:
diff --git a/tests/cluecode/test_copyrights_fosso.py b/tests/cluecode/test_copyrights_fosso.py
index 441f1f3736..f8f82927ce 100644
--- a/tests/cluecode/test_copyrights_fosso.py
+++ b/tests/cluecode/test_copyrights_fosso.py
@@ -35,8 +35,6 @@
 
 import cluecode_test_utils
 from commoncode import compat
-from commoncode.system import py2
-from commoncode.system import py3
 from commoncode.testcase import FileDrivenTesting
 from commoncode.text import python_safe_name
 
@@ -138,9 +136,7 @@ def build_copyright_test_methods_with_fossology_data():
         tfn = test_file.replace(test_data_dir, '').strip('\\/\\')
         test_name = 'test_fossology_copyright_%(tfn)s' % locals()
         test_name = python_safe_name(test_name)
-        if py2 and isinstance(test_name, compat.unicode):
-            test_name = test_name.encode('utf-8')
-        if py3 and not isinstance(test_name, compat.unicode):
+        if not isinstance(test_name, compat.unicode):
             test_name = test_name.decode('utf-8')
 
         test_method.__name__ = test_name
diff --git a/tests/cluecode/test_finder.py b/tests/cluecode/test_finder.py
index 4fdfe794d8..63a89b24c5 100644
--- a/tests/cluecode/test_finder.py
+++ b/tests/cluecode/test_finder.py
@@ -35,7 +35,6 @@
 
 from commoncode.testcase import FileBasedTesting
 from commoncode import compat
-from commoncode.system import py3
 from cluecode import finder
 from cluecode.finder import find
 from cluecode.finder import urls_regex
@@ -693,18 +692,6 @@ def test_misc_invalid_urls_that_are_still_detected_and_may_not_be_really_invalid
             result = [val for val, _ln in finder.find_urls([test])]
             assert result in ([test] , [test + u'/'])
 
-    @pytest.mark.skipif(py3, reason='url-cpp behaves differently')
-    def test_misc_invalid_urls_that_are_still_detected_and_may_not_be_really_invalidPpy2(self):
-        # set of non URLs from https://mathiasbynens.be/demo/url-regex
-        urls = u'''
-            http://www.foo.bar./
-            ftps://foo.bar/
-        '''
-        for test in urls.split():
-            result = [val for val, _ln in finder.find_urls([test])]
-            assert result in ([test] , [test + u'/'])
-
-    @pytest.mark.skipif(not py3, reason='url-cpp behaves differently')
     def test_misc_invalid_urls_that_are_still_detected_and_normalized(self):
         # set of non URLs from https://mathiasbynens.be/demo/url-regex
         urls = u'''
@@ -714,7 +701,6 @@ def test_misc_invalid_urls_that_are_still_detected_and_normalized(self):
             result = [val for val, _ln in finder.find_urls([test])]
             assert [test] == result
 
-    @pytest.mark.skipif(not py3, reason='url-cpp behaves differently')
     def test_invalid_urls_are_not_detected(self):
         # set of non URLs from https://mathiasbynens.be/demo/url-regex
         urls = u'''
@@ -736,7 +722,6 @@ def test_misc_invalid_urls_that_should_not_be_detected(self):
             result = [val for val, _ln in finder.find_urls([test])]
             assert result, test
 
-    @pytest.mark.skipif(py3, reason='url-cpp behaves differently')
     def test_misc_invalid_urls_that_should_not_be_detected_2(self):
         # At least per this set of non URLs from https://mathiasbynens.be/demo/url-regex
         urls = u'''
diff --git a/tests/formattedcode/test_reuse_output_plugins.py b/tests/formattedcode/test_reuse_output_plugins.py
index 970eb3439f..84b04723d6 100644
--- a/tests/formattedcode/test_reuse_output_plugins.py
+++ b/tests/formattedcode/test_reuse_output_plugins.py
@@ -33,8 +33,6 @@
 
 import pytest
 
-from commoncode.system import py2
-from commoncode.system import py3
 from commoncode.testcase import FileDrivenTesting
 
 
@@ -59,10 +57,7 @@ def check_plugin(plugin_class, test_file='reuse/vb.json', force_text=False):
         with io.open(result_file, 'w', encoding='utf-8') as out:
             op.process_codebase(cb, out)
     else:
-        if py2:
-            mode = 'wb'
-        if py3:
-            mode = 'w'
+        mode = 'w'
         with io.open(result_file, mode) as out:
             op.process_codebase(cb, out)
 
diff --git a/tests/licensedcode/licensedcode_test_utils.py b/tests/licensedcode/licensedcode_test_utils.py
index 2107c1158d..eaf21cb385 100644
--- a/tests/licensedcode/licensedcode_test_utils.py
+++ b/tests/licensedcode/licensedcode_test_utils.py
@@ -37,8 +37,6 @@
 
 from commoncode import compat
 from commoncode import saneyaml
-from commoncode.system import py2
-from commoncode.system import py3
 from commoncode import text
 from commoncode.testcase import get_test_file_pairs
 
@@ -153,9 +151,7 @@ def get_test_method_name(self, prefix='test_detection_'):
         test_file_name = self.test_file_name
         test_name = '{prefix}{test_file_name}'.format(**locals())
         test_name = text.python_safe_name(test_name)
-        if py2 and not isinstance(test_name, bytes):
-            test_name = test_name.encode('utf-8')
-        if py3 and not isinstance(test_name, compat.unicode):
+        if not isinstance(test_name, compat.unicode):
             test_name = test_name.decode('utf-8')
         return test_name
 
diff --git a/tests/licensedcode/test_detection_validate.py b/tests/licensedcode/test_detection_validate.py
index cbe89b7151..c186406de8 100644
--- a/tests/licensedcode/test_detection_validate.py
+++ b/tests/licensedcode/test_detection_validate.py
@@ -36,8 +36,6 @@
 from commoncode import compat
 from commoncode.functional import flatten
 from commoncode import text
-from commoncode.system import py2
-from commoncode.system import py3
 from licensedcode import cache
 from licensedcode import models
 
@@ -56,9 +54,7 @@ def make_validation_test(rule, test_name):
     """
     Build and return a test function closing on tests arguments.
     """
-    if py2 and isinstance(test_name, compat.unicode):
-        test_name = test_name.encode('utf-8')
-    if py3 and isinstance(test_name, bytes):
+    if isinstance(test_name, bytes):
         test_name = test_name.decode('utf-8')
 
     if rule.is_negative or rule.is_false_positive:
diff --git a/tests/licensedcode/test_match_spdx_lid.py b/tests/licensedcode/test_match_spdx_lid.py
index be9ebcc664..dccce0a8eb 100644
--- a/tests/licensedcode/test_match_spdx_lid.py
+++ b/tests/licensedcode/test_match_spdx_lid.py
@@ -34,8 +34,6 @@
 from license_expression import Licensing
 from license_expression import ExpressionError
 
-from commoncode.system import py2
-from commoncode.system import py3
 from commoncode.testcase import FileBasedTesting
 from commoncode import text
 
@@ -90,10 +88,7 @@ def test_method(self):
         qry = Query(location=test_loc, idx=idx)
         results = [list(l) for l in qry.spdx_lines]
         if regen:
-            if py2:
-                wmode = 'wb'
-            if py3:
-                wmode = 'w'
+            wmode = 'w'
             with open(expected_loc, wmode) as ef:
                 json.dump(results, ef, indent=2)
             expected = results
diff --git a/tests/licensedcode/test_models.py b/tests/licensedcode/test_models.py
index b1ecde9b10..8888b11a47 100644
--- a/tests/licensedcode/test_models.py
+++ b/tests/licensedcode/test_models.py
@@ -31,8 +31,6 @@
 import os
 
 from commoncode.testcase import FileBasedTesting
-from commoncode.system import py2
-from commoncode.system import py3
 
 from licensedcode import cache
 from licensedcode import index
@@ -44,10 +42,7 @@
 
 def check_json(expected, results, regen=False):
     if regen:
-        if py2:
-            mode = 'wb'
-        if py3:
-            mode = 'w'
+        mode = 'w'
         with open(expected, mode) as ex:
             json.dump(results, ex, indent=2, separators=(',', ': '))
     with open(expected) as ex:
diff --git a/tests/licensedcode/test_query.py b/tests/licensedcode/test_query.py
index 0359500a0a..4317c7e3fc 100644
--- a/tests/licensedcode/test_query.py
+++ b/tests/licensedcode/test_query.py
@@ -29,8 +29,6 @@
 import os
 
 from commoncode.testcase import FileBasedTesting
-from commoncode.system import py2
-from commoncode.system import py3
 from licensedcode import cache
 from licensedcode import index
 from licensedcode import models
@@ -565,40 +563,21 @@ def test_query_run_has_correct_offset(self):
         query_doc = self.get_test_loc('query/runs/query.txt')
         q = Query(location=query_doc, idx=idx, line_threshold=4)
         result = [qr.to_dict() for qr in q.query_runs]
-        if py2:
-            expected = [
-                {b'end': 0, b'start': 0, b'tokens': u'inc'},
-                {b'end': 121, b'start': 1,
-                 b'tokens': (
-                    u'this library is free software you can redistribute it and or modify '
-                    u'it under the terms of the gnu library general public license as '
-                    u'published by the free software foundation either version 2 of the '
-                    u'license or at your option any later version this library is '
-                    u'distributed in the hope that it will be useful but without any '
-                    u'warranty without even the implied warranty of merchantability or '
-                    u'fitness for particular purpose see the gnu library general public '
-                    u'license for more details you should have received copy of the gnu '
-                    u'library general public license along with this library see the file '
-                    u'copying lib if not write to the free software foundation inc 51 '
-                    u'franklin street fifth floor boston ma 02110 1301 usa')
-                 }
-            ]
-        if py3:
-            expected = [
-                {u'end': 0, u'start': 0, u'tokens': u'inc'},
-                {u'end': 121, u'start': 1,
-                 u'tokens': (
-                    u'this library is free software you can redistribute it and or modify '
-                    u'it under the terms of the gnu library general public license as '
-                    u'published by the free software foundation either version 2 of the '
-                    u'license or at your option any later version this library is '
-                    u'distributed in the hope that it will be useful but without any '
-                    u'warranty without even the implied warranty of merchantability or '
-                    u'fitness for particular purpose see the gnu library general public '
-                    u'license for more details you should have received copy of the gnu '
-                    u'library general public license along with this library see the file '
-                    u'copying lib if not write to the free software foundation inc 51 '
-                    u'franklin street fifth floor boston ma 02110 1301 usa')
+        expected = [
+            {u'end': 0, u'start': 0, u'tokens': u'inc'},
+            {u'end': 121, u'start': 1,
+                u'tokens': (
+                u'this library is free software you can redistribute it and or modify '
+                u'it under the terms of the gnu library general public license as '
+                u'published by the free software foundation either version 2 of the '
+                u'license or at your option any later version this library is '
+                u'distributed in the hope that it will be useful but without any '
+                u'warranty without even the implied warranty of merchantability or '
+                u'fitness for particular purpose see the gnu library general public '
+                u'license for more details you should have received copy of the gnu '
+                u'library general public license along with this library see the file '
+                u'copying lib if not write to the free software foundation inc 51 '
+                u'franklin street fifth floor boston ma 02110 1301 usa')
                  }
             ]
 
@@ -676,20 +655,12 @@ def test_QueryRun_with_all_digit_lines(self):
         qry = Query(query_string=qs, idx=idx)
         result = [qr.to_dict() for qr in qry.query_runs]
         # FIXME: we should not even have a query run for things that are all digits
-        if py2:
-            expected = [
-                {b'end': 5, b'start': 0, b'tokens': u'1 80 0 256 1568 1953'},
-                {b'end': 12, b'start': 6, b'tokens': u'406 1151 1 429 368 634 8'},
-                {b'end': 17, b'start': 13, b'tokens': u'1955 724 2 932 234'},
-                {b'end': 20, b'start': 18, b'tokens': u'694 634 110'},
-            ]
-        if py3:
-            expected = [
-                {u'end': 5, u'start': 0, u'tokens': u'1 80 0 256 1568 1953'},
-                {u'end': 12, u'start': 6, u'tokens': u'406 1151 1 429 368 634 8'},
-                {u'end': 17, u'start': 13, u'tokens': u'1955 724 2 932 234'},
-                {u'end': 20, u'start': 18, u'tokens': u'694 634 110'},
-            ]
+        expected = [
+            {u'end': 5, u'start': 0, u'tokens': u'1 80 0 256 1568 1953'},
+            {u'end': 12, u'start': 6, u'tokens': u'406 1151 1 429 368 634 8'},
+            {u'end': 17, u'start': 13, u'tokens': u'1955 724 2 932 234'},
+            {u'end': 20, u'start': 18, u'tokens': u'694 634 110'},
+        ]
         assert expected == result
 
         assert not any(qr.is_matchable() for qr in qry.query_runs)
diff --git a/tests/licensedcode/test_tokenize.py b/tests/licensedcode/test_tokenize.py
index 1db7da6f67..f4cfc89cc7 100644
--- a/tests/licensedcode/test_tokenize.py
+++ b/tests/licensedcode/test_tokenize.py
@@ -34,8 +34,6 @@
 import os
 from time import time
 
-from commoncode.system import py2
-from commoncode.system import py3
 from commoncode.testcase import FileBasedTesting
 from licensedcode.tokenize import matched_query_text_tokenizer
 from licensedcode.tokenize import ngrams
@@ -49,10 +47,7 @@
 TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
 
 
-if py2:
-    mode = 'wb'
-if py3:
-    mode = 'w'
+mode = 'w'
 
 
 
diff --git a/tests/packagedcode/packages_test_utils.py b/tests/packagedcode/packages_test_utils.py
index de63a19407..73b2b04358 100644
--- a/tests/packagedcode/packages_test_utils.py
+++ b/tests/packagedcode/packages_test_utils.py
@@ -31,8 +31,6 @@
 import json
 import shutil
 
-from commoncode.system import py2
-from commoncode.system import py3
 from commoncode import testcase
 
 
@@ -50,10 +48,7 @@ def check_package(self, package, expected_loc, regen=False):
 
         if regen:
             regened_exp_loc = self.get_temp_file()
-            if py2:
-                wmode = 'wb'
-            if py3:
-                wmode = 'w'
+            wmode = 'w'
             with open(regened_exp_loc, wmode) as ex:
                 json.dump(results, ex, indent=2, separators=(',', ': '))
 
@@ -83,10 +78,7 @@ def check_packages(self, packages, expected_loc, regen=False):
 
         if regen:
             regened_exp_loc = self.get_temp_file()
-            if py2:
-                wmode = 'wb'
-            if py3:
-                wmode = 'w'
+            wmode = 'w'
             with open(regened_exp_loc, wmode) as ex:
                 json.dump(results, ex, indent=2, separators=(',', ': '))
 
diff --git a/tests/packagedcode/test_gemfile_lock.py b/tests/packagedcode/test_gemfile_lock.py
index 9d2fdad89d..67bad9aa9f 100644
--- a/tests/packagedcode/test_gemfile_lock.py
+++ b/tests/packagedcode/test_gemfile_lock.py
@@ -30,8 +30,6 @@
 import os
 import shutil
 
-from commoncode.system import py2
-from commoncode.system import py3
 from commoncode.testcase import FileBasedTesting
 from packagedcode import gemfile_lock
 
@@ -46,11 +44,7 @@ def check_results(self, results, expected_loc, regen=False):
         expected_loc = self.get_test_loc(expected_loc)
         if regen:
             regened_exp_loc = self.get_temp_file()
-            if py2:
-                wmode = 'wb'
-            if py3:
-                wmode = 'w'
-
+            wmode = 'w'
             with open(regened_exp_loc, wmode) as ex:
                 json.dump(results, ex, indent=2, separators=(',', ': '))
 
diff --git a/tests/packagedcode/test_godeps.py b/tests/packagedcode/test_godeps.py
index e202500d2e..3082d023f7 100644
--- a/tests/packagedcode/test_godeps.py
+++ b/tests/packagedcode/test_godeps.py
@@ -31,8 +31,6 @@
 import json
 import os
 
-from commoncode.system import py2
-from commoncode.system import py3
 from commoncode.testcase import FileBasedTesting
 from packagedcode import godeps
 
@@ -83,10 +81,7 @@ def check_package(self, test_file, expected_file, regen=False):
         results = godeps.parse(location=test_loc)
         expected_loc = self.get_test_loc(expected_file)
         if regen:
-            if py2:
-                wmode = 'wb'
-            if py3:
-                wmode = 'w'
+            wmode = 'w'
             with open(expected_loc, wmode) as ex:
                 json.dump(results, ex, indent=2)
         with io.open(expected_loc, encoding='utf-8') as ex:
diff --git a/tests/packagedcode/test_jar_manifest.py b/tests/packagedcode/test_jar_manifest.py
index a594d670b4..aa7cb8ef7c 100644
--- a/tests/packagedcode/test_jar_manifest.py
+++ b/tests/packagedcode/test_jar_manifest.py
@@ -32,18 +32,13 @@
 import os.path
 
 from commoncode import compat
-from commoncode.system import py2
-from commoncode.system import py3
 from commoncode import text
 from commoncode import testcase
 from packagedcode.jar_manifest import parse_manifest
 from packagedcode.jar_manifest import get_normalized_package_data
 
 
-if py2:
-    mode = 'wb'
-if py3:
-    mode = 'w'
+mode = 'w'
 
 
 class BaseParseManifestCase(testcase.FileBasedTesting):
@@ -124,9 +119,7 @@ def test_manifest(self):
 
     # set a proper function name to display in reports and use in discovery
     # function names are best as bytes
-    if py2 and isinstance(test_name, compat.unicode):
-        test_name = test_name.encode('utf-8')
-    if py3 and isinstance(test_name, bytes):
+    if isinstance(test_name, bytes):
         test_name = test_name.decode('utf-8')
     test_manifest.__name__ = test_name
     return test_manifest
diff --git a/tests/packagedcode/test_maven.py b/tests/packagedcode/test_maven.py
index eab192460f..500fa83523 100644
--- a/tests/packagedcode/test_maven.py
+++ b/tests/packagedcode/test_maven.py
@@ -35,18 +35,13 @@
 
 from commoncode import compat
 from commoncode import fileutils
-from commoncode.system import py2
-from commoncode.system import py3
 from commoncode import text
 from commoncode import testcase
 from packagedcode import maven
 from scancode.resource import Codebase
 
 
-if py2:
-    mode = 'wb'
-if py3:
-    mode = 'w'
+mode = 'w'
 
 
 class TestIsPom(testcase.FileBasedTesting):
@@ -502,9 +497,7 @@ def test_pom(self):
 
     # set a proper function name to display in reports and use in discovery
     # function names are best as bytes
-    if py2 and isinstance(test_name, compat.unicode):
-        test_name = test_name.encode('utf-8')
-    if py3 and isinstance(test_name, bytes):
+    if isinstance(test_name, bytes):
         test_name = test_name.decode('utf-8')
 
     test_pom.__name__ = test_name
diff --git a/tests/packagedcode/test_npm.py b/tests/packagedcode/test_npm.py
index e401ea55d2..3b6b6b6cf7 100644
--- a/tests/packagedcode/test_npm.py
+++ b/tests/packagedcode/test_npm.py
@@ -28,8 +28,6 @@
 
 import os.path
 
-from commoncode.system import py2
-from commoncode.system import py3
 from packagedcode import npm
 from scancode.resource import Codebase
 from packages_test_utils import PackageTester
@@ -144,10 +142,7 @@ def test_parse_invalid_json(self):
         try:
             npm.parse(test_file)
         except ValueError as e:
-            if py2:
-                assert 'No JSON object could be decoded' in str(e)
-            if py3:
-                assert 'Expecting value: line 60 column 3' in str(e)
+            assert 'Expecting value: line 60 column 3' in str(e)
 
     def test_parse_keywords(self):
         test_file = self.get_test_loc('npm/keywords/package.json')
diff --git a/tests/packagedcode/test_plugin.py b/tests/packagedcode/test_plugin.py
index dc4db2fa7c..99bba503e7 100644
--- a/tests/packagedcode/test_plugin.py
+++ b/tests/packagedcode/test_plugin.py
@@ -30,8 +30,6 @@
 from unittest.case import skipIf
 
 from commoncode.system import on_windows
-from commoncode.system import py2
-from commoncode.system import py3
 from packages_test_utils import PackageTester
 from scancode.cli_test_utils import check_json_scan
 from scancode.cli_test_utils import run_scan_click
@@ -44,10 +42,7 @@ def test_package_list_command(self, regen=False):
         expected_file = self.get_test_loc('plugin/help.txt')
         result = run_scan_click(['--list-packages'])
         if regen:
-            if py2:
-                wmode = 'wb'
-            if py3:
-                wmode = 'w'
+            wmode = 'w'
             with open(expected_file, wmode) as ef:
                 ef.write(result.output)
         assert open(expected_file).read() == result.output
diff --git a/tests/packagedcode/test_rpm.py b/tests/packagedcode/test_rpm.py
index d3390f85f7..452bf12be6 100644
--- a/tests/packagedcode/test_rpm.py
+++ b/tests/packagedcode/test_rpm.py
@@ -35,8 +35,6 @@
 from commoncode import compat
 from commoncode.testcase import FileBasedTesting
 from commoncode.system import on_linux
-from commoncode.system import py2
-from commoncode.system import py3
 from packagedcode import rpm
 
 
@@ -157,10 +155,7 @@ def test_packagedcode_rpm_tags_and_info_on_non_rpm_file(self):
 
 def check_json(result, expected_file, regen=False):
     if regen:
-        if py2:
-            mode = 'wb'
-        if py3:
-            mode = 'w'
+        mode = 'w'
         with io.open(expected_file, mode) as reg:
             reg.write(json.dumps(result, indent=4, separators=(',', ': ')))
 
@@ -173,7 +168,7 @@ class TestRpmTags(FileBasedTesting):
     test_data_dir = os.path.join(os.path.dirname(__file__), 'data')
 
     def check_rpm_tags(self, test_file):
-        suffix = b'-expected.json' if on_linux and py2 else '-expected.json'
+        suffix = '-expected.json'
         expected_file = test_file + suffix
         result = rpm.get_rpm_tags(test_file)._asdict()
         check_json(result, expected_file, regen=False)
diff --git a/tests/packagedcode/test_rubygems.py b/tests/packagedcode/test_rubygems.py
index 28337475ee..b0a9798293 100644
--- a/tests/packagedcode/test_rubygems.py
+++ b/tests/packagedcode/test_rubygems.py
@@ -35,8 +35,6 @@
 import saneyaml
 
 from commoncode import compat
-from commoncode.system import py2
-from commoncode.system import py3
 from commoncode import text
 from commoncode.testcase import FileBasedTesting
 from packagedcode import rubygems
@@ -67,10 +65,7 @@ def check_gemspec(self, test_loc, expected_loc, regen=False):
             pass
 
         if regen:
-            if py2:
-                mode = 'wb'
-            if py3:
-                mode = 'w'
+            mode = 'w'
             with open(expected_loc, mode) as ex:
                 json.dump(results, ex, indent=2)
         with io.open(expected_loc, encoding='UTF-8') as ex:
@@ -145,10 +140,7 @@ def check_rubygem(self):
         package.license_expression = package.compute_normalized_license()
         package = [package.to_dict()]
         if regen:
-            if py2:
-                wmode = 'wb'
-            if py3:
-                wmode = 'w'
+            wmode = 'w'
             with io.open(expected_json_loc, wmode) as ex:
                 json.dump(package, ex, indent=2)
 
@@ -156,9 +148,7 @@ def check_rubygem(self):
             expected = json.load(ex, object_pairs_hook=OrderedDict)
         assert expected == package
 
-    if py2 and isinstance(test_name, compat.unicode):
-        test_name = test_name.encode('utf-8')
-    if py3 and isinstance(test_name, bytes):
+    if isinstance(test_name, bytes):
         test_name = test_name.decode('utf-8')
 
     check_rubygem.__name__ = test_name
diff --git a/tests/packagedcode/test_win_pe.py b/tests/packagedcode/test_win_pe.py
index b71698567c..02f9762cb5 100644
--- a/tests/packagedcode/test_win_pe.py
+++ b/tests/packagedcode/test_win_pe.py
@@ -31,8 +31,6 @@
 import json
 import os
 
-from commoncode.system import py2
-from commoncode.system import py3
 from commoncode.testcase import FileBasedTesting
 from packagedcode import win_pe
 
@@ -43,10 +41,7 @@ class TestWinPe(FileBasedTesting):
     def check_win_pe(self, test_file, expected_file, regen=False):
         result = win_pe.pe_info(test_file, include_extra_data=True)
         if regen:
-            if py2:
-                mode = 'wb'
-            if py3:
-                mode = 'w'
+            mode = 'w'
             with open(expected_file, mode) as out:
                 json.dump(result, out, indent=2)
 
diff --git a/tests/scancode/test_scancode_checks.py b/tests/scancode/test_scancode_checks.py
index 746206c39d..2c3ca611ca 100644
--- a/tests/scancode/test_scancode_checks.py
+++ b/tests/scancode/test_scancode_checks.py
@@ -47,9 +47,6 @@
 
 @unittest.skipIf(not on_linux, 'Check about files only on one OS')
 class TestCheckAboutFiles(unittest.TestCase):
-    def test_about_files_thirdparty(self):
-        subprocess.check_output('bin/about check thirdparty/'.split(), cwd=root_dir)
-
     def test_about_files_src(self):
         subprocess.check_output('bin/about check src/'.split(), cwd=root_dir)
 
diff --git a/tests/summarycode/test_score.py b/tests/summarycode/test_score.py
index fc9cc7091a..59a878f915 100644
--- a/tests/summarycode/test_score.py
+++ b/tests/summarycode/test_score.py
@@ -34,8 +34,6 @@
 import pytest
 
 from commoncode import compat
-from commoncode.system import py2
-from commoncode.system import py3
 from commoncode.testcase import FileDrivenTesting
 from commoncode.text import python_safe_name
 from scancode.cli_test_utils import check_json_scan
@@ -79,9 +77,7 @@ def closure_test_function(*args, **kwargs):
 
     test_name = 'test_license_clarity_score_%(test_name)s' % locals()
     test_name = python_safe_name(test_name)
-    if py2 and isinstance(test_name, compat.unicode):
-        test_name = test_name.encode('utf-8')
-    if py3 and isinstance(test_name, bytes):
+    if isinstance(test_name, bytes):
         test_name = test_name.decode('utf-8')
 
     closure_test_function.__name__ = test_name

From 08df9a7ccb04856e13387ca4bd4d346be49bddfe Mon Sep 17 00:00:00 2001
From: Philippe Ombredanne <pombredanne@nexb.com>
Date: Fri, 4 Dec 2020 14:17:03 +0100
Subject: [PATCH 2/5] Remove remaining references to Python 2 #295

This removes all remaining references to Python 2 and to the commoncode.compat module.

Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
---
 etc/scripts/scanserv.py                       |   3 +-
 etc/scripts/synclic.py                        |   3 +-
 src/cluecode/finder.py                        |   6 +-
 src/formattedcode/output_csv.py               |   5 +-
 src/formattedcode/output_html.py              |   5 +-
 src/formattedcode/output_json.py              |   3 +-
 src/formattedcode/output_spdx.py              |  16 +-
 src/licensedcode/index.py                     |   6 -
 src/licensedcode/spans.py                     |   7 +-
 src/licensedcode/tokenize.py                  |   9 -
 src/packagedcode/cocoapods.py                 |  18 +-
 src/packagedcode/pypi.py                      |   9 +-
 src/scancode/__init__.py                      |   1 -
 src/scancode/cli.py                           |  10 -
 src/scancode/interrupt.py                     |  19 +-
 src/scancode/plugin_ignore.py                 |   4 +-
 src/summarycode/copyright_summary.py          |   3 +-
 src/textcode/analysis.py                      |   6 +-
 tests/cluecode/cluecode_test_utils.py         |   1 -
 tests/cluecode/test_copyrights_fosso.py       |   3 +-
 tests/cluecode/test_finder.py                 |   3 +-
 .../data/licenses/2187-agpl/_json.py          |   2 +-
 tests/licensedcode/licensedcode_test_utils.py |   3 +-
 tests/licensedcode/licenses_test_utils.py     | 118 -----
 tests/licensedcode/test_detection_validate.py |   1 -
 tests/licensedcode/test_match.py              |  13 +-
 tests/licensedcode/test_query.py              |  21 +-
 tests/packagedcode/test_alpine.py             |   3 -
 tests/packagedcode/test_cocoapods.py          |   5 +-
 tests/packagedcode/test_debian.py             |   2 -
 tests/packagedcode/test_debian_copyright.py   |   6 -
 tests/packagedcode/test_jar_manifest.py       |   1 -
 tests/packagedcode/test_maven.py              |   1 -
 tests/packagedcode/test_pypi.py               |   8 +-
 tests/packagedcode/test_rpm.py                |   4 +-
 tests/packagedcode/test_rubygems.py           |   1 -
 .../data/non_utf8/expected-win-py2.json       | 496 ------------------
 ...xpected-win-py3.json => expected-win.json} |   0
 tests/scancode/test_cli.py                    |  62 +--
 tests/scancode/test_outdated.py               |   6 -
 tests/summarycode/test_score.py               |   1 -
 tests/summarycode/test_summarizer.py          |  12 +-
 tests/textcode/test_analysis.py               |  13 +-
 tests/textcode/test_strings.py                |   8 +-
 44 files changed, 90 insertions(+), 837 deletions(-)
 delete mode 100644 tests/licensedcode/licenses_test_utils.py
 delete mode 100644 tests/scancode/data/non_utf8/expected-win-py2.json
 rename tests/scancode/data/non_utf8/{expected-win-py3.json => expected-win.json} (100%)

diff --git a/etc/scripts/scanserv.py b/etc/scripts/scanserv.py
index aaa9fcd2c6..5aee4df20e 100644
--- a/etc/scripts/scanserv.py
+++ b/etc/scripts/scanserv.py
@@ -45,10 +45,9 @@ def run_scan(location, **kwargs):
 
 
 if __name__ == '__channelexec__':
-    from commoncode import compat
     for kwargs in channel:  # NOQA
         # a mapping of kwargs or a location string
-        if isinstance(kwargs, (str, compat.unicode)):
+        if isinstance(kwargs, (str, str)):
             channel.send(run_scan(kwargs))  # NOQA
         elif isinstance(kwargs, dict):
             channel.send(run_scan(**kwargs))  # NOQA
diff --git a/etc/scripts/synclic.py b/etc/scripts/synclic.py
index 5ce64d115d..d2d4897d3b 100644
--- a/etc/scripts/synclic.py
+++ b/etc/scripts/synclic.py
@@ -45,7 +45,6 @@
 
 from commoncode import fetch
 from commoncode import fileutils
-from commoncode import compat
 
 import licensedcode
 from licensedcode.models import load_licenses
@@ -898,7 +897,7 @@ def update_external(_attrib, _sc_val, _ext_val):
 
             continue
 
-        if (isinstance(scancode_value, compat.unicode) and isinstance(external_value, compat.unicode)):
+        if (isinstance(scancode_value, str) and isinstance(external_value, str)):
             # keep the stripped and normalized spaces value
             # normalized spaces
             normalized_scancode_value = ' '.join(scancode_value.split())
diff --git a/src/cluecode/finder.py b/src/cluecode/finder.py
index eca603860e..9b69bc9979 100644
--- a/src/cluecode/finder.py
+++ b/src/cluecode/finder.py
@@ -33,8 +33,6 @@
 from six import string_types
 import urlpy
 
-from commoncode import compat
-from commoncode.system import py3
 from commoncode.text import toascii
 from cluecode import finder_data
 from textcode import analysis
@@ -251,7 +249,7 @@ def find_urls(location, unique=True):
         if TRACE_URL:
             logger_debug('find_urls: lineno:', lineno, '_line:', repr(_line),
                          'type(url):', type(url), 'url:', repr(url))
-        yield compat.unicode(url), lineno
+        yield str(url), lineno
 
 
 EMPTY_URLS = set(['https', 'http', 'ftp', 'www', ])
@@ -462,7 +460,7 @@ def get_ip(s):
         return False
 
     try:
-        ip = ipaddress.ip_address(compat.unicode(s))
+        ip = ipaddress.ip_address(str(s))
         return ip
     except ValueError:
         return False
diff --git a/src/formattedcode/output_csv.py b/src/formattedcode/output_csv.py
index bbf4f265fd..c46104d051 100644
--- a/src/formattedcode/output_csv.py
+++ b/src/formattedcode/output_csv.py
@@ -33,7 +33,6 @@
 from six import string_types
 import unicodecsv
 
-from commoncode import compat
 from formattedcode import FileOptionType
 from plugincode.output import output_impl
 from plugincode.output import OutputPlugin
@@ -320,7 +319,7 @@ def flatten_package(_package, path, prefix='package__'):
                     if isinstance(component_val, list):
                         component_val = '\n'.join(component_val)
 
-                    if not isinstance(component_val, compat.unicode):
+                    if not isinstance(component_val, str):
                         component_val = repr(component_val)
 
                     existing = pack.get(component_new_key) or []
@@ -338,7 +337,7 @@ def flatten_package(_package, path, prefix='package__'):
 
         pack[nk] = ''
 
-        if isinstance(val, compat.unicode):
+        if isinstance(val, str):
             pack[nk] = val
         else:
             # Use repr if not a string
diff --git a/src/formattedcode/output_html.py b/src/formattedcode/output_html.py
index da9f672eb2..60f46dcfd3 100644
--- a/src/formattedcode/output_html.py
+++ b/src/formattedcode/output_html.py
@@ -40,16 +40,13 @@
 import click
 import simplejson
 
-from commoncode import compat
 from commoncode.fileutils import PATH_TYPE
 from commoncode.fileutils import as_posixpath
 from commoncode.fileutils import copytree
 from commoncode.fileutils import delete
 from commoncode.fileutils import file_name
 from commoncode.fileutils import file_base_name
-from commoncode.fileutils import fsencode
 from commoncode.fileutils import parent_directory
-from commoncode.system import on_linux
 from formattedcode import FileOptionType
 from commoncode.cliutils import PluggableCommandLineOption
 from commoncode.cliutils import OUTPUT_GROUP
@@ -133,7 +130,7 @@ def write_templated(output_file, results, version, template_loc):
     template = get_template(template_loc)
 
     for template_chunk in generate_output(results, version, template):
-        assert isinstance(template_chunk, compat.unicode)
+        assert isinstance(template_chunk, str)
         try:
             output_file.write(template_chunk)
         except Exception:
diff --git a/src/formattedcode/output_json.py b/src/formattedcode/output_json.py
index 5c7dd3f9ca..7c59f54d14 100644
--- a/src/formattedcode/output_json.py
+++ b/src/formattedcode/output_json.py
@@ -134,8 +134,7 @@ def write_results(codebase, output_file, pretty=False, **kwargs):
 
         # Write files
         codebase_files = OutputPlugin.get_files(codebase, **kwargs)
-        # OutputPlugin.get_files() returns a `map()`, which isn's JSON
-        # serializable in Python 3
+        # OutputPlugin.get_files() returns a generator, not JSON-serializable
         codebase_files = list(codebase_files)
         s.write('files', codebase_files)
 
diff --git a/src/formattedcode/output_spdx.py b/src/formattedcode/output_spdx.py
index dac4878c38..cef1806011 100644
--- a/src/formattedcode/output_spdx.py
+++ b/src/formattedcode/output_spdx.py
@@ -196,8 +196,8 @@ def write_spdx(output_file, files, tool_name, tool_version, notice, input_file,
     """
     Write scan output as SPDX Tag/value or RDF.
     """
+    as_rdf = not as_tagvalue
     _patch_license_list()
-
     absinput = abspath(input_file)
 
     if isdir(absinput):
@@ -341,24 +341,20 @@ def write_spdx(output_file, files, tool_name, tool_version, notice, input_file,
 
         if as_tagvalue:
             from spdx.writers.tagvalue import write_document  # NOQA
-        else:
+        elif as_rdf:
             from spdx.writers.rdf import write_document  # NOQA
 
         if as_tagvalue:
-            # unicode text everywhere
             spdx_output = StringIO()
-        else:
-            # rdf as utf-encoded bytes on Py2
+        elif as_rdf:
+            # RDF output is UTF-8-encoded bytes
             spdx_output = BytesIO()
 
         write_document(doc, spdx_output, validate=False)
         result = spdx_output.getvalue()
 
-        if as_tagvalue:
-            # unicode text everywhere
-            pass
-        else:
-            # rdf as utf-encoded bytes on Py2
+        if as_rdf:
+            # RDF output is UTF-8-encoded bytes: decode to text before writing
             result = result.decode('utf-8')
 
         output_file.write(result)
diff --git a/src/licensedcode/index.py b/src/licensedcode/index.py
index 641f52fe3c..6fceca9707 100644
--- a/src/licensedcode/index.py
+++ b/src/licensedcode/index.py
@@ -41,12 +41,6 @@
 import sys
 from time import time
 
-# Python 2 and 3 support
-try:
-    import itertools.izip as zip  # NOQA
-except ImportError:
-    pass
-
 from intbitset import intbitset
 from six import string_types
 
diff --git a/src/licensedcode/spans.py b/src/licensedcode/spans.py
index 397d5a907b..cbed98677a 100644
--- a/src/licensedcode/spans.py
+++ b/src/licensedcode/spans.py
@@ -37,9 +37,6 @@
 
 from intbitset import intbitset
 
-from commoncode import compat
-
-
 """
 Ranges and intervals of integers using bitmaps.
 Used as a compact and faster data structure for token and position sets.
@@ -110,7 +107,7 @@ def __init__(self, *args):
 
         elif len_args == 1:
             # args0 is a single int or an iterable of ints
-            if isinstance(args[0], compat.integer_types):
+            if isinstance(args[0], int):
                 self._set = intbitset(args)
             else:
                 # some sequence or iterable
@@ -207,7 +204,7 @@ def __contains__(self, other):
         if isinstance(other, Span):
             return self._set.issuperset(other._set)
 
-        if isinstance(other, compat.integer_types):
+        if isinstance(other, int):
             return self._set.__contains__(other)
 
         if isinstance(other, (set, frozenset)):
diff --git a/src/licensedcode/tokenize.py b/src/licensedcode/tokenize.py
index a75d2ac02a..0ba58ca501 100644
--- a/src/licensedcode/tokenize.py
+++ b/src/licensedcode/tokenize.py
@@ -28,15 +28,6 @@
 from __future__ import unicode_literals
 
 from itertools import islice
-
-# Python 2 and 3 support
-try:
-    # Python 2
-    import itertools.izip as zip  # NOQA
-except ImportError:
-    # Python 3
-    pass
-
 from binascii import crc32
 import re
 
diff --git a/src/packagedcode/cocoapods.py b/src/packagedcode/cocoapods.py
index 587f0b831b..50e94840a9 100644
--- a/src/packagedcode/cocoapods.py
+++ b/src/packagedcode/cocoapods.py
@@ -29,15 +29,11 @@
 import re
 
 import attr
-from packageurl import PackageURL
 
-from commoncode.fileutils import py2
 from commoncode import filetype
-from commoncode import fileutils
 from packagedcode import models
 from packagedcode.spec import Spec
 
-
 """
 Handle cocoapods packages manifests for macOS and iOS
 including .podspec, Podfile and Podfile.lock files.
@@ -46,7 +42,6 @@
 
 # TODO: override the license detection to detect declared_license correctly.
 
-
 TRACE = False
 
 logger = logging.getLogger(__name__)
@@ -104,11 +99,16 @@ def build_package(podspec_data):
     name = podspec_data.get('name')
     version = podspec_data.get('version')
     declared_license = podspec_data.get('license')
-    summary = podspec_data.get('summary')
-    description = podspec_data.get('description')
+    summary = podspec_data.get('summary', '')
+    description = podspec_data.get('description', '')
     homepage_url = podspec_data.get('homepage_url')
     source = podspec_data.get('source')
     authors = podspec_data.get('author') or []
+    if summary and not description.startswith(summary):
+        desc = [summary]
+        if description:
+            desc += [description]
+        description = '\n'.join(desc)
 
     author_names = []
     author_email = []
@@ -166,7 +166,7 @@ def party_mapper(author, email):
 def parse_person(person):
     """
     Return name and email from person string.
-    
+
     https://guides.cocoapods.org/syntax/podspec.html#authors
     Author can be in the form:
         s.author = 'Rohit Potter'
@@ -187,4 +187,4 @@ def parse_person(person):
         name = parsed.group('name')
         email = parsed.group('email')
 
-    return name, email 
\ No newline at end of file
+    return name, email
diff --git a/src/packagedcode/pypi.py b/src/packagedcode/pypi.py
index 8db86bc121..6e6870a99e 100644
--- a/src/packagedcode/pypi.py
+++ b/src/packagedcode/pypi.py
@@ -52,13 +52,6 @@
 from packagedcode.utils import build_description
 from packagedcode.utils import combine_expressions
 
-try:
-    # Python 2
-    unicode = unicode  # NOQA
-
-except NameError:  # pragma: nocover
-    # Python 3
-    unicode = str  # NOQA
 
 """
 Detect and collect Python packages information.
@@ -774,7 +767,7 @@ def compute_normalized_license(declared_license):
         values = list(declared_license.values())
     elif isinstance(declared_license, list):
         values = list(declared_license)
-    elif isinstance(declared_license, (str, unicode,)):
+    elif isinstance(declared_license, str):
         values = [declared_license]
     else:
         return
diff --git a/src/scancode/__init__.py b/src/scancode/__init__.py
index 0f560cbe10..7fce143aa4 100644
--- a/src/scancode/__init__.py
+++ b/src/scancode/__init__.py
@@ -35,7 +35,6 @@
 from click.types import BoolParamType
 from six import string_types
 
-from commoncode import compat
 from commoncode import fileutils
 
 # Tracing flags
diff --git a/src/scancode/cli.py b/src/scancode/cli.py
index 752ca098b9..76ffdc8524 100644
--- a/src/scancode/cli.py
+++ b/src/scancode/cli.py
@@ -43,14 +43,6 @@
 from time import time
 import traceback
 
-# Python 2 and 3 support
-try:
-    # Python 2
-    import itertools.imap as map  # NOQA
-except ImportError:
-    # Python 3
-    pass
-
 # this exception is not available on posix
 try:
     WindowsError  # NOQA
@@ -68,7 +60,6 @@ class WindowsError(Exception):
 from commoncode.cliutils import path_progress_message
 from commoncode.cliutils import progressmanager
 from commoncode.cliutils import PluggableCommandLineOption
-from commoncode import compat
 from commoncode.fileutils import as_posixpath
 from commoncode.fileutils import PATH_TYPE
 from commoncode.fileutils import POSIX_PATH_SEP
@@ -76,7 +67,6 @@ class WindowsError(Exception):
 from commoncode.resource import Codebase
 from commoncode.resource import VirtualCodebase
 from commoncode.system import on_windows
-from commoncode.system import on_linux
 
 # these are important to register plugin managers
 from plugincode import PluginManager
diff --git a/src/scancode/interrupt.py b/src/scancode/interrupt.py
index e1cc60c744..84877ca666 100644
--- a/src/scancode/interrupt.py
+++ b/src/scancode/interrupt.py
@@ -51,7 +51,6 @@ class TimeoutError(Exception):  # NOQA
 NO_ERROR = None
 NO_VALUE = None
 
-
 if not on_windows:
     """
     Some code based in part and inspired from the RobotFramework and
@@ -113,21 +112,9 @@ def handler(signum, frame):
     from ctypes import pythonapi
     from multiprocessing import TimeoutError as MpTimeoutError
 
-    try:
-        # python 3
-        from queue import Empty as Queue_Empty  # NOQA
-        from queue import Queue  # NOQA
-    except:
-        # python 2
-        from Queue import Empty as Queue_Empty  # NOQA
-        from Queue import Queue  # NOQA
-
-    try:
-        # python 3
-        from _thread import start_new_thread
-    except ImportError:
-        # python 2
-        from thread import start_new_thread
+    from queue import Empty as Queue_Empty
+    from queue import Queue
+    from _thread import start_new_thread
 
     def interruptible(func, args=None, kwargs=None, timeout=DEFAULT_TIMEOUT):
         """
diff --git a/src/scancode/plugin_ignore.py b/src/scancode/plugin_ignore.py
index f0e03fefda..56b7155281 100644
--- a/src/scancode/plugin_ignore.py
+++ b/src/scancode/plugin_ignore.py
@@ -32,7 +32,6 @@
 from plugincode.pre_scan import pre_scan_impl
 from commoncode.cliutils import PluggableCommandLineOption
 from commoncode.cliutils import PRE_SCAN_GROUP
-from commoncode import compat
 
 
 # Tracing flags
@@ -53,8 +52,7 @@ def logger_debug(*args):
     logger.setLevel(logging.DEBUG)
 
     def logger_debug(*args):
-        return logger.debug(
-            ' '.join(isinstance(a, compat.unicode) and a or repr(a) for a in args))
+        return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args))
 
 
 @pre_scan_impl
diff --git a/src/summarycode/copyright_summary.py b/src/summarycode/copyright_summary.py
index 42b6f94f17..dd19669b35 100644
--- a/src/summarycode/copyright_summary.py
+++ b/src/summarycode/copyright_summary.py
@@ -36,7 +36,6 @@
 from text_unidecode import unidecode
 
 from cluecode.copyrights import CopyrightDetector
-from commoncode import compat
 from commoncode.text import toascii
 from summarycode.utils import sorted_counter
 from summarycode.utils import get_resource_summary
@@ -164,7 +163,7 @@ def transliterate(self):
 
     def fingerprint(self):
         key = self.key
-        if not isinstance(key, compat.unicode):
+        if not isinstance(key, str):
             key = unidecode(key)
         fp = fingerprints.generate(key)
 
diff --git a/src/textcode/analysis.py b/src/textcode/analysis.py
index 0ada972d67..c767b383c9 100644
--- a/src/textcode/analysis.py
+++ b/src/textcode/analysis.py
@@ -36,8 +36,6 @@
 import chardet
 from six import string_types
 
-from commoncode import compat
-from commoncode.system import on_linux
 from textcode import pdf
 from textcode import markup
 from textcode import sfdb
@@ -249,7 +247,7 @@ def as_unicode(line):
 
     TODO: Add file/magic detection, unicodedmanit/BS3/4
     """
-    if isinstance(line, compat.unicode):
+    if isinstance(line, str):
         return remove_null_bytes(line)
 
     try:
@@ -270,7 +268,7 @@ def as_unicode(line):
             except UnicodeDecodeError:
                 try:
                     enc = chardet.detect(line)['encoding']
-                    s = compat.unicode(line, enc)
+                    s = str(line, enc)
                 except UnicodeDecodeError:
                     # fall-back to strings extraction if all else fails
                     s = strings.string_from_string(s)
diff --git a/tests/cluecode/cluecode_test_utils.py b/tests/cluecode/cluecode_test_utils.py
index dd2410e3d6..6e256c1d4a 100644
--- a/tests/cluecode/cluecode_test_utils.py
+++ b/tests/cluecode/cluecode_test_utils.py
@@ -35,7 +35,6 @@
 import pytest
 
 import cluecode.copyrights
-from commoncode import compat
 from commoncode import saneyaml
 from commoncode.testcase import FileDrivenTesting
 from commoncode.testcase import get_test_file_pairs
diff --git a/tests/cluecode/test_copyrights_fosso.py b/tests/cluecode/test_copyrights_fosso.py
index f8f82927ce..9f0e40f1d6 100644
--- a/tests/cluecode/test_copyrights_fosso.py
+++ b/tests/cluecode/test_copyrights_fosso.py
@@ -34,7 +34,6 @@
 import pytest
 
 import cluecode_test_utils
-from commoncode import compat
 from commoncode.testcase import FileDrivenTesting
 from commoncode.text import python_safe_name
 
@@ -136,7 +135,7 @@ def build_copyright_test_methods_with_fossology_data():
         tfn = test_file.replace(test_data_dir, '').strip('\\/\\')
         test_name = 'test_fossology_copyright_%(tfn)s' % locals()
         test_name = python_safe_name(test_name)
-        if not isinstance(test_name, compat.unicode):
+        if not isinstance(test_name, str):
             test_name = test_name.decode('utf-8')
 
         test_method.__name__ = test_name
diff --git a/tests/cluecode/test_finder.py b/tests/cluecode/test_finder.py
index 63a89b24c5..18d43ca1d8 100644
--- a/tests/cluecode/test_finder.py
+++ b/tests/cluecode/test_finder.py
@@ -34,7 +34,6 @@
 import pytest
 
 from commoncode.testcase import FileBasedTesting
-from commoncode import compat
 from cluecode import finder
 from cluecode.finder import find
 from cluecode.finder import urls_regex
@@ -763,7 +762,7 @@ def test_find_in_go_does_not_crash_with_unicode_error(self):
         test_file = self.get_test_loc('finder/url/verify.go')
         patterns = [('urls', urls_regex(),)]
         for _key, url, _line, _lineno in find(test_file, patterns):
-            assert type(url) == compat.unicode
+            assert type(url) == str
 
 
 class TestSearch(FileBasedTesting):
diff --git a/tests/licensedcode/data/licenses/2187-agpl/_json.py b/tests/licensedcode/data/licenses/2187-agpl/_json.py
index eac37972dc..fc58fc1481 100644
--- a/tests/licensedcode/data/licenses/2187-agpl/_json.py
+++ b/tests/licensedcode/data/licenses/2187-agpl/_json.py
@@ -87,7 +87,7 @@ def __str__(self):
             return self.getquoted()
     else:
         def __str__(self):
-            # getquoted is binary in Py3
+            # getquoted is binary
             return self.getquoted().decode('ascii', 'replace')
 
 
diff --git a/tests/licensedcode/licensedcode_test_utils.py b/tests/licensedcode/licensedcode_test_utils.py
index eaf21cb385..1af971513e 100644
--- a/tests/licensedcode/licensedcode_test_utils.py
+++ b/tests/licensedcode/licensedcode_test_utils.py
@@ -35,7 +35,6 @@
 from license_expression import Licensing
 import pytest
 
-from commoncode import compat
 from commoncode import saneyaml
 from commoncode import text
 from commoncode.testcase import get_test_file_pairs
@@ -151,7 +150,7 @@ def get_test_method_name(self, prefix='test_detection_'):
         test_file_name = self.test_file_name
         test_name = '{prefix}{test_file_name}'.format(**locals())
         test_name = text.python_safe_name(test_name)
-        if not isinstance(test_name, compat.unicode):
+        if not isinstance(test_name, str):
             test_name = test_name.decode('utf-8')
         return test_name
 
diff --git a/tests/licensedcode/licenses_test_utils.py b/tests/licensedcode/licenses_test_utils.py
deleted file mode 100644
index ed6312b00c..0000000000
--- a/tests/licensedcode/licenses_test_utils.py
+++ /dev/null
@@ -1,118 +0,0 @@
-#
-# Copyright (c) 2016 nexB Inc. and others. All rights reserved.
-# http://nexb.com and https://github.com/nexB/scancode-toolkit/
-# The ScanCode software is licensed under the Apache License version 2.0.
-# Data generated with ScanCode require an acknowledgment.
-# ScanCode is a trademark of nexB Inc.
-#
-# You may not use this software except in compliance with the License.
-# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software distributed
-# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# When you publish or redistribute any data created with ScanCode or any ScanCode
-# derivative work, you must accompany this data with the following acknowledgment:
-#
-#  Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
-#  OR CONDITIONS OF ANY KIND, either express or implied. No content created from
-#  ScanCode should be considered or used as legal advice. Consult an Attorney
-#  for any legal advice.
-#  ScanCode is a free software code scanning tool from nexB Inc. and others.
-#  Visit https://github.com/nexB/scancode-toolkit/ for support and download.
-
-from __future__ import absolute_import
-from __future__ import print_function
-from __future__ import unicode_literals
-
-from collections import OrderedDict
-import os.path
-import json
-import shutil
-
-from commoncode.system import py2
-from commoncode.system import py3
-from commoncode import testcase
-
-
-class PackageTester(testcase.FileBasedTesting):
-    test_data_dir = os.path.join(os.path.dirname(__file__), 'data')
-
-    def check_package(self, package, expected_loc, regen=False):
-        """
-        Helper to test a package object against an expected JSON file.
-        """
-        expected_loc = self.get_test_loc(expected_loc)
-
-        package.license_expression = package.compute_normalized_license()
-        results = package.to_dict()
-
-        if regen:
-            regened_exp_loc = self.get_temp_file()
-            if py2:
-                wmode = 'wb'
-            if py3:
-                wmode = 'w'
-            with open(regened_exp_loc, wmode) as ex:
-                json.dump(results, ex, indent=2, separators=(',', ': '))
-
-            expected_dir = os.path.dirname(expected_loc)
-            if not os.path.exists(expected_dir):
-                os.makedirs(expected_dir)
-            shutil.copy(regened_exp_loc, expected_loc)
-
-        with open(expected_loc, 'rb') as ex:
-            expected = json.load(ex, encoding='utf-8', object_pairs_hook=OrderedDict)
-
-        try:
-            assert expected == results
-        except AssertionError:
-            assert json.dumps(expected, indent=2) == json.dumps(results, indent=2)
-
-    def check_packages(self, packages, expected_loc, regen=False):
-        """
-        Helper to test multiple package objects against an expected JSON file.
-        """
-        expected_loc = self.get_test_loc(expected_loc)
-
-        results = []
-        for package in packages:
-            package.license_expression = package.compute_normalized_license()
-            results.append(package.to_dict())
-
-        if regen:
-            regened_exp_loc = self.get_temp_file()
-            if py2:
-                wmode = 'wb'
-            if py3:
-                wmode = 'w'
-            with open(regened_exp_loc, wmode) as ex:
-                json.dump(results, ex, indent=2, separators=(',', ': '))
-
-            expected_dir = os.path.dirname(expected_loc)
-            if not os.path.exists(expected_dir):
-                os.makedirs(expected_dir)
-            shutil.copy(regened_exp_loc, expected_loc)
-
-        with open(expected_loc, 'rb') as ex:
-            expected_packages = json.load(ex, encoding='utf-8', object_pairs_hook=OrderedDict)
-
-        for expected_package, result in zip(expected_packages, results):
-            assert expected_package == result
-
-
-def check_result_equals_expected_json(result, expected, regen=False):
-    """
-    Check equality between a result collection and an expected JSON file.
-    Regen the expected file if regen is True.
-    """
-    if regen:
-        with open(expected, 'w') as ex:
-            ex.write(json.dumps(result, indent=2))
-
-    with open(expected) as ex:
-        expected = json.loads(ex.read())
-
-    assert expected == result
-
diff --git a/tests/licensedcode/test_detection_validate.py b/tests/licensedcode/test_detection_validate.py
index c84fa408cc..9159fc2a3f 100644
--- a/tests/licensedcode/test_detection_validate.py
+++ b/tests/licensedcode/test_detection_validate.py
@@ -33,7 +33,6 @@
 import pytest
 import saneyaml
 
-from commoncode import compat
 from commoncode.functional import flatten
 from commoncode import text
 from licensedcode import cache
diff --git a/tests/licensedcode/test_match.py b/tests/licensedcode/test_match.py
index 2aa1f1e19e..1bf493d133 100644
--- a/tests/licensedcode/test_match.py
+++ b/tests/licensedcode/test_match.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (c) 2017 nexB Inc. and others. All rights reserved.
+# Copyright (c) nexB Inc. and others. All rights reserved.
 # http://nexb.com and https://github.com/nexB/scancode-toolkit/
 # The ScanCode software is licensed under the Apache License version 2.0.
 # Data generated with ScanCode require an acknowledgment.
@@ -28,10 +28,9 @@
 from __future__ import unicode_literals
 
 import os
-from unittest import skipIf
+
 import pytest
 
-from commoncode.system import py2
 from commoncode.testcase import FileBasedTesting
 from licensedcode import cache
 from licensedcode import index
@@ -50,7 +49,6 @@
 from licensedcode.models import load_rules
 from licensedcode.spans import Span
 
-
 TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
 
 
@@ -99,7 +97,6 @@ def test_LicenseMatch_equality_2(self):
 
         assert m3_r3 != m4_r4
 
-
     def test_LicenseMatch_not_equal(self):
         r1 = Rule(text_file='r1', license_expression='apache-1.0 OR gpl')
         m1 = LicenseMatch(rule=r1, qspan=Span(0, 2), ispan=Span(0, 2))
@@ -541,7 +538,6 @@ def test_filter_overlapping_matches_filters_multiple_contained_matches(self):
         assert [m1] == matches
         assert sorted([m5, contained1, contained2, ]) == sorted(discarded)
 
-
     def test_merge_does_not_merge_matches_with_same_spans_if_licenses_are_identical_but_rule_differ(self):
         r1 = Rule(text_file='r1', license_expression='apache-2.0')
         m1 = LicenseMatch(rule=r1, qspan=Span(0, 2), ispan=Span(0, 2))
@@ -972,7 +968,6 @@ def test_get_full_matched_text(self):
         matched_text = match.matched_text(_usecache=False)
         assert expected.rstrip() == matched_text
 
-
         # test again using some HTML with tags
         # Note that there is a trailing space in that string
         expected = u"""Copyright <br>2003</br> (<br>C</br>) <br>James</br>. <br>All</br> <br>Rights</br> <br>Reserved</br>.
@@ -1086,7 +1081,6 @@ def test_tokenize_matched_text_does_return_correct_tokens(self):
 
         assert expected == result
 
-    @skipIf(py2, 'This complex unicode test is not worth testing on Python2')
     def test_tokenize_matched_text_does_not_crash_on_turkish_unicode(self):
         querys = u'İrəli'
         result = tokenize_matched_text(location=None, query_string=querys, dictionary={})
@@ -1097,7 +1091,6 @@ def test_tokenize_matched_text_does_not_crash_on_turkish_unicode(self):
         ]
         assert expected == result
 
-    @skipIf(py2, 'This complex unicode test is not worth testing on Python2')
     def test_tokenize_matched_text_behaves_like_query_tokenizer_on_turkish_unicode(self):
         from licensedcode.tokenize import query_tokenizer
         querys = u'İrəli'
@@ -1240,7 +1233,6 @@ def test_matched_text_is_not_truncated_with_unicode_diacritic_input_from_query_w
         matched_text = match.matched_text(_usecache=False, whole_lines=True)
         assert expected == matched_text
 
-    @skipIf(py2, 'This complex unicode test is not worth testing on Python2')
     def test_matched_text_is_not_truncated_with_unicode_diacritic_input_with_diacritic_in_rules(self):
         rule_dir = self.get_test_loc('match/turkish_unicode/rules')
         idx = index.LicenseIndex(load_rules(rule_dir))
@@ -1261,7 +1253,6 @@ def test_matched_text_is_not_truncated_with_unicode_diacritic_input_with_diacrit
 
         assert expected == matched_texts
 
-    @skipIf(py2, 'This complex unicode test is not worth testing on Python2')
     def test_matched_text_is_not_truncated_with_unicode_diacritic_input_and_full_index(self):
         idx = cache.get_index()
         query_loc = self.get_test_loc('match/turkish_unicode/query')
diff --git a/tests/licensedcode/test_query.py b/tests/licensedcode/test_query.py
index e7a94fa859..c0b28fb815 100644
--- a/tests/licensedcode/test_query.py
+++ b/tests/licensedcode/test_query.py
@@ -26,6 +26,7 @@
 from __future__ import print_function
 from __future__ import unicode_literals
 
+import json
 import os
 
 from commoncode.testcase import FileBasedTesting
@@ -34,12 +35,28 @@
 from licensedcode import models
 from licensedcode.models import Rule
 from licensedcode.query import Query
-from licenses_test_utils import check_result_equals_expected_json
 
 
 TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
 
 
+
+
+def check_result_equals_expected_json(result, expected, regen=False):
+    """
+    Assert that `result` equals the JSON loaded from the `expected` file path.
+    If `regen` is True, first overwrite that file with `result` as UTF-8 JSON.
+    """
+    if regen:
+        with open(expected, 'w', encoding='utf-8') as ex:
+            json.dump(result, ex, indent=2)
+
+    with open(expected, encoding='utf-8') as ex:
+        expected_data = json.load(ex)
+
+    assert expected_data == result
+
+
 class IndexTesting(FileBasedTesting):
     test_data_dir = TEST_DATA_DIR
 
@@ -775,7 +792,7 @@ def test_query_run_for_text_with_long_lines(self):
         assert len(Query(location1, idx=idx).query_runs) == 17
         assert len(Query(location2, idx=idx).query_runs) == 15
 
-    def test_Query_tokens_by_line_behaves_the_same_on_python_2_and_python_3(self):
+    def test_Query_tokens_by_line_behaves_the_same_on_various_python_2(self):
         location = self.get_test_loc('query/query_lines/yahoo-eula.txt')
         idx = cache.get_index()
         query = Query(location, idx=idx)
diff --git a/tests/packagedcode/test_alpine.py b/tests/packagedcode/test_alpine.py
index 57e943477b..41391753f1 100644
--- a/tests/packagedcode/test_alpine.py
+++ b/tests/packagedcode/test_alpine.py
@@ -27,15 +27,12 @@
 from __future__ import unicode_literals
 
 import os.path
-from unittest.case import skipIf
 
-from commoncode.system import py2
 from packagedcode import alpine
 from packages_test_utils import check_result_equals_expected_json
 from packages_test_utils import PackageTester
 
 
-@skipIf(py2, 'Alpine linux package parsing is not worth testing on Python2')
 class TestAlpinePackage(PackageTester):
     test_data_dir = os.path.join(os.path.dirname(__file__), 'data')
 
diff --git a/tests/packagedcode/test_cocoapods.py b/tests/packagedcode/test_cocoapods.py
index a5d061bf08..ce1465fb33 100644
--- a/tests/packagedcode/test_cocoapods.py
+++ b/tests/packagedcode/test_cocoapods.py
@@ -27,14 +27,11 @@
 from __future__ import unicode_literals
 
 import os
-import pytest
 
-from commoncode.system import py2
 from packagedcode import cocoapods
 from packages_test_utils import PackageTester
 
 
-@pytest.mark.skipif(py2, reason='Does not pass on Python2')
 class TestRubyGemspec(PackageTester):
     test_data_dir = os.path.join(os.path.dirname(__file__), 'data')
 
@@ -66,4 +63,4 @@ def test_rubygems_can_parse_SwiftLib(self):
         test_file = self.get_test_loc('cocoapods/podspec/SwiftLib.podspec')
         expected_loc = self.get_test_loc('cocoapods/podspec/SwiftLib.podspec.expected.json')
         packages = cocoapods.parse(test_file)
-        self.check_package(packages, expected_loc, regen=False)
\ No newline at end of file
+        self.check_package(packages, expected_loc, regen=False)
diff --git a/tests/packagedcode/test_debian.py b/tests/packagedcode/test_debian.py
index 197158d565..8ef12f5f15 100644
--- a/tests/packagedcode/test_debian.py
+++ b/tests/packagedcode/test_debian.py
@@ -29,7 +29,6 @@
 import os.path
 from unittest.case import skipIf
 
-from commoncode.system import py2
 from commoncode.system import on_windows
 from packagedcode import debian
 from packagedcode import models
@@ -69,7 +68,6 @@ def test_parse_status_file_not_a_status_file(self):
         test_packages = list(debian.parse_status_file(test_file))
         assert [] == test_packages
 
-    @skipIf(py2, 'FileNotFoundError is not defined on Python2')
     def test_parse_status_file_non_existing_file(self):
         test_file = os.path.join(self.get_test_loc('debian'), 'foobarbaz')
         try:
diff --git a/tests/packagedcode/test_debian_copyright.py b/tests/packagedcode/test_debian_copyright.py
index aa4cbe6efc..bd273bccc8 100644
--- a/tests/packagedcode/test_debian_copyright.py
+++ b/tests/packagedcode/test_debian_copyright.py
@@ -29,9 +29,7 @@
 import json
 from os import path
 from os import walk
-from unittest.case import skipIf
 
-from commoncode.system import py2
 from commoncode.testcase import FileBasedTesting
 from commoncode import text
 from packagedcode import debian_copyright
@@ -98,9 +96,6 @@ def build_tests(test_dir, clazz, prefix='test_', regen=False):
     """
     test_data_dir = path.join(path.dirname(__file__), 'data')
     test_dir_loc = path.join(test_data_dir, test_dir)
-    if py2:
-        return
-
     # loop through all items and attach a test method to our test class
     for test_file in relative_walk(test_dir_loc):
         test_name = prefix + text.python_safe_name(test_file)
@@ -115,7 +110,6 @@ def build_tests(test_dir, clazz, prefix='test_', regen=False):
         setattr(clazz, test_name, test_method)
 
 
-@skipIf(py2, 'Only on Python3')
 class TestDebianCopyrightLicenseDetection(FileBasedTesting):
     # pytestmark = pytest.mark.scanslow
     test_data_dir = path.join(path.dirname(__file__), 'data')
diff --git a/tests/packagedcode/test_jar_manifest.py b/tests/packagedcode/test_jar_manifest.py
index aa7cb8ef7c..57a8594af6 100644
--- a/tests/packagedcode/test_jar_manifest.py
+++ b/tests/packagedcode/test_jar_manifest.py
@@ -31,7 +31,6 @@
 import json
 import os.path
 
-from commoncode import compat
 from commoncode import text
 from commoncode import testcase
 from packagedcode.jar_manifest import parse_manifest
diff --git a/tests/packagedcode/test_maven.py b/tests/packagedcode/test_maven.py
index 3163679e24..6d94927251 100644
--- a/tests/packagedcode/test_maven.py
+++ b/tests/packagedcode/test_maven.py
@@ -33,7 +33,6 @@
 
 import pytest
 
-from commoncode import compat
 from commoncode import fileutils
 from commoncode import text
 from commoncode import testcase
diff --git a/tests/packagedcode/test_pypi.py b/tests/packagedcode/test_pypi.py
index e750decc42..2c9638b3ba 100644
--- a/tests/packagedcode/test_pypi.py
+++ b/tests/packagedcode/test_pypi.py
@@ -35,8 +35,6 @@
 import pytest
 
 from commoncode.system import on_windows
-from commoncode.system import py2
-from commoncode.system import py3
 from packagedcode.models import DependentPackage
 from packagedcode import pypi
 from packages_test_utils import PackageTester
@@ -490,11 +488,7 @@ def test_parse_setup_py_with_computed_versions(self, test_loc, expected_loc, reg
             results = {}
 
         if regen:
-            if py2:
-                wmode = 'wb'
-            if py3:
-                wmode = 'w'
-            with open(expected_loc, wmode) as ex:
+            with open(expected_loc, 'w') as ex:
                 json.dump(results, ex, indent=2, separators=(',', ': '))
 
         with open(expected_loc, 'rb') as ex:
diff --git a/tests/packagedcode/test_rpm.py b/tests/packagedcode/test_rpm.py
index 452bf12be6..7c03aa7722 100644
--- a/tests/packagedcode/test_rpm.py
+++ b/tests/packagedcode/test_rpm.py
@@ -32,9 +32,7 @@
 import json
 import os
 
-from commoncode import compat
 from commoncode.testcase import FileBasedTesting
-from commoncode.system import on_linux
 from packagedcode import rpm
 
 
@@ -119,7 +117,7 @@ def test_pyrpm_basic(self):
 
         assert expected == alltags
         # tests that tags are all unicode
-        assert all([isinstance(v, compat.unicode) for v in alltags.values() if v])
+        assert all([isinstance(v, str) for v in alltags.values() if v])
 
     def test_get_rpm_tags_(self):
         test_file = self.get_test_loc('rpm/header/python-glc-0.7.1-1.src.rpm')
diff --git a/tests/packagedcode/test_rubygems.py b/tests/packagedcode/test_rubygems.py
index 0a1f0ee79a..8eca2a8fd6 100644
--- a/tests/packagedcode/test_rubygems.py
+++ b/tests/packagedcode/test_rubygems.py
@@ -35,7 +35,6 @@
 
 import saneyaml
 
-from commoncode import compat
 from commoncode import text
 from commoncode.testcase import FileBasedTesting
 from packagedcode import rubygems
diff --git a/tests/scancode/data/non_utf8/expected-win-py2.json b/tests/scancode/data/non_utf8/expected-win-py2.json
deleted file mode 100644
index 66212ac6e1..0000000000
--- a/tests/scancode/data/non_utf8/expected-win-py2.json
+++ /dev/null
@@ -1,496 +0,0 @@
-{
-  "headers": [
-    {
-      "tool_name": "scancode-toolkit",
-      "options": {
-        "input": "<path>",
-        "--info": true,
-        "--json": "<file>",
-        "--strip-root": true
-      },
-      "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
-      "message": null,
-      "errors": [],
-      "extra_data": {
-        "files_count": 18
-      }
-    }
-  ],
-  "files": [
-    {
-      "path": "non_unicode",
-      "base_name": "non_unicode",
-      "date": null,
-      "extension": "",
-      "file_type": null,
-      "files_count": 18,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": false,
-      "md5": null,
-      "sha256": null,
-      "mime_type": null,
-      "name": "non_unicode",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "directory"
-    },
-    {
-      "name": "foo\u00b1bar",
-      "base_name": "foo\u00b1bar",
-      "date": "2017-07-14",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "path": "non_unicode/foo\u00b1bar",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_ascii_-\u00c3\u00a0\u00c3\u00b2\u00c9\u02dc\u00c5\u0081\u00c4\u0178",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_ascii_-\u00c3\u00a0\u00c3\u00b2\u00c9\u02dc\u00c5\u0081\u00c4\u0178",
-      "path": "non_unicode/non_ascii_-\u00c3\u00a0\u00c3\u00b2\u00c9\u02dc\u00c5\u0081\u00c4\u0178",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_ascii_10_\u00e0\u00b8\u0081",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_ascii_10_\u00e0\u00b8\u0081",
-      "path": "non_unicode/non_ascii_10_\u00e0\u00b8\u0081",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_ascii_11_\u00c2\u00a0",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_ascii_11_\u00c2\u00a0",
-      "path": "non_unicode/non_ascii_11_\u00c2\u00a0",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_ascii_12_\u00e2\u201a\u00ac",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_ascii_12_\u00e2\u201a\u00ac",
-      "path": "non_unicode/non_ascii_12_\u00e2\u201a\u00ac",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_ascii_2_\u00c3\u00a6",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_ascii_2_\u00c3\u00a6",
-      "path": "non_unicode/non_ascii_2_\u00c3\u00a6",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_ascii_3_\u00c4\u00b0",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_ascii_3_\u00c4\u00b0",
-      "path": "non_unicode/non_ascii_3_\u00c4\u00b0",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_ascii_4_\u00c5\u0081",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_ascii_4_\u00c5\u0081",
-      "path": "non_unicode/non_ascii_4_\u00c5\u0081",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_ascii_5_\u00cf\u2020",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_ascii_5_\u00cf\u2020",
-      "path": "non_unicode/non_ascii_5_\u00cf\u2020",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_ascii_6_\u00d0\u0161",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_ascii_6_\u00d0\u0161",
-      "path": "non_unicode/non_ascii_6_\u00d0\u0161",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_ascii_7_\u00d7\u0090",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_ascii_7_\u00d7\u0090",
-      "path": "non_unicode/non_ascii_7_\u00d7\u0090",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_ascii_8_\u00d8\u0152",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_ascii_8_\u00d8\u0152",
-      "path": "non_unicode/non_ascii_8_\u00d8\u0152",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_ascii_9_\u00d8\u00aa",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_ascii_9_\u00d8\u00aa",
-      "path": "non_unicode/non_ascii_9_\u00d8\u00aa",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_cp12_decodable_\u0081\u02dc",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_cp12_decodable_\u0081\u02dc",
-      "path": "non_unicode/non_cp12_decodable_\u0081\u02dc",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_cp932_decodable_\u00e7w\u00f0",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_cp932_decodable_\u00e7w\u00f0",
-      "path": "non_unicode/non_cp932_decodable_\u00e7w\u00f0",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_utf8_decodable_2_\u00ed\u00b2\u20ac",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_utf8_decodable_2_\u00ed\u00b2\u20ac",
-      "path": "non_unicode/non_utf8_decodable_2_\u00ed\u00b2\u20ac",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_utf8_decodable_3_\u00ed\u00b4\u20ac",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_utf8_decodable_3_\u00ed\u00b4\u20ac",
-      "path": "non_unicode/non_utf8_decodable_3_\u00ed\u00b4\u20ac",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    },
-    {
-      "base_name": "non_utf8_decodable_\u00ff",
-      "date": "2017-08-27",
-      "extension": "",
-      "file_type": "empty",
-      "files_count": 0,
-      "dirs_count": 0,
-      "size_count": 0,
-      "is_archive": false,
-      "is_binary": false,
-      "is_media": false,
-      "is_script": false,
-      "is_source": false,
-      "is_text": true,
-      "md5": null,
-      "sha256": null,
-      "mime_type": "inode/x-empty",
-      "name": "non_utf8_decodable_\u00ff",
-      "path": "non_unicode/non_utf8_decodable_\u00ff",
-      "programming_language": null,
-      "scan_errors": [],
-      "sha1": null,
-      "size": 0,
-      "type": "file"
-    }
-  ]
-}
diff --git a/tests/scancode/data/non_utf8/expected-win-py3.json b/tests/scancode/data/non_utf8/expected-win.json
similarity index 100%
rename from tests/scancode/data/non_utf8/expected-win-py3.json
rename to tests/scancode/data/non_utf8/expected-win.json
diff --git a/tests/scancode/test_cli.py b/tests/scancode/test_cli.py
index 3040c312c5..eba8a1c065 100644
--- a/tests/scancode/test_cli.py
+++ b/tests/scancode/test_cli.py
@@ -43,8 +43,6 @@
 from commoncode.system import on_mac
 from commoncode.system import on_macos_14_or_higher
 from commoncode.system import on_windows
-from commoncode.system import py2
-from commoncode.system import py3
 
 from scancode.cli_test_utils import check_json_scan
 from scancode.cli_test_utils import load_json_result
@@ -52,7 +50,6 @@
 from scancode.cli_test_utils import run_scan_click
 from scancode.cli_test_utils import run_scan_plain
 
-
 test_env = FileDrivenTesting()
 test_env.test_data_dir = os.path.join(os.path.dirname(__file__), 'data')
 
@@ -64,12 +61,6 @@
 """
 
 
-if py2:
-    read_mode = 'rb'
-if py3:
-    read_mode = 'r'
-
-
 def test_package_option_detects_packages(monkeypatch):
     test_dir = test_env.get_test_loc('package', copy=True)
     result_file = test_env.get_temp_file('json')
@@ -182,7 +173,7 @@ def test_scan_info_returns_full_root():
     result_file = test_env.get_temp_file('json')
     args = ['--info', '--full-root', test_dir, '--json', result_file]
     run_scan_click(args)
-    result_data = json.loads(open(result_file, read_mode).read())
+    result_data = json.loads(open(result_file).read())
     file_paths = [f['path'] for f in result_data['files']]
     assert 12 == len(file_paths)
     root = fileutils.as_posixpath(test_dir)
@@ -194,7 +185,7 @@ def test_scan_info_returns_correct_full_root_with_single_file():
     result_file = test_env.get_temp_file('json')
     args = ['--info', '--full-root', test_file, '--json', result_file]
     run_scan_click(args)
-    result_data = json.loads(open(result_file, read_mode).read())
+    result_data = json.loads(open(result_file).read())
     files = result_data['files']
     # we have a single file
     assert len(files) == 1
@@ -285,7 +276,6 @@ def test_failing_scan_return_proper_exit_code_on_failure():
     run_scan_click(args, expected_rc=1)
 
 
-@pytest.mark.xfail(py2, reason='May fail on Python 2 py2')
 def test_scan_should_not_fail_on_faulty_pdf_or_pdfminer_bug_but_instead_report_errors_and_keep_trucking_with_html():
     test_file = test_env.get_test_loc('failing/patchelf.pdf')
     result_file = test_env.get_temp_file('test.html')
@@ -305,7 +295,6 @@ def test_scan_license_should_not_fail_with_output_to_html_and_json():
     assert 'Object of type License is not JSON serializable' not in result.output
 
 
-@pytest.mark.xfail(reason='May fail on Python 2 py2')
 def test_scan_should_not_fail_on_faulty_pdf_or_pdfminer_bug_but_instead_report_errors_and_keep_trucking_with_html_app():
     test_file = test_env.get_test_loc('failing/patchelf.pdf')
     result_file = test_env.get_temp_file('test.app.html')
@@ -398,14 +387,18 @@ def test_scan_works_with_multiple_processes_and_timeouts():
 def check_scan_does_not_fail_when_scanning_unicode_files_and_paths(verbosity):
     test_dir = test_env.get_test_loc(u'unicodepath/uc')
     result_file = test_env.get_temp_file('json')
-
-    if on_linux and py2:
-        test_dir = fsencode(test_dir)
-        result_file = fsencode(result_file)
-
-    args = ['--info', '--license', '--copyright', '--package',
-            '--email', '--url', '--strip-root', test_dir , '--json',
-            result_file] + ([verbosity] if verbosity else [])
+    args = [
+        '--info',
+        '--license',
+        '--copyright',
+        '--package',
+        '--email',
+        '--url',
+        '--strip-root',
+        test_dir ,
+        '--json',
+        result_file
+    ] + ([verbosity] if verbosity else [])
     results = run_scan_click(args)
 
     # the paths for each OS ends up encoded differently.
@@ -453,9 +446,6 @@ def test_scan_does_not_fail_when_scanning_unicode_test_files_from_express():
 
     test_path = u'unicode_fixtures.tar.gz'
 
-    if on_linux and py2:
-        test_path = b'unicode_fixtures.tar.gz'
-
     test_dir = test_env.extract_test_tar_raw(test_path)
     test_dir = fsencode(test_dir)
 
@@ -527,17 +517,13 @@ def test_scan_can_handle_weird_file_names():
     check_json_scan(test_env.get_test_loc(expected), result_file, regen=False)
 
 
-@pytest.mark.skipif(on_macos_14_or_higher or (on_windows and py3),
-        reason='Cannot handle yet byte paths on macOS 10.14+. See https://github.com/nexB/scancode-toolkit/issues/1635'
-        ' Also this fails on Windows and Python 3')
+@pytest.mark.skipif(on_macos_14_or_higher or on_windows,
+    reason='Cannot handle byte paths yet on macOS 10.14+; this also fails on Windows. '
+    'See https://github.com/nexB/scancode-toolkit/issues/1635')
 def test_scan_can_handle_non_utf8_file_names_on_posix():
     test_dir = test_env.extract_test_tar_raw('non_utf8/non_unicode.tgz')
     result_file = test_env.get_temp_file('json')
 
-    if on_linux and py2:
-        test_dir = fsencode(test_dir)
-        result_file = fsencode(result_file)
-
     args = ['-i', '--strip-root', test_dir, '--json', result_file]
     run_scan_click(args)
 
@@ -550,10 +536,8 @@ def test_scan_can_handle_non_utf8_file_names_on_posix():
         expected = 'non_utf8/expected-linux.json'
     elif on_mac:
         expected = 'non_utf8/expected-mac.json'
-    elif on_windows and py2:
-        expected = 'non_utf8/expected-win-py2.json'
-    elif on_windows and py3:
-        expected = 'non_utf8/expected-win-py3.json'
+    elif on_windows:
+        expected = 'non_utf8/expected-win.json'
 
     check_json_scan(test_env.get_test_loc(expected), result_file, regen=False)
 
@@ -744,7 +728,7 @@ def test_scan_valid_duration_field_in_json_output_headers():
 
 
 @pytest.mark.scanslow
-@pytest.mark.skipif(on_windows and py3, reason='Somehow this test fails for now on Python 3')
+@pytest.mark.skipif(on_windows, reason='Somehow this test fails for now on Windows')
 def test_scan_with_timing_json_return_timings_for_each_scanner():
     test_dir = test_env.extract_test_tar('timing/basic.tgz')
     result_file = test_env.get_temp_file('json')
@@ -758,7 +742,7 @@ def test_scan_with_timing_json_return_timings_for_each_scanner():
 
 
 @pytest.mark.scanslow
-@pytest.mark.skipif(on_windows and py3, reason='Somehow this test fails for now on Python 3')
+@pytest.mark.skipif(on_windows, reason='Somehow this test fails for now on Windows')
 def test_scan_with_timing_jsonpp_return_timings_for_each_scanner():
     test_dir = test_env.extract_test_tar('timing/basic.tgz')
     result_file = test_env.get_temp_file('json')
@@ -925,9 +909,9 @@ def test_merge_multiple_scans():
     args = ['--from-json', test_file_1, '--from-json', test_file_2, '--json', result_file]
     run_scan_click(args, expected_rc=0)
     expected = test_env.get_test_loc('merge_scans/expected.json')
-    with open(expected, read_mode) as f:
+    with open(expected) as f:
         expected_files = json.loads(f.read())['files']
-    with open(result_file, read_mode) as f:
+    with open(result_file) as f:
         result_files = json.loads(f.read())['files']
     assert expected_files == result_files
 
diff --git a/tests/scancode/test_outdated.py b/tests/scancode/test_outdated.py
index 9a02110c17..27e06c4e00 100644
--- a/tests/scancode/test_outdated.py
+++ b/tests/scancode/test_outdated.py
@@ -27,17 +27,11 @@
 from __future__ import division
 from __future__ import unicode_literals
 
-
-
 import pytest
 
-from commoncode.system import py3
 from scancode import outdated
 
 
-pytestmark = pytest.mark.skipif(not py3, reason='Mock is not available as a builtin on py2')
-
-
 def test_get_latest_version():
     from unittest import mock
     pypi_mock_releases = {
diff --git a/tests/summarycode/test_score.py b/tests/summarycode/test_score.py
index 59a878f915..fa764c0790 100644
--- a/tests/summarycode/test_score.py
+++ b/tests/summarycode/test_score.py
@@ -33,7 +33,6 @@
 click.disable_unicode_literals_warning = True
 import pytest
 
-from commoncode import compat
 from commoncode.testcase import FileDrivenTesting
 from commoncode.text import python_safe_name
 from scancode.cli_test_utils import check_json_scan
diff --git a/tests/summarycode/test_summarizer.py b/tests/summarycode/test_summarizer.py
index d3bbeb5ed0..ee3d07d2f6 100644
--- a/tests/summarycode/test_summarizer.py
+++ b/tests/summarycode/test_summarizer.py
@@ -25,24 +25,21 @@
 from __future__ import absolute_import
 from __future__ import unicode_literals
 
-from os.path import dirname
-from os.path import join
+from os import path
 
 import pytest
 
-from commoncode.system import py2
 from commoncode.testcase import FileDrivenTesting
 from scancode.cli_test_utils import run_scan_click
 from scancode.cli_test_utils import check_json_scan
 from scancode.cli_test_utils import check_jsonlines_scan
 
-
 pytestmark = pytest.mark.scanslow
 
 
 class TestScanSummary(FileDrivenTesting):
 
-    test_data_dir = join(dirname(__file__), 'data')
+    test_data_dir = path.join(path.dirname(__file__), 'data')
 
     def test_copyright_summary_base(self):
         test_dir = self.get_test_loc('copyright_summary/scan')
@@ -72,7 +69,6 @@ def test_copyright_summary_does_not_crash(self):
         run_scan_click(['-c', '--summary', '--json-pp', result_file, test_dir])
         check_json_scan(expected_file, result_file, remove_file_date=True, regen=False)
 
-    @pytest.mark.skipif(py2, reason='round() behaves differently between Python 2 and Python 3, causing the value for percentage_of_license_text to be different')
     def test_full_summary_base(self):
         test_dir = self.get_test_loc('full_summary/scan')
         result_file = self.get_temp_file('json')
@@ -80,7 +76,6 @@ def test_full_summary_base(self):
         run_scan_click(['-clip', '--summary', '--json-pp', result_file, test_dir])
         check_json_scan(expected_file, result_file, remove_file_date=True, regen=False)
 
-    @pytest.mark.skipif(py2, reason='round() behaves differently between Python 2 and Python 3, causing the value for percentage_of_license_text to be different')
     def test_full_summary_with_details(self):
         test_dir = self.get_test_loc('full_summary/scan')
         result_file = self.get_temp_file('json')
@@ -98,7 +93,6 @@ def test_copyright_summary_key_files(self):
 
         check_json_scan(expected_file, result_file, remove_file_date=True, regen=False)
 
-    @pytest.mark.skipif(py2, reason='round() behaves differently between Python 2 and Python 3, causing the value for percentage_of_license_text to be different')
     def test_full_summary_key_files(self):
         test_dir = self.get_test_loc('full_summary/scan')
         result_file = self.get_temp_file('json')
@@ -108,7 +102,6 @@ def test_full_summary_key_files(self):
              '--json-pp', result_file, test_dir])
         check_json_scan(expected_file, result_file, remove_file_date=True, regen=False)
 
-    @pytest.mark.skipif(py2, reason='round() behaves differently between Python 2 and Python 3, causing the value for percentage_of_license_text to be different')
     def test_full_summary_key_files_json_lines(self):
         test_dir = self.get_test_loc('full_summary/scan')
         result_file = self.get_temp_file('json')
@@ -118,7 +111,6 @@ def test_full_summary_key_files_json_lines(self):
              '--json-lines', result_file, test_dir])
         check_jsonlines_scan(expected_file, result_file, remove_file_date=True, regen=False)
 
-    @pytest.mark.skipif(py2, reason='round() behaves differently between Python 2 and Python 3, causing the value for percentage_of_license_text to be different')
     def test_full_summary_by_facet(self):
         test_dir = self.get_test_loc('full_summary/scan')
         result_file = self.get_temp_file('json')
diff --git a/tests/textcode/test_analysis.py b/tests/textcode/test_analysis.py
index 89fe8a33e8..4a9f274086 100644
--- a/tests/textcode/test_analysis.py
+++ b/tests/textcode/test_analysis.py
@@ -34,10 +34,7 @@
 
 from commoncode.testcase import FileBasedTesting
 
-from commoncode import compat
 from commoncode.fileutils import resource_iter
-from commoncode.system import py2
-from commoncode.system import py3
 
 from textcode.analysis import as_unicode
 from textcode.analysis import unicode_text_lines
@@ -46,11 +43,7 @@
 
 def check_text_lines(result, expected_file, regen=False):
         if regen:
-            if py2:
-                mode = 'wb'
-            if py3:
-                mode = 'w'
-            with open(expected_file, mode) as tf:
+            with open(expected_file, 'w') as tf:
                 json.dump(result, tf, indent=2)
         with open(expected_file, 'rb') as tf:
             expected = json.load(tf)
@@ -149,12 +142,12 @@ def test_numbered_text_lines_return_correct_number_of_lines(self):
     def test_as_unicode_converts_bytes_to_unicode(self):
         test_line = '    // as defined in https://tools.ietf.org/html/rfc2821#section-4.1.2.'.encode()
         result = as_unicode(test_line)
-        assert type(result) == compat.unicode
+        assert type(result) == str
 
     def test_numbered_text_lines_return_unicode(self):
         test_file = self.get_test_loc('analysis/verify.go')
         for _lineno, line in numbered_text_lines(test_file):
-            assert type(line) == compat.unicode
+            assert type(line) == str
 
     def test_unicode_text_lines_replaces_null_bytes_with_space(self):
         test_file = self.get_test_loc('analysis/text-with-trailing-null-bytes.txt')
diff --git a/tests/textcode/test_strings.py b/tests/textcode/test_strings.py
index 580287999d..ca23c59d4f 100644
--- a/tests/textcode/test_strings.py
+++ b/tests/textcode/test_strings.py
@@ -30,8 +30,6 @@
 import json
 import os
 
-from commoncode.system import py2
-from commoncode.system import py3
 from commoncode.testcase import FileBasedTesting
 from textcode import strings
 
@@ -44,11 +42,7 @@ def check_file_strings(self, test_file, expected_file, regen=False):
         results = list(strings.strings_from_file(test_file))
         expected = self.get_test_loc(expected_file)
         if regen:
-            if py2:
-                mode = 'wb'
-            if py3:
-                mode = 'w'
-            with io.open(expected, mode) as o:
+            with io.open(expected, 'w') as o:
                 o.write(json.dumps(results, indent=2))
 
         with io.open(expected) as i:

From beb7c95902bb4f28366dd0bf180678f34385b31f Mon Sep 17 00:00:00 2001
From: Philippe Ombredanne <pombredanne@nexb.com>
Date: Fri, 4 Dec 2020 16:06:34 +0100
Subject: [PATCH 3/5] Update cocoapods test with correct description

The summary line should be used as the first line of a package
description when present.

Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
---
 .../cocoapods/podspec/BadgeHub.podspec.expected.json |  2 +-
 .../podspec/LoadingShimmer.podspec.expected.json     |  2 +-
 .../podspec/Starscream.podspec.expected.json         |  2 +-
 .../cocoapods/podspec/SwiftLib.podspec.expected.json |  2 +-
 .../cocoapods/podspec/nanopb.podspec.expected.json   |  2 +-
 tests/packagedcode/test_cocoapods.py                 | 12 ++++++------
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tests/packagedcode/data/cocoapods/podspec/BadgeHub.podspec.expected.json b/tests/packagedcode/data/cocoapods/podspec/BadgeHub.podspec.expected.json
index 4554a8b231..a547f0e92d 100644
--- a/tests/packagedcode/data/cocoapods/podspec/BadgeHub.podspec.expected.json
+++ b/tests/packagedcode/data/cocoapods/podspec/BadgeHub.podspec.expected.json
@@ -6,7 +6,7 @@
   "qualifiers": {},
   "subpath": null,
   "primary_language": "Objective-C",
-  "description": " Make any UIView a full fledged animated notification center. It is a way to quickly add a notification badge icon to a UIView. It make very easy to add badge to any view.",
+  "description": "A way to quickly add a notification bedge icon to any view.\n Make any UIView a full fledged animated notification center. It is a way to quickly add a notification badge icon to a UIView. It make very easy to add badge to any view.",
   "release_date": null,
   "parties": [
     {
diff --git a/tests/packagedcode/data/cocoapods/podspec/LoadingShimmer.podspec.expected.json b/tests/packagedcode/data/cocoapods/podspec/LoadingShimmer.podspec.expected.json
index c797a4e40e..3d2e23d02e 100644
--- a/tests/packagedcode/data/cocoapods/podspec/LoadingShimmer.podspec.expected.json
+++ b/tests/packagedcode/data/cocoapods/podspec/LoadingShimmer.podspec.expected.json
@@ -6,7 +6,7 @@
   "qualifiers": {},
   "subpath": null,
   "primary_language": "Objective-C",
-  "description": " An easy way to add a shimmering effect to any view with just single line of code. It is useful as an unobtrusive loading indicator. This is a network request waiting for the framework, the framework to increase the dynamic effect, convenient and fast, a line of code can be used.",
+  "description": "An easy way to add a shimmering effect to any view with just one line of code. It is useful as an unobtrusive loading indicator.\n An easy way to add a shimmering effect to any view with just single line of code. It is useful as an unobtrusive loading indicator. This is a network request waiting for the framework, the framework to increase the dynamic effect, convenient and fast, a line of code can be used.",
   "release_date": null,
   "parties": [
     {
diff --git a/tests/packagedcode/data/cocoapods/podspec/Starscream.podspec.expected.json b/tests/packagedcode/data/cocoapods/podspec/Starscream.podspec.expected.json
index 45368686ef..07b086411d 100644
--- a/tests/packagedcode/data/cocoapods/podspec/Starscream.podspec.expected.json
+++ b/tests/packagedcode/data/cocoapods/podspec/Starscream.podspec.expected.json
@@ -6,7 +6,7 @@
   "qualifiers": {},
   "subpath": null,
   "primary_language": "Objective-C",
-  "description": null,
+  "description": "A conforming WebSocket RFC 6455 client library in Swift.",
   "release_date": null,
   "parties": [
     {
diff --git a/tests/packagedcode/data/cocoapods/podspec/SwiftLib.podspec.expected.json b/tests/packagedcode/data/cocoapods/podspec/SwiftLib.podspec.expected.json
index 2985d494aa..f7dcc68978 100644
--- a/tests/packagedcode/data/cocoapods/podspec/SwiftLib.podspec.expected.json
+++ b/tests/packagedcode/data/cocoapods/podspec/SwiftLib.podspec.expected.json
@@ -6,7 +6,7 @@
   "qualifiers": {},
   "subpath": null,
   "primary_language": "Objective-C",
-  "description": " This CocoaPods library helps you perform calculation.",
+  "description": "A CocoaPods library written in Swift\n This CocoaPods library helps you perform calculation.",
   "release_date": null,
   "parties": [
     {
diff --git a/tests/packagedcode/data/cocoapods/podspec/nanopb.podspec.expected.json b/tests/packagedcode/data/cocoapods/podspec/nanopb.podspec.expected.json
index f301d43621..e198a8886a 100644
--- a/tests/packagedcode/data/cocoapods/podspec/nanopb.podspec.expected.json
+++ b/tests/packagedcode/data/cocoapods/podspec/nanopb.podspec.expected.json
@@ -6,7 +6,7 @@
   "qualifiers": {},
   "subpath": null,
   "primary_language": "Objective-C",
-  "description": " Nanopb is a small code-size Protocol Buffers implementation Nanopb is a small code-size Protocol Buffers implementation in ansi C. It is especially suitable for use in Nanopb is a small code-size Protocol Buffers implementation Nanopb is a small code-size Protocol Buffers implementation in ansi C. It is especially suitable for use in microcontrollers, but fits any memory restricted system.",
+  "description": "Protocol buffers with small code size.\n Nanopb is a small code-size Protocol Buffers implementation Nanopb is a small code-size Protocol Buffers implementation in ansi C. It is especially suitable for use in Nanopb is a small code-size Protocol Buffers implementation Nanopb is a small code-size Protocol Buffers implementation in ansi C. It is especially suitable for use in microcontrollers, but fits any memory restricted system.",
   "release_date": null,
   "parties": [
     {
diff --git a/tests/packagedcode/test_cocoapods.py b/tests/packagedcode/test_cocoapods.py
index ce1465fb33..b42e7c4b4c 100644
--- a/tests/packagedcode/test_cocoapods.py
+++ b/tests/packagedcode/test_cocoapods.py
@@ -32,34 +32,34 @@
 from packages_test_utils import PackageTester
 
 
-class TestRubyGemspec(PackageTester):
+class TestCocoaPodspec(PackageTester):
     test_data_dir = os.path.join(os.path.dirname(__file__), 'data')
 
-    def test_rubygems_can_parse_BadgeHub(self):
+    def test_cocoapods_can_parse_BadgeHub(self):
         test_file = self.get_test_loc('cocoapods/podspec/BadgeHub.podspec')
         expected_loc = self.get_test_loc('cocoapods/podspec/BadgeHub.podspec.expected.json')
         packages = cocoapods.parse(test_file)
         self.check_package(packages, expected_loc, regen=False)
 
-    def test_rubygems_can_parse_LoadingShimmer(self):
+    def test_cocoapods_can_parse_LoadingShimmer(self):
         test_file = self.get_test_loc('cocoapods/podspec/LoadingShimmer.podspec')
         expected_loc = self.get_test_loc('cocoapods/podspec/LoadingShimmer.podspec.expected.json')
         packages = cocoapods.parse(test_file)
         self.check_package(packages, expected_loc, regen=False)
 
-    def test_rubygems_can_parse_nanopb(self):
+    def test_cocoapods_can_parse_nanopb(self):
         test_file = self.get_test_loc('cocoapods/podspec/nanopb.podspec')
         expected_loc = self.get_test_loc('cocoapods/podspec/nanopb.podspec.expected.json')
         packages = cocoapods.parse(test_file)
         self.check_package(packages, expected_loc, regen=False)
 
-    def test_rubygems_can_parse_Starscream(self):
+    def test_cocoapods_can_parse_Starscream(self):
         test_file = self.get_test_loc('cocoapods/podspec/Starscream.podspec')
         expected_loc = self.get_test_loc('cocoapods/podspec/Starscream.podspec.expected.json')
         packages = cocoapods.parse(test_file)
         self.check_package(packages, expected_loc, regen=False)
 
-    def test_rubygems_can_parse_SwiftLib(self):
+    def test_cocoapods_can_parse_SwiftLib(self):
         test_file = self.get_test_loc('cocoapods/podspec/SwiftLib.podspec')
         expected_loc = self.get_test_loc('cocoapods/podspec/SwiftLib.podspec.expected.json')
         packages = cocoapods.parse(test_file)

From 65f4cacafe869f26a1a941b8a5d161d0a0a3bbbe Mon Sep 17 00:00:00 2001
From: Philippe Ombredanne <pombredanne@nexb.com>
Date: Fri, 4 Dec 2020 18:04:34 +0100
Subject: [PATCH 4/5] Add missing owner to license

Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
---
 src/licensedcode/data/licenses/ecfonts-1.0.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/licensedcode/data/licenses/ecfonts-1.0.yml b/src/licensedcode/data/licenses/ecfonts-1.0.yml
index 9edb1e06b2..965726e7db 100644
--- a/src/licensedcode/data/licenses/ecfonts-1.0.yml
+++ b/src/licensedcode/data/licenses/ecfonts-1.0.yml
@@ -3,5 +3,6 @@ short_name: latex-ec-fonts
 name: Copyright notice to the ec fonts
 category: Permissive
 homepage_url: http://dante.ctan.org
+owner: Joerg Knappen
 text_urls:
     - http://dante.ctan.org/tex-archive/fonts/ec/src/copyrite.txt

From c7e3cc2e18472a023556b5e8b7b4c36544873028 Mon Sep 17 00:00:00 2001
From: Philippe Ombredanne <pombredanne@nexb.com>
Date: Fri, 4 Dec 2020 18:07:46 +0100
Subject: [PATCH 5/5] Update test expectations

This test is sensitive to additions of new words to the set of known
words in licenses and license rules.

Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
---
 .../query/query_lines/yahoo-eula.txt.json     | 832 +++++++++---------
 1 file changed, 416 insertions(+), 416 deletions(-)

diff --git a/tests/licensedcode/data/query/query_lines/yahoo-eula.txt.json b/tests/licensedcode/data/query/query_lines/yahoo-eula.txt.json
index f95e177c07..5aecd5f6fe 100644
--- a/tests/licensedcode/data/query/query_lines/yahoo-eula.txt.json
+++ b/tests/licensedcode/data/query/query_lines/yahoo-eula.txt.json
@@ -1,842 +1,842 @@
 [
   [
     [
-      4835, 
+      4835,
       "software"
-    ], 
+    ],
     [
-      2493, 
+      2493,
       "license"
     ]
-  ], 
-  [], 
+  ],
+  [],
   [
     [
-      8070, 
+      8070,
       "welcome"
-    ], 
+    ],
     [
-      4772, 
+      4772,
       "to"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      4835, 
+      4835,
       "software"
-    ], 
+    ],
     [
-      4769, 
+      4769,
       "the"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      4835, 
+      4835,
       "software"
-    ], 
+    ],
     [
-      4499, 
+      4499,
       "by"
-    ], 
+    ],
     [
-      5343, 
+      5343,
       "clicking"
-    ], 
+    ],
     [
-      4769, 
+      4769,
       "the"
-    ], 
+    ],
     [
-      4922, 
+      4922,
       "i"
-    ], 
+    ],
     [
-      198, 
+      198,
       "agree"
-    ], 
+    ],
     [
-      5604, 
+      5604,
       "button"
-    ], 
+    ],
     [
-      4904, 
+      4904,
       "below"
-    ], 
+    ],
     [
-      4768, 
+      4768,
       "you"
-    ], 
+    ],
     [
-      198, 
+      198,
       "agree"
-    ], 
+    ],
     [
-      4772, 
+      4772,
       "to"
-    ], 
+    ],
     [
-      5161, 
+      5161,
       "these"
-    ], 
+    ],
     [
-      4835, 
+      4835,
       "software"
-    ], 
+    ],
     [
-      2493, 
+      2493,
       "license"
-    ], 
+    ],
     [
-      4913, 
+      4913,
       "terms"
-    ], 
+    ],
     [
-      4769, 
+      4769,
       "the"
-    ], 
+    ],
     [
-      4835, 
+      4835,
       "software"
-    ], 
+    ],
     [
-      2493, 
+      2493,
       "license"
-    ], 
+    ],
     [
-      4837, 
+      4837,
       "that"
-    ], 
+    ],
     [
-      5607, 
+      5607,
       "supplement"
-    ], 
+    ],
     [
-      4769, 
+      4769,
       "the"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      4913, 
+      4913,
       "terms"
-    ], 
+    ],
     [
-      4774, 
+      4774,
       "of"
-    ], 
+    ],
     [
-      5213, 
+      5213,
       "service"
-    ], 
+    ],
     [
-      5915, 
+      5915,
       "tos"
-    ], 
+    ],
     [
-      4904, 
+      4904,
       "below"
-    ], 
+    ],
     [
-      5220, 
+      5220,
       "also"
-    ], 
+    ],
     [
-      5062, 
+      5062,
       "located"
-    ], 
+    ],
     [
-      4485, 
+      4485,
       "at"
-    ], 
+    ],
     [
-      5028, 
+      5028,
       "http"
-    ], 
+    ],
     [
-      4577, 
+      4577,
       "info"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      5046, 
+      5046,
       "com"
-    ], 
+    ],
     [
-      2429, 
+      2429,
       "legal"
-    ], 
+    ],
     [
-      5278, 
+      5278,
       "us"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      20045, 
+      20075,
       "utos"
-    ], 
+    ],
     [
-      20045, 
+      20075,
       "utos"
-    ], 
+    ],
     [
-      7201, 
+      7201,
       "173"
-    ], 
+    ],
     [
-      4832, 
+      4832,
       "html"
-    ], 
+    ],
     [
-      4817, 
+      4817,
       "if"
-    ], 
+    ],
     [
-      4768, 
+      4768,
       "you"
-    ], 
+    ],
     [
-      1289, 
+      1289,
       "disagree"
-    ], 
+    ],
     [
-      4777, 
+      4777,
       "with"
-    ], 
+    ],
     [
-      4853, 
+      4853,
       "any"
-    ], 
+    ],
     [
-      4774, 
+      4774,
       "of"
-    ], 
+    ],
     [
-      4769, 
+      4769,
       "the"
-    ], 
+    ],
     [
-      4913, 
+      4913,
       "terms"
-    ], 
+    ],
     [
-      4904, 
+      4904,
       "below"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      4953, 
+      4953,
       "does"
-    ], 
+    ],
     [
-      4778, 
+      4778,
       "not"
-    ], 
+    ],
     [
-      1951, 
+      1951,
       "grant"
-    ], 
+    ],
     [
-      4768, 
+      4768,
       "you"
-    ], 
+    ],
     [
-      2493, 
+      2493,
       "license"
-    ], 
+    ],
     [
-      4772, 
+      4772,
       "to"
-    ], 
+    ],
     [
-      4800, 
+      4800,
       "use"
-    ], 
+    ],
     [
-      4769, 
+      4769,
       "the"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      4835, 
+      4835,
       "software"
-    ], 
+    ],
     [
-      5605, 
+      5605,
       "click"
-    ], 
+    ],
     [
-      4769, 
+      4769,
       "the"
-    ], 
+    ],
     [
-      4922, 
+      4922,
       "i"
-    ], 
+    ],
     [
-      1289, 
+      1289,
       "disagree"
-    ], 
+    ],
     [
-      4664, 
+      4664,
       "or"
-    ], 
+    ],
     [
-      5862, 
+      5862,
       "cancel"
     ]
-  ], 
+  ],
   [
     [
-      5604, 
+      5604,
       "button"
-    ], 
+    ],
     [
-      4772, 
+      4772,
       "to"
-    ], 
+    ],
     [
-      13148, 
+      13149,
       "exit"
-    ], 
+    ],
     [
-      4769, 
+      4769,
       "the"
-    ], 
+    ],
     [
-      5984, 
+      5984,
       "installer"
     ]
-  ], 
-  [], 
+  ],
+  [],
   [
     [
-      5116, 
+      5116,
       "information"
-    ], 
+    ],
     [
-      666, 
+      666,
       "collected"
-    ], 
+    ],
     [
-      4789, 
+      4789,
       "through"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      4835, 
+      4835,
       "software"
-    ], 
+    ],
     [
-      4499, 
+      4499,
       "by"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      4852, 
+      4852,
       "is"
-    ], 
+    ],
     [
-      4786, 
+      4786,
       "subject"
-    ], 
+    ],
     [
-      4772, 
+      4772,
       "to"
-    ], 
+    ],
     [
-      4769, 
+      4769,
       "the"
-    ], 
+    ],
     [
-      5073, 
+      5073,
       "full"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      3132, 
+      3132,
       "privacy"
-    ], 
+    ],
     [
-      5308, 
+      5308,
       "policy"
-    ], 
+    ],
     [
-      5028, 
+      5028,
       "http"
-    ], 
+    ],
     [
-      4577, 
+      4577,
       "info"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      5046, 
+      5046,
       "com"
-    ], 
+    ],
     [
-      3132, 
+      3132,
       "privacy"
-    ], 
+    ],
     [
-      5278, 
+      5278,
       "us"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      4772, 
+      4772,
       "to"
-    ], 
+    ],
     [
-      5109, 
+      5109,
       "find"
-    ], 
+    ],
     [
-      4872, 
+      4872,
       "out"
-    ], 
+    ],
     [
-      5172, 
+      5172,
       "more"
-    ], 
+    ],
     [
-      5084, 
+      5084,
       "about"
-    ], 
+    ],
     [
-      6446, 
+      6446,
       "setting"
-    ], 
+    ],
     [
-      4819, 
+      4819,
       "your"
-    ], 
+    ],
     [
-      3081, 
+      3081,
       "preferences"
-    ], 
+    ],
     [
-      4782, 
+      4782,
       "and"
-    ], 
+    ],
     [
-      4769, 
+      4769,
       "the"
-    ], 
+    ],
     [
-      5116, 
+      5116,
       "information"
-    ], 
+    ],
     [
-      666, 
+      666,
       "collected"
-    ], 
+    ],
     [
-      4499, 
+      4499,
       "by"
-    ], 
+    ],
     [
-      4769, 
+      4769,
       "the"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      4835, 
+      4835,
       "software"
-    ], 
+    ],
     [
-      4782, 
+      4782,
       "and"
-    ], 
+    ],
     [
-      4815, 
+      4815,
       "used"
-    ], 
+    ],
     [
-      4499, 
+      4499,
       "by"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      6074, 
+      6074,
       "go"
-    ], 
+    ],
     [
-      4772, 
+      4772,
       "to"
-    ], 
+    ],
     [
-      5028, 
+      5028,
       "http"
-    ], 
+    ],
     [
-      4577, 
+      4577,
       "info"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      5046, 
+      5046,
       "com"
-    ], 
+    ],
     [
-      3132, 
+      3132,
       "privacy"
-    ], 
+    ],
     [
-      5278, 
+      5278,
       "us"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      20053, 
+      20083,
       "toolbar"
-    ], 
+    ],
     [
-      5030, 
+      5030,
       "details"
-    ], 
+    ],
     [
-      4832, 
+      4832,
       "html"
     ]
-  ], 
-  [], 
+  ],
+  [],
   [
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      3534, 
+      3534,
       "reserves"
-    ], 
+    ],
     [
-      4769, 
+      4769,
       "the"
-    ], 
+    ],
     [
-      4771, 
+      4771,
       "right"
-    ], 
+    ],
     [
-      4772, 
+      4772,
       "to"
-    ], 
+    ],
     [
-      6004, 
+      6004,
       "update"
-    ], 
+    ],
     [
-      4782, 
+      4782,
       "and"
-    ], 
+    ],
     [
-      5563, 
+      5563,
       "change"
-    ], 
+    ],
     [
-      4804, 
+      4804,
       "from"
-    ], 
+    ],
     [
-      4921, 
+      4921,
       "time"
-    ], 
+    ],
     [
-      4772, 
+      4772,
       "to"
-    ], 
+    ],
     [
-      4921, 
+      4921,
       "time"
-    ], 
+    ],
     [
-      4775, 
+      4775,
       "this"
-    ], 
+    ],
     [
-      4835, 
+      4835,
       "software"
-    ], 
+    ],
     [
-      2493, 
+      2493,
       "license"
-    ], 
+    ],
     [
-      4782, 
+      4782,
       "and"
-    ], 
+    ],
     [
-      4812, 
+      4812,
       "all"
-    ], 
+    ],
     [
-      5147, 
+      5147,
       "documents"
-    ], 
+    ],
     [
-      2140, 
+      2140,
       "incorporated"
-    ], 
+    ],
     [
-      4499, 
+      4499,
       "by"
-    ], 
+    ],
     [
-      5184, 
+      5184,
       "reference"
-    ], 
+    ],
     [
-      4768, 
+      4768,
       "you"
-    ], 
+    ],
     [
-      5267, 
+      5267,
       "can"
-    ], 
+    ],
     [
-      5922, 
+      5922,
       "always"
-    ], 
+    ],
     [
-      5109, 
+      5109,
       "find"
-    ], 
+    ],
     [
-      4769, 
+      4769,
       "the"
-    ], 
+    ],
     [
-      5037, 
+      5037,
       "most"
-    ], 
+    ],
     [
-      11244, 
+      11244,
       "recent"
-    ], 
+    ],
     [
-      4820, 
+      4820,
       "version"
-    ], 
+    ],
     [
-      4774, 
+      4774,
       "of"
-    ], 
+    ],
     [
-      4775, 
+      4775,
       "this"
-    ], 
+    ],
     [
-      4835, 
+      4835,
       "software"
-    ], 
+    ],
     [
-      2493, 
+      2493,
       "license"
-    ], 
+    ],
     [
-      4485, 
+      4485,
       "at"
-    ], 
+    ],
     [
-      5028, 
+      5028,
       "http"
-    ], 
+    ],
     [
-      4577, 
+      4577,
       "info"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      5046, 
+      5046,
       "com"
-    ], 
+    ],
     [
-      2429, 
+      2429,
       "legal"
-    ], 
+    ],
     [
-      5278, 
+      5278,
       "us"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      20053, 
+      20083,
       "toolbar"
-    ], 
+    ],
     [
-      null, 
+      null,
       null
-    ], 
+    ],
     [
-      null, 
+      null,
       null
-    ], 
+    ],
     [
-      null, 
+      null,
       null
-    ], 
+    ],
     [
-      4832, 
+      4832,
       "html"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      4798, 
+      4798,
       "may"
-    ], 
+    ],
     [
-      5563, 
+      5563,
       "change"
-    ], 
+    ],
     [
-      4775, 
+      4775,
       "this"
-    ], 
+    ],
     [
-      4835, 
+      4835,
       "software"
-    ], 
+    ],
     [
-      2493, 
+      2493,
       "license"
-    ], 
+    ],
     [
-      4499, 
+      4499,
       "by"
-    ], 
+    ],
     [
-      3052, 
+      3052,
       "posting"
-    ], 
+    ],
     [
-      5215, 
+      5215,
       "new"
-    ], 
+    ],
     [
-      4820, 
+      4820,
       "version"
-    ], 
+    ],
     [
-      4805, 
+      4805,
       "without"
-    ], 
+    ],
     [
-      4843, 
+      4843,
       "notice"
-    ], 
+    ],
     [
-      4772, 
+      4772,
       "to"
-    ], 
+    ],
     [
-      4768, 
+      4768,
       "you"
-    ], 
+    ],
     [
-      4800, 
+      4800,
       "use"
-    ], 
+    ],
     [
-      4774, 
+      4774,
       "of"
-    ], 
+    ],
     [
-      4769, 
+      4769,
       "the"
-    ], 
+    ],
     [
-      8654, 
+      8654,
       "yahoo"
-    ], 
+    ],
     [
-      4835, 
+      4835,
       "software"
-    ], 
+    ],
     [
-      5669, 
+      5669,
       "after"
-    ], 
+    ],
     [
-      4875, 
+      4875,
       "such"
-    ], 
+    ],
     [
-      5563, 
+      5563,
       "change"
-    ], 
+    ],
     [
-      853, 
+      853,
       "constitutes"
-    ], 
+    ],
     [
-      20, 
+      20,
       "acceptance"
-    ], 
+    ],
     [
-      4774, 
+      4774,
       "of"
-    ], 
+    ],
     [
-      4875, 
+      4875,
       "such"
-    ], 
+    ],
     [
-      4810, 
+      4810,
       "changes"
     ]
   ]