Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,12 @@ License detection:

See https://github.com/nexB/scancode-toolkit/issues/3219

- Fixed a crash in the ``--unknown-licenses`` option that occurred when
it was used without the ``--matched-text`` option. This option now
works correctly and is better tested.

See https://github.com/nexB/scancode-toolkit/issues/3343

v31.2.5 - 2023-04-21
----------------------------------

Expand Down
27 changes: 17 additions & 10 deletions src/licensedcode/match_unknown.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
TRACE = False

if TRACE:
use_print = True
import logging
import sys

Expand All @@ -31,6 +32,9 @@
def logger_debug(*args):
    """
    Log ``args`` as a single space-separated DEBUG message.

    Non-empty strings are emitted verbatim; any other value (including an
    empty string) is rendered with ``repr()``.
    """
    rendered = []
    for arg in args:
        if isinstance(arg, str) and arg:
            rendered.append(arg)
        else:
            rendered.append(repr(arg))
    return logger.debug(' '.join(rendered))

if use_print:
logger_debug = print

logging.basicConfig(stream=sys.stdout)
logger.setLevel(logging.DEBUG)

Expand Down Expand Up @@ -142,22 +146,25 @@ def match_unknowns(
unknown_ngram_length=unknown_ngram_length,
)

# build match from merged matched ngrams
qspans = (Span(qstart, qend) for qstart, qend in matched_ngrams)
qspan = Span().union(*qspans)

if TRACE:
tokens_by_tid = idx.tokens_by_tid

def get_tokens(_toks):
return (' '.join(tokens_by_tid[t] for t in _toks))

print('match_unknowns: matched_ngrams')
for qstart, qend, matched_toks in matched_ngrams:

for qstart, qend in matched_ngrams:
_span = Span(qstart, qend)
_tokens = [query_tokens[qpos] for qpos in _span]
print(
' ', 'qstart', qstart,
'qend', qend,
'matched_toks', get_tokens(matched_toks))

# build match from merged matched ngrams
qspans = (Span(qstart, qend) for qstart, qend in matched_ngrams)
qspan = Span().union(*qspans)
'matched_toks', get_tokens(_tokens))

if not qspan:
return
Expand All @@ -169,7 +176,8 @@ def get_tokens(_toks):
match_len = len(qspan)

if TRACE:
print('match_unknowns: matched_span:', get_tokens(matched_tokens))
#print('match_unknowns: matched_span:', get_tokens(matched_tokens))
print('match_unknowns: qspan, match_len, matched_span:', qspan, match_len, matched_tokens)

# we use the query side to build the ispans
ispan = Span(0, match_len)
Expand All @@ -180,9 +188,8 @@ def get_tokens(_toks):
try:
match_start_line = line_by_pos[qspan.start]
match_end_line = line_by_pos[qspan.end]
except:
print('empty span:', qspan)
raise
except Exception as e:
raise Exception('empty span:', qspan) from e

text = ''.join(get_full_qspan_matched_text(
match_qspan=qspan,
Expand Down
4 changes: 1 addition & 3 deletions src/licensedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2542,14 +2542,12 @@ def __attrs_post_init__(self, *args, **kwargs):
self.identifier = f'license-detection-unknown-{self._unique_id}'

self.license_expression = UNKNOWN_LICENSE_KEY
# note that this could be shared across rules as an optimization
# TODO: this could be shared across rules as an optimization
self.license_expression_object = self.licensing.parse(UNKNOWN_LICENSE_KEY)
self.is_license_notice = True
self.notes = 'Unknown license based on a composite of license words.'
self.is_synthetic = True
self.setup()
# called only for its side effects
self.tokens()


@attr.s(slots=True, repr=False)
Expand Down
32 changes: 32 additions & 0 deletions tests/licensedcode/data/match_unknown/apache-2.0.LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
---
key: apache-2.0
short_name: Apache 2.0
name: Apache License 2.0
category: Permissive
owner: Apache Software Foundation
homepage_url: http://www.apache.org/licenses/
spdx_license_key: Apache-2.0
---

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.


Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
---
key: gpl-2.0-plus
short_name: GPL 2.0 or later
name: GNU General Public License 2.0 or later
category: Copyleft
owner: Free Software Foundation (FSF)
homepage_url: http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
spdx_license_key: GPL-2.0-or-later
---


This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.


BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.

IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.

If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
license_expression: gpl-2.0-plus
is_license_notice: yes
---

License:

This package is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This package is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"license_detections": [
{
"identifier": "unknown-b0897d47-1c91-9898-2364-2e4d1a34b6fd",
"license_expression": "unknown",
"detection_count": 1
}
],
"files": [
{
"path": "unknown.txt",
"type": "file",
"detected_license_expression": "unknown",
"detected_license_expression_spdx": "LicenseRef-scancode-unknown",
"license_detections": [
{
"license_expression": "unknown",
"matches": [
{
"score": 86.89,
"start_line": 1,
"end_line": 10,
"matched_length": 53,
"match_coverage": 100.0,
"matcher": "6-unknown",
"license_expression": "unknown",
"rule_identifier": "license-detection-unknown-296da2cbc15d2bba73baa1359cda5fc8bf39b942",
"rule_relevance": 100,
"rule_url": null
}
],
"identifier": "unknown-b0897d47-1c91-9898-2364-2e4d1a34b6fd"
}
],
"license_clues": [],
"percentage_of_license_text": 86.89,
"scan_errors": []
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"license_detections": [
{
"identifier": "unknown-b0897d47-1c91-9898-2364-2e4d1a34b6fd",
"license_expression": "unknown",
"detection_count": 1,
"detection_log": []
}
],
"files": [
{
"path": "unknown.txt",
"type": "file",
"detected_license_expression": "unknown",
"detected_license_expression_spdx": "LicenseRef-scancode-unknown",
"license_detections": [
{
"license_expression": "unknown",
"matches": [
{
"score": 86.89,
"start_line": 1,
"end_line": 10,
"matched_length": 53,
"match_coverage": 100.0,
"matcher": "6-unknown",
"license_expression": "unknown",
"rule_identifier": "license-detection-unknown-296da2cbc15d2bba73baa1359cda5fc8bf39b942",
"rule_relevance": 100,
"rule_url": null,
"matched_text": "form shall mean the preferred form for making\nthe purposes of this definition control\n[software] [is] [modified] [by] [someone] [else]\n\n\n\nrepresent, as a whole, an original work of authorship. For the purposes\n of this License, Derivative Works shall not include works that remain\n separable from, or merely link (or bind by name) [to] [the] interfaces of,\n the Work and Derivative Works thereof."
}
],
"detection_log": [
"unknown-match"
],
"identifier": "unknown-b0897d47-1c91-9898-2364-2e4d1a34b6fd"
}
],
"license_clues": [],
"percentage_of_license_text": 86.89,
"scan_errors": []
}
]
}
10 changes: 10 additions & 0 deletions tests/licensedcode/data/match_unknown/unknown.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
form shall mean the preferred form for making
the purposes of this definition control
software is modified by someone else



represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
Loading