Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ Package detection:
manifest data parsing code outside of the scancode-toolkit context in other
libraries.

- The PackageData model now includes a ``holder`` field. When copyright data is
present, it is populated with the holders detected in the ``copyright`` field,
falling back to the copyright text itself if no holder can be detected.
Without copyright data, it remains empty.

https://github.com/nexB/scancode-toolkit/issues/3290


License detection:
~~~~~~~~~~~~~~~~~~~
Expand Down
42 changes: 42 additions & 0 deletions src/packagedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,11 @@ class PackageData(IdentifiablePackageData):
label='Copyright',
help='Copyright statements for this package. Typically one per line.')

# Copyright holder name(s) for this package, stored as a single string.
# populate_holder_field() fills this from the `copyright` value when one is
# present, joining multiple detected holders with newlines.
holder = String(
    label='Holder',
    help='Holders for this package. Typically one per line.'
)

declared_license_expression = String(
label='license expression',
help='The license expression for this package typically derived '
Expand Down Expand Up @@ -711,8 +716,45 @@ class PackageData(IdentifiablePackageData):
repr=True,
)


def __attrs_post_init__(self, *args, **kwargs):
    """
    Populate the derived fields of this instance: first the license fields,
    then the holder field.

    Any positional or keyword arguments are accepted and ignored.

    NOTE(review): by its name this is the attrs post-init hook, presumably
    invoked right after the generated ``__init__`` -- confirm against the
    class decorator, which is not visible here.
    """
    self.populate_license_fields()
    self.populate_holder_field()

def populate_holder_field(self):
    """
    Populate the ``holder`` field from the ``copyright`` field.

    Do nothing when there is no copyright value, or when a holder is already
    set: an explicitly provided holder is never overwritten and detection is
    not re-run for it.

    Otherwise holder detection runs on the copyright text, one statement per
    line. If nothing is detected, detection is retried with every line
    prefixed with "Copyright ". If there is still no holder, the copyright
    text itself is used as the holder. Multiple detected holders are joined
    with newlines.
    """
    if not self.copyright or self.holder:
        return

    # Kept as a function-level import, as in the original code.
    from cluecode.copyrights import CopyrightDetector

    detector = CopyrightDetector()

    def detect_holders(lines):
        """Return the list of holder detections found in numbered ``lines``."""
        return list(
            detector.detect(
                lines,
                include_copyrights=False,
                include_holders=True,
                include_authors=False,
            )
        )

    numbered_lines = list(enumerate(self.copyright.split("\n"), start=1))
    holders = detect_holders(numbered_lines)

    # If no holder detected, prefix each copyright statement with `Copyright`
    # so that bare statements such as "2012 Some Org" get a second chance.
    if not holders:
        holders = detect_holders(
            [(count, f"Copyright {value}") for count, value in numbered_lines]
        )

    # If still no holder, then populate holder with copyright field
    self.holder = (
        "\n".join(detection.holder for detection in holders)
        or self.copyright
    )

def populate_license_fields(self):
"""
Expand Down
38 changes: 19 additions & 19 deletions tests/formattedcode/data/csv/livescan/expected.csv
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
path,type,name,base_name,extension,size,date,sha1,md5,sha256,mime_type,file_type,programming_language,is_binary,is_text,is_archive,is_media,is_source,is_script,detected_license_expression,detected_license_expression_spdx,percentage_of_license_text,files_count,dirs_count,size_count,scan_errors,license_expression,detection_log,license_match__score,start_line,end_line,license_match__matched_length,license_match__match_coverage,license_match__matcher,license_match__license_expression,license_match__rule_identifier,license_match__rule_relevance,license_match__rule_url,copyright,holder,email,url,package__type,package__namespace,package__name,package__version,package__qualifiers,package__subpath,package__primary_language,package__description,package__release_date,package__homepage_url,package__download_url,package__size,package__sha1,package__md5,package__sha256,package__sha512,package__bug_tracking_url,package__code_view_url,package__vcs_url,package__copyright,package__declared_license_expression,package__declared_license_expression_spdx,package__license_detections,package__other_license_expression,package__other_license_expression_spdx,package__other_license_detections,package__extracted_license_statement,package__notice_text,package__file_references,package__extra_data,package__repository_homepage_url,package__repository_download_url,package__api_data_url,package__datasource_id,package__purl
json2csv.rb,file,json2csv.rb,json2csv,.rb,912,2022-04-20,1236469a06a2bacbdd8e172ad718482af5b0a936,1307c281e0b153202e291b217eab85d5,12ba215313981dbe810d9ed696b7cc753d97adfcc26eba1e13f941dc7506aa4e,text/x-script.python,"Python script, ASCII text executable",Ruby,False,True,False,False,True,True,apache-2.0,Apache-2.0,62.04,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,apache-2.0,,100.00,5,13,85,100.00,2-aho,apache-2.0,apache-2.0_7.RULE,100.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_7.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,,,,,Copyright (c) 2017 nexB Inc. and others,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,,,,,,nexB Inc. and others,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,,,,,http://nexb.com/,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,,,,,https://github.com/nexB/scancode-toolkit/,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,,,,,,,,,,,http://www.apache.org/licenses/LICENSE-2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
license,file,license,license,,679,2022-04-20,75c5490a718ddd45e40e0cc7ce0c756abc373123,b965a762efb9421cf1bf4405f336e278,a34098a43e5677495f59dff825a3f9bc0f2b0261d75feb2356919f4c3ce049ab,text/plain,ASCII text,,False,True,False,False,False,False,gpl-2.0-plus,GPL-2.0-or-later,100.0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
license,,,,,,,,,,,,,,,,,,,,,,,,,,gpl-2.0-plus,,100.00,1,12,113,100.00,1-hash,gpl-2.0-plus,gpl-2.0-plus_420.RULE,100.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0-plus_420.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,file,package.json,package,.json,2200,2022-04-20,918376afce796ef90eeda1d6695f2289c90491ac,1f66239a9b850c5e60a9382dbe2162d2,29f6068a1b6c7d06f115a5edc4ed8558edde42c6bbf0145ed77cf1108a0dd529,application/json,JSON data,,False,True,False,False,False,False,mit,MIT,45.72,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,mit,,100.00,24,24,3,100.00,2-aho,mit,mit_27.RULE,100.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_27.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,mit,,84.68,24,24,136,85.53,3-seq,mit,mit_823.RULE,99.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_823.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24,24,,,,,,,,Copyright (c) 2012 LearnBoost <[email protected]>,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24,24,,,,,,,,,LearnBoost,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12,12,,,,,,,,,,[email protected],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,16,16,,,,,,,,,,,https://github.com/visionmedia/node-cookie-signature.git,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,27,27,,,,,,,,,,,https://github.com/visionmedia/node-cookie-signature/issues,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,npm,,cookie-signature,v 1.0.3,,,JavaScript,Sign and unsign cookies,,,https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.3.tgz,,,,,,https://github.com/visionmedia/node-cookie-signature/issues,,git+https://github.com/visionmedia/node-cookie-signature.git,,mit,MIT,"[{'license_expression': 'mit', 'matches': [{'score': 100.0, 'start_line': 24, 'end_line': 24, 'matched_length': 3, 'match_coverage': 100.0, 'matcher': '2-aho', 'license_expression': 'mit', 'rule_identifier': 'mit_27.RULE', 'rule_relevance': 100, 'rule_url': 'https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_27.RULE'}, {'score': 84.68, 'start_line': 24, 'end_line': 24, 'matched_length': 136, 'match_coverage': 85.53, 'matcher': '3-seq', 'license_expression': 'mit', 'rule_identifier': 'mit_823.RULE', 'rule_relevance': 99, 'rule_url': 'https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_823.RULE'}], 'identifier': 'mit-13195f55-8383-ff05-7a20-04ec94bbf4b1'}]",,,,,,,,https://www.npmjs.com/package/cookie-signature,https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.3.tgz,https://registry.npmjs.org/cookie-signature/1.0.3,npm_package_json,pkg:npm/[email protected]
path,type,name,base_name,extension,size,date,sha1,md5,sha256,mime_type,file_type,programming_language,is_binary,is_text,is_archive,is_media,is_source,is_script,detected_license_expression,detected_license_expression_spdx,percentage_of_license_text,files_count,dirs_count,size_count,scan_errors,license_expression,detection_log,license_match__score,start_line,end_line,license_match__matched_length,license_match__match_coverage,license_match__matcher,license_match__license_expression,license_match__rule_identifier,license_match__rule_relevance,license_match__rule_url,copyright,holder,email,url,package__type,package__namespace,package__name,package__version,package__qualifiers,package__subpath,package__primary_language,package__description,package__release_date,package__homepage_url,package__download_url,package__size,package__sha1,package__md5,package__sha256,package__sha512,package__bug_tracking_url,package__code_view_url,package__vcs_url,package__copyright,package__holder,package__declared_license_expression,package__declared_license_expression_spdx,package__license_detections,package__other_license_expression,package__other_license_expression_spdx,package__other_license_detections,package__extracted_license_statement,package__notice_text,package__file_references,package__extra_data,package__repository_homepage_url,package__repository_download_url,package__api_data_url,package__datasource_id,package__purl
json2csv.rb,file,json2csv.rb,json2csv,.rb,912,2023-03-30,1236469a06a2bacbdd8e172ad718482af5b0a936,1307c281e0b153202e291b217eab85d5,12ba215313981dbe810d9ed696b7cc753d97adfcc26eba1e13f941dc7506aa4e,text/x-script.python,"Python script, ASCII text executable",Ruby,False,True,False,False,True,True,apache-2.0,Apache-2.0,62.04,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,apache-2.0,,100.00,5,13,85,100.00,2-aho,apache-2.0,apache-2.0_7.RULE,100.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_7.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,,,,,Copyright (c) 2017 nexB Inc. and others,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,,,,,,nexB Inc. and others,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,,,,,http://nexb.com/,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,,,,,https://github.com/nexB/scancode-toolkit/,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,,,,,,,,,,,http://www.apache.org/licenses/LICENSE-2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
license,file,license,license,,679,2023-03-30,75c5490a718ddd45e40e0cc7ce0c756abc373123,b965a762efb9421cf1bf4405f336e278,a34098a43e5677495f59dff825a3f9bc0f2b0261d75feb2356919f4c3ce049ab,text/plain,ASCII text,,False,True,False,False,False,False,gpl-2.0-plus,GPL-2.0-or-later,100.0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
license,,,,,,,,,,,,,,,,,,,,,,,,,,gpl-2.0-plus,,100.00,1,12,113,100.00,1-hash,gpl-2.0-plus,gpl-2.0-plus_420.RULE,100.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0-plus_420.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,file,package.json,package,.json,2200,2023-03-30,918376afce796ef90eeda1d6695f2289c90491ac,1f66239a9b850c5e60a9382dbe2162d2,29f6068a1b6c7d06f115a5edc4ed8558edde42c6bbf0145ed77cf1108a0dd529,application/json,JSON data,,False,True,False,False,False,False,mit,MIT,45.72,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,mit,,100.00,24,24,3,100.00,2-aho,mit,mit_27.RULE,100.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_27.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,mit,,84.68,24,24,136,85.53,3-seq,mit,mit_823.RULE,99.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_823.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24,24,,,,,,,,Copyright (c) 2012 LearnBoost <[email protected]>,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24,24,,,,,,,,,LearnBoost,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12,12,,,,,,,,,,[email protected],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,16,16,,,,,,,,,,,https://github.com/visionmedia/node-cookie-signature.git,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,27,27,,,,,,,,,,,https://github.com/visionmedia/node-cookie-signature/issues,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,npm,,cookie-signature,v 1.0.3,,,JavaScript,Sign and unsign cookies,,,https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.3.tgz,,,,,,https://github.com/visionmedia/node-cookie-signature/issues,,git+https://github.com/visionmedia/node-cookie-signature.git,,,mit,MIT,"[{'license_expression': 'mit', 'matches': [{'score': 100.0, 'start_line': 24, 'end_line': 24, 'matched_length': 3, 'match_coverage': 100.0, 'matcher': '2-aho', 'license_expression': 'mit', 'rule_identifier': 'mit_27.RULE', 'rule_relevance': 100, 'rule_url': 'https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_27.RULE'}, {'score': 84.68, 'start_line': 24, 'end_line': 24, 'matched_length': 136, 'match_coverage': 85.53, 'matcher': '3-seq', 'license_expression': 'mit', 'rule_identifier': 'mit_823.RULE', 'rule_relevance': 99, 'rule_url': 'https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_823.RULE'}], 'identifier': 'mit-13195f55-8383-ff05-7a20-04ec94bbf4b1'}]",,,,,,,,https://www.npmjs.com/package/cookie-signature,https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.3.tgz,https://registry.npmjs.org/cookie-signature/1.0.3,npm_package_json,pkg:npm/[email protected]
6 changes: 3 additions & 3 deletions tests/formattedcode/data/csv/non-standard/identified.csv
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
path,type,name,base_name,extension,size,date,sha1,md5,mime_type,file_type,programming_language,is_binary,is_text,is_archive,is_media,is_source,is_script,files_count,dirs_count,size_count,scan_errors,package__download_url,package__sha1,package__md5,package__size,package__release_date,package__primary_language,package__description,package__copyright,package__declared_license_expression,package__declared_license_expression_spdx,package__license_detections,package__other_license_expression,package__other_license_expression_spdx,package__other_license_detections,package__extracted_license_statement,package__reference_notes,package__homepage_url,package__notice_text,package__components__name,package__components__version,package__components__owner_name,package__components__copyright,package__components__reference_notes,package__components__release_date,package__components__description,package__components__homepage_url,package__components__vcs_url,package__components__code_view_url,package__components__bug_tracking_url,package__components__primary_language,package__components__notice_text,package__components__notice_filename,package__components__notice_url,package__type,package__namespace,package__name,package__version,package__qualifiers,package__subpath
apache-log4j-extras-1.1.jar,file,apache-log4j-extras-1.1.jar,apache-log4j-extras-1.1,.jar,346729,2010-12-02,1e4b290f5c9ce5ea3a1a7352496c9c9d2a894800,acd91d528e26aa771198d930cf08e953,application/java-archive,Java archive data (JAR),,True,False,True,False,False,False,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
apache-log4j-extras-1.1.jar,,,,,,,,,,,,,,,,,,,,,,http://central.maven.org/maven2/log4j/apache-log4j-extras/1.1/apache-log4j-extras-1.1.jar,1e4b290f5c9ce5ea3a1a7352496c9c9d2a894800,acd91d528e26aa771198d930cf08e953,346729,,,,,,,,,,,,,,,Apache Log4j Extras,1.1,Apache Software Foundation,Copyright 2007 The Apache Software Foundation,,,Apache Extras for Apache log4j is a jar file full of additional functionality for log4j 1.2.x.,http://logging.apache.org/log4j/extras/,,,,Java,"Apache Extras Companion for log4j 1.2.
path,type,name,base_name,extension,size,date,sha1,md5,mime_type,file_type,programming_language,is_binary,is_text,is_archive,is_media,is_source,is_script,files_count,dirs_count,size_count,scan_errors,package__download_url,package__sha1,package__md5,package__size,package__release_date,package__primary_language,package__description,package__copyright,package__holder,package__declared_license_expression,package__declared_license_expression_spdx,package__license_detections,package__other_license_expression,package__other_license_expression_spdx,package__other_license_detections,package__extracted_license_statement,package__reference_notes,package__homepage_url,package__notice_text,package__components__name,package__components__version,package__components__owner_name,package__components__copyright,package__components__reference_notes,package__components__release_date,package__components__description,package__components__homepage_url,package__components__vcs_url,package__components__code_view_url,package__components__bug_tracking_url,package__components__primary_language,package__components__notice_text,package__components__notice_filename,package__components__notice_url,package__type,package__namespace,package__name,package__version,package__qualifiers,package__subpath
apache-log4j-extras-1.1.jar,file,apache-log4j-extras-1.1.jar,apache-log4j-extras-1.1,.jar,346729,2010-12-02,1e4b290f5c9ce5ea3a1a7352496c9c9d2a894800,acd91d528e26aa771198d930cf08e953,application/java-archive,Java archive data (JAR),,True,False,True,False,False,False,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
apache-log4j-extras-1.1.jar,,,,,,,,,,,,,,,,,,,,,,http://central.maven.org/maven2/log4j/apache-log4j-extras/1.1/apache-log4j-extras-1.1.jar,1e4b290f5c9ce5ea3a1a7352496c9c9d2a894800,acd91d528e26aa771198d930cf08e953,346729,,,,,,,,,,,,,,,,Apache Log4j Extras,1.1,Apache Software Foundation,Copyright 2007 The Apache Software Foundation,,,Apache Extras for Apache log4j is a jar file full of additional functionality for log4j 1.2.x.,http://logging.apache.org/log4j/extras/,,,,Java,"Apache Extras Companion for log4j 1.2.
Copyright 2007 The Apache Software Foundation

This product includes software developed at
Expand Down
Loading