1313distributed under the License is distributed on an "AS IS" BASIS,
1414WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1515See the License for the specific language governing permissions and
16- limitations
16+ limitations
1717"""
1818
19- # Asumptions for this script:
19+ # Assumptions for this script:
2020# 1. directory_name is scanned directory.
2121# Files are copied to this directory with full tree. As result, if we find
2222# license offender, we can have full path (just scrape directory_name). We do this
2323# magic because scancode allows to scan directories/one file.
2424# 2. SPDX and license text is a must for all code files
2525
26- import json
2726import argparse
28- import sys
29- import os .path
27+ import json
3028import logging
29+ import os .path
3130import re
31+ import sys
32+ from enum import Enum
33+
34+
class ReturnCode(Enum):
    """Status values returned by the license check and used as process exit codes."""

    # No license issue was found.
    SUCCESS = 0
    # The scancode report was unusable, or at least one license issue was found.
    ERROR = -1
3240
3341userlog = logging .getLogger ("scancode-evaluate" )
3442userlog .setLevel (logging .INFO )
3745userlog .addHandler (log_file_handler )
3846
3947MISSING_LICENSE_TEXT = "Missing license header"
40- MISSING_PERMISIVE_LICENSE_TEXT = "Non-permissive license"
48+ MISSING_PERMISSIVE_LICENSE_TEXT = "Non-permissive license"
4149MISSING_SPDX_TEXT = "Missing SPDX license identifier"
4250
43- def license_check (directory_name , file ):
44- """ Check licenses in the scancode json file for specified directory
51+
def path_leaf(path):
    """Return the last component of a path.

    A trailing slash is tolerated: in that case the name of the component
    preceding the slash is returned.
    """
    parent, leaf = os.path.split(path)
    if leaf:
        return leaf
    # The path ended with a separator, so the leaf is the parent's base name.
    return os.path.basename(parent)
57+
def has_permissive_text_in_scancode_output(scancode_output_data_file_licenses):
    """Return True if at least one license in the scancode output is permissive.

    Each license entry must provide a 'category' key; a missing key raises KeyError.
    """
    for license_entry in scancode_output_data_file_licenses:
        if license_entry['category'] == 'Permissive':
            return True
    return False
64+
def has_spdx_text_in_scancode_output(scancode_output_data_file_licenses):
    """Return True if at least one license in the scancode output has the spdx identifier.

    The check looks for the substring 'spdx' in the identifier of the rule
    that matched each license entry.
    """
    for license_entry in scancode_output_data_file_licenses:
        rule_identifier = license_entry['matched_rule']['identifier']
        if 'spdx' in rule_identifier:
            return True
    return False
71+
def has_spdx_text_in_analysed_file(scanned_file_content):
    """Return True if the text of a file analysed by ScanCode contains an SPDX identifier tag."""
    # The trailing colon is optional in the pattern, so the bare tag also matches.
    spdx_tag = re.search("SPDX-License-Identifier:?", scanned_file_content)
    return spdx_tag is not None
75+
def license_check(scancode_output):
    """Check licenses in the scancode json file for specified directory.

    This function does not verify if file exists, should be done prior the call.

    Every report entry of type 'file' is expected to carry at least one
    license, at least one permissive license, and an SPDX identifier. The
    identifier may be either detected by ScanCode or present in the text of
    the scanned file itself.

    Args:
    scancode_output - path to the scancode json output file
                      (output from scancode --license --json-pp)

    Returns:
    0 if nothing found
    >0 - count how many license issues found
    ReturnCode.ERROR.value if the report cannot be decoded or lacks an
    expected attribute
    """
    try:
        with open(scancode_output, 'r') as read_file:
            scancode_output_data = json.load(read_file)
    except json.JSONDecodeError as jex:
        userlog.warning("JSON could not be decoded, Invalid JSON in body: %s", jex)
        return ReturnCode.ERROR.value

    if 'files' not in scancode_output_data:
        userlog.warning("Missing `files` attribute in %s", scancode_output)
        return ReturnCode.ERROR.value

    offenders = []
    for scancode_output_data_file in scancode_output_data['files']:
        try:
            # Directories and other non-file entries carry no license information.
            if scancode_output_data_file['type'] != 'file':
                continue

            licenses = scancode_output_data_file['licenses']
            if not licenses:
                # Each offender is recorded as a copy so that a later failure
                # reason for the same file cannot overwrite this one.
                offenders.append(
                    dict(scancode_output_data_file, fail_reason=MISSING_LICENSE_TEXT)
                )
                # check the next file in the scancode output
                continue

            if not has_permissive_text_in_scancode_output(licenses):
                offenders.append(
                    dict(scancode_output_data_file, fail_reason=MISSING_PERMISSIVE_LICENSE_TEXT)
                )

            if not has_spdx_text_in_scancode_output(licenses):
                # Scancode does not recognize license notice in Python file headers.
                # Issue: https://github.com/nexB/scancode-toolkit/issues/1913
                # Therefore check if the file tested by ScanCode actually has a licence notice.
                file_path = os.path.abspath(scancode_output_data_file['path'])
                try:
                    with open(file_path, 'r') as read_file:
                        scanned_file_content = read_file.read()
                except UnicodeDecodeError:
                    userlog.warning("Unable to look for SPDX text in `%s`:", file_path)
                    # Ignore files that cannot be decoded
                    # check the next file in the scancode output
                    continue

                if not has_spdx_text_in_analysed_file(scanned_file_content):
                    offenders.append(
                        dict(scancode_output_data_file, fail_reason=MISSING_SPDX_TEXT)
                    )
        except KeyError as e:
            # A report entry without an expected attribute makes the whole report unusable.
            userlog.warning("Could not find %s attribute in %s", e, scancode_output)
            return ReturnCode.ERROR.value

    if offenders:
        userlog.warning("Found files with missing license details, please review and fix")
        for offender in offenders:
            userlog.warning(
                "File: %s reason: %s", path_leaf(offender['path']), offender['fail_reason']
            )
    return len(offenders)
105154
106-
107- def spdx_check (offenders , license_offender ):
108- """ Parse through list of licenses to determine whether licenses are permissive
109- @input list of offender, individual offender dict
110- @output none
111- """
112- found_spdx = False
113- # iterate through licenses, stop once permissive license has been found
114- for i in range (len (license_offender ['file' ]['licenses' ])):
115- # is any of the licenses permissive ?
116- if license_offender ['file' ]['licenses' ][i ]['category' ] == 'Permissive' :
117- # confirm that it has spdx license key
118- if license_offender ['file' ]['licenses' ][i ]['matched_rule' ]['identifier' ].find ("spdx" ) != - 1 :
119- found_spdx = True
120- # if no spdx found return anyway
121- return found_spdx
122- # otherwise file is missing permissive license
123- license_offender ['reason' ] = MISSING_PERMISIVE_LICENSE_TEXT
124- offenders .append (license_offender .copy ())
125-
126- # missing spdx and permissive license
127- return found_spdx
128-
129155def parse_args ():
130156 parser = argparse .ArgumentParser (
131157 description = "License check." )
@@ -135,15 +161,15 @@ def parse_args():
135161 help = 'Directory name where are files being checked' )
136162 return parser .parse_args ()
137163
138-
if __name__ == "__main__":
    # Script entry point: exit with SUCCESS only when the report exists and
    # the check finds no license issue.
    args = parse_args()

    report_is_available = bool(args.file) and os.path.isfile(args.file)
    if not report_is_available:
        userlog.warning("Could not find the scancode json file")
        sys.exit(ReturnCode.ERROR.value)

    # Any non-zero result (issue count or error value) is reported as ERROR.
    count = license_check(args.file)
    sys.exit(ReturnCode.SUCCESS.value if count == 0 else ReturnCode.ERROR.value)
0 commit comments