1313distributed under the License is distributed on an "AS IS" BASIS, 
1414WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
1515See the License for the specific language governing permissions and 
16- limitations 
16+ limitations under the License.
1717""" 
1818
19- # Asumptions for this script: 
19+ # Assumptions for this script:
2020# 1. directory_name is the scanned directory.
2121#  Files are copied to this directory with the full tree. As a result, if we find
2222#  a license offender, we can have its full path (just scrape directory_name). We do this
2929import  os .path 
3030import  logging 
3131import  re 
32+ import  ntpath 
3233
3334userlog  =  logging .getLogger ("scancode-evaluate" )
3435userlog .setLevel (logging .INFO )
4041MISSING_PERMISIVE_LICENSE_TEXT  =  "Non-permissive license" 
4142MISSING_SPDX_TEXT  =  "Missing SPDX license identifier" 
4243
43- def  license_check (directory_name , file ):
44-     """ Check licenses in the scancode json file for specified directory 
class FileDecodeError(Exception):
    """Raised when the content of a file being tested cannot be decoded."""
46+ 
def path_leaf(path):
    """Return the last component of a path.

    Args:
    path - path to a file or directory; a trailing separator is tolerated

    Returns:
    the final path component (the one before the trailing separator, if any)
    """
    parent, leaf = ntpath.split(path)
    if leaf:
        return leaf
    # The path ended with a separator, so the name is the last component of the head.
    return ntpath.basename(parent)
52+ 
def has_permissive_text_in_scancode_output(scancode_output_data_file_licenses):
    """Tell whether at least one license in the scancode output is permissive.

    Args:
    scancode_output_data_file_licenses - license entries of one scanned file;
    each entry is looked up by its 'category' key (KeyError if it is missing)

    Returns:
    True if any entry has the category 'Permissive', False otherwise
    """
    for license_entry in scancode_output_data_file_licenses:
        if license_entry['category'] == 'Permissive':
            return True
    return False
59+ 
def has_spdx_text_in_scancode_output(scancode_output_data_file_licenses):
    """Tell whether at least one license in the scancode output has the spdx identifier.

    Args:
    scancode_output_data_file_licenses - license entries of one scanned file;
    each entry is looked up by ['matched_rule']['identifier'] (KeyError if missing)

    Returns:
    True if 'spdx' occurs in the rule identifier of any entry, False otherwise
    """
    for license_entry in scancode_output_data_file_licenses:
        rule_identifier = license_entry['matched_rule']['identifier']
        if 'spdx' in rule_identifier:
            return True
    return False
66+ 
def has_spdx_text_in_analysed_file(file):
    """Tell whether the file analysed by ScanCode contains an SPDX identifier.

    Args:
    file - path of the file to search

    Returns:
    True if the text `SPDX-License-Identifier` occurs in the file, False otherwise

    Raises:
    FileDecodeError - if the file content cannot be decoded as text
    """
    try:
        with open(file, 'r') as read_file:
            filetext = read_file.read()
    except UnicodeDecodeError as err:
        # Chain the original error and include it in the message so the
        # reason for the decode failure is not lost.
        raise FileDecodeError(
            "Unable to look for SPDX text in `{}`: {}".format(file, err)
        ) from err

    # Only presence matters: stop at the first match and return a real boolean.
    return re.search("SPDX-License-Identifier:?", filetext) is not None
78+ 
def license_check(scancode_output):
    """Check licenses in the scancode json file for specified directory.

    This function does not verify if file exists, should be done prior the call.

    Args:
    scancode_output - scancode json output file (output from scancode --license --json-pp)

    Returns:
    0 if nothing found
    >0 - count how many license issues found
    -1 if any error in file licenses found
    """
    try:
        with open(scancode_output, 'r') as read_file:
            scancode_output_data = json.load(read_file)
    except json.JSONDecodeError as jex:
        userlog.warning("JSON could not be decoded, Invalid JSON in body: %s", jex)
        return -1

    if 'files' not in scancode_output_data:
        userlog.warning("Missing `files` attribute in %s", scancode_output)
        return -1

    # One (path, reason) record per issue. Recording values rather than the
    # shared entry dict keeps each reason intact when a file has several issues.
    offenders = []
    try:
        for scancode_output_data_file in scancode_output_data['files']:
            for fail_reason in _find_license_issues(scancode_output_data_file):
                offenders.append((scancode_output_data_file['path'], fail_reason))
    except KeyError as e:
        # Any attribute missing from an entry makes the whole report unusable.
        userlog.warning("Could not find %s attribute in %s", e, scancode_output)
        return -1

    if offenders:
        userlog.warning("Found files with missing license details, please review and fix")
        for offender_path, fail_reason in offenders:
            userlog.warning("File: %s reason: %s", path_leaf(offender_path), fail_reason)
    return len(offenders)


def _find_license_issues(scancode_output_data_file):
    """Return the fail reasons for one entry of the scancode `files` list.

    Raises KeyError if the entry lacks an attribute needed by the checks.
    """
    # Only entries of type 'file' are checked.
    if scancode_output_data_file['type'] != 'file':
        return []

    licenses = scancode_output_data_file['licenses']
    if not licenses:
        # Nothing else can be checked without any detected license.
        return [MISSING_LICENSE_TEXT]

    fail_reasons = []
    if not has_permissive_text_in_scancode_output(licenses):
        fail_reasons.append(MISSING_PERMISIVE_LICENSE_TEXT)

    if not has_spdx_text_in_scancode_output(licenses):
        # Scancode does not recognize license notice in Python file headers.
        # Issue: https://github.com/nexB/scancode-toolkit/issues/1913
        # Therefore check if the file tested by ScanCode actually has a licence notice.
        file_path = os.path.abspath(scancode_output_data_file['path'])
        try:
            if not has_spdx_text_in_analysed_file(file_path):
                fail_reasons.append(MISSING_SPDX_TEXT)
        except FileDecodeError:
            # Ignore files that cannot be decoded
            pass

    return fail_reasons
105154
106- 
def spdx_check(offenders, license_offender):
    """Check the first permissive license of a file for an SPDX rule identifier.

    Args:
    offenders - list that receives a copy of license_offender when no
    permissive license is found
    license_offender - dict holding the scancode file entry under 'file';
    its 'reason' key is set when no permissive license is found

    Returns:
    True if the first permissive license has 'spdx' in its rule identifier,
    False otherwise (including when there is no permissive license at all)
    """
    for license_entry in license_offender['file']['licenses']:
        if license_entry['category'] != 'Permissive':
            continue
        # Only the first permissive license decides the result, with or without SPDX.
        return license_entry['matched_rule']['identifier'].find("spdx") >= 0

    # No permissive license at all: record the file as an offender.
    license_offender['reason'] = MISSING_PERMISIVE_LICENSE_TEXT
    offenders.append(license_offender.copy())
    return False
128- 
129155def  parse_args ():
130156    parser  =  argparse .ArgumentParser (
131157        description = "License check." )
@@ -135,11 +161,11 @@ def parse_args():
135161                        help = 'Directory name where are files being checked' )
136162    return  parser .parse_args ()
137163
138- 
139164if  __name__  ==  "__main__" :
165+ 
140166    args  =  parse_args ()
141167    if  args .file  and  os .path .isfile (args .file ):
142-         count  =  license_check (args .directory_name ,  args . file )
168+         count  =  license_check (args .file )
143169        if  count  ==  0 :
144170            sys .exit (0 )
145171        else :
0 commit comments