Skip to content

Commit fb68ace

Browse files
committed
Add option for printing matched text
1 parent 1b0a0f4 commit fb68ace

File tree

5 files changed

+110
-30
lines changed

5 files changed

+110
-30
lines changed
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
# Copyright (c) 2021 LG Electronics Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
import logging
6+
import fosslight_util.constant as constant
7+
8+
# Logger looked up by the name stored in constant.LOGGER_NAME.
logger = logging.getLogger(constant.LOGGER_NAME)
9+
# Header row that get_license_list_to_print() places first in its result.
# NOTE(review): MatchedLicense.get_row_to_print() emits five values and nothing
# for 'No' - presumably the sheet writer fills that column; confirm against
# write_excel_and_csv.
HEADER = ['No', 'Category', 'License',
10+
'Matched Text', 'File Count', 'Files']
11+
12+
class MatchedLicense:
    """A detected license, the text that matched it, and the files it was found in.

    Attributes:
        license: License name.
        category: License category.
        matched_text: The text that matched the license.
        files: Distinct file paths in which this license/text pair was found,
            in first-seen order.
    """

    # Scalar defaults are kept for backward compatibility with class-level
    # access.  The list of files is deliberately NOT declared here: a mutable
    # class attribute would be shared by every instance that has not rebound it.
    license = ""
    category = ""
    matched_text = ""

    def __init__(self, lic, category, text, file):
        """Create an entry for license ``lic`` first seen in ``file``.

        Args:
            lic: License name.
            category: License category.
            text: Text that matched the license.
            file: Path of the first file in which the match was found.
        """
        self.files = [file]
        self.license = lic
        self.category = category
        self.matched_text = text

    def set_license(self, value):
        """Replace the license name."""
        self.license = value

    def set_files(self, value):
        """Record ``value`` as another file containing this match.

        Despite the ``set_`` prefix this adds to the list rather than replacing
        it.  A path that is already recorded is ignored, so a file holding
        several identical matches is counted once in the printed row.
        """
        if value not in self.files:
            self.files.append(value)

    def set_category(self, value):
        """Replace the license category."""
        self.category = value

    def set_matched_text(self, value):
        """Replace the matched text."""
        self.matched_text = value

    def get_row_to_print(self):
        """Return the row for this entry.

        Returns:
            list: ``[category, license, matched_text, file count as str,
            comma-joined file paths]``.
        """
        return [
            self.category,
            self.license,
            self.matched_text,
            str(len(self.files)),
            ','.join(self.files),
        ]
42+
43+
44+
def get_license_list_to_print(license_list):
    """Build the rows for the matched-license sheet.

    Args:
        license_list: Mapping whose values expose ``category``, ``license``
            and ``get_row_to_print()`` (MatchedLicense objects).

    Returns:
        list: ``HEADER`` followed by one row per value, ordered by
        ``(category, license)``.
    """
    rows = [HEADER]
    ordered = list(license_list.values())
    ordered.sort(key=lambda entry: (entry.category, entry.license))
    for entry in ordered:
        rows.append(entry.get_row_to_print())
    return rows

src/fosslight_source/_parsing_scancode_file_item.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import logging
88
import re
99
import fosslight_util.constant as constant
10+
from ._license_matched import MatchedLicense
1011

1112
logger = logging.getLogger(constant.LOGGER_NAME)
1213
_replace_word = ["-only", "-old-style", "-or-later", "licenseref-scancode-"]
@@ -116,10 +117,11 @@ def get_error_from_header(header_item):
116117
return has_error, str_error
117118

118119

119-
def parsing_file_item(scancode_file_list, has_error):
120+
def parsing_file_item(scancode_file_list, has_error, need_matched_license=False):
120121

121122
rc = True
122123
scancode_file_item = []
124+
license_list = {} # Key :[license]+[matched_text], value: MatchedLicense()
123125
msg = "TOTAL FILE COUNT: " + str(len(scancode_file_list)) + "\n"
124126

125127
prev_dir = ""
@@ -199,6 +201,18 @@ def parsing_file_item(scancode_file_list, has_error):
199201
license_value = license_value.replace(word, "")
200202
license_detected.append(license_value)
201203

204+
# Add matched licenses
205+
if need_matched_license and "category" in lic_item:
206+
lic_category = lic_item["category"]
207+
if "matched_text" in lic_item:
208+
lic_matched_text = lic_item["matched_text"]
209+
lic_matched_key = license_value + lic_matched_text
210+
if lic_matched_key in license_list:
211+
license_list[lic_matched_key].set_files(file_path)
212+
else:
213+
lic_info = MatchedLicense(license_value, lic_category, lic_matched_text, file_path)
214+
license_list[lic_matched_key] = lic_info
215+
202216
matched_rule = lic_item["matched_rule"]
203217
if matched_rule["is_license_text"]:
204218
result_item.set_is_license_text(True)
@@ -221,4 +235,4 @@ def parsing_file_item(scancode_file_list, has_error):
221235
rc = False
222236
logger.debug(msg)
223237

224-
return rc, scancode_file_item, msg.strip()
238+
return rc, scancode_file_item, msg.strip(), license_list

src/fosslight_source/convert_scancode.py

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,77 +15,86 @@
1515
from ._parsing_scancode_file_item import parsing_file_item, get_error_from_header
1616
from fosslight_util.write_excel import write_excel_and_csv
1717
from ._help import print_help_msg_convert
18+
from ._license_matched import get_license_list_to_print
1819

1920
logger = logging.getLogger(constant.LOGGER_NAME)
2021
_PKG_NAME = "fosslight_source"
2122

2223

23-
# convert_json_to_excel: turn ScanCode JSON result(s) into report sheets.
#   scancode_json - path to one JSON file, or a directory that is walked for
#                   file names ending in ".json".
#   excel_name    - output name handed to write_excel_and_csv; on success
#                   result_log["FOSSLight Report"] is set to excel_name + ".xlsx".
#   result_log    - mapping that receives "FOSSLight Report" / "Scan Result" and
#                   is finally dumped with yaml.safe_dump and logged.
#   need_license  - when true, a "matched_text" sheet built by
#                   get_license_list_to_print is added next to each "SRC" sheet.
# Returns file_list. In directory mode file_list is reassigned for every JSON
# file, so the value returned is the one from the last file parsed.
# NOTE(review): indentation is not visible in this diff view, so the exact
# nesting of the statements below cannot be confirmed from here.
def convert_json_to_excel(scancode_json, excel_name, _result_log):
24+
def convert_json_to_excel(scancode_json, excel_name, result_log, need_license=False):
25+
sheet_license_prefix = "matched_text"
26+
sheet_SRC_prefix = "SRC"
2427
file_list = []
28+
lic_list = {}
2529
# NOTE(review): msg is never reassigned in the lines shown, so "Scan Result"
# below always ends up as str(success).
msg = ""
2630
success = True
2731

2832
try:
2933
# Maps sheet name -> list of rows; passed to write_excel_and_csv below.
sheet_list = {}
3034
# Single JSON file: one "SRC" sheet (plus "matched_text" when requested).
if os.path.isfile(scancode_json):
31-
file_list = get_detected_licenses_from_scancode(
32-
scancode_json)
35+
file_list, lic_list = get_detected_licenses_from_scancode(
36+
scancode_json, need_license)
3337
if len(file_list) > 0:
3438
# Rows are ordered by the concatenation of each item's license names.
file_list = sorted(
3539
file_list, key=lambda row: (''.join(row.licenses)))
36-
sheet_list["SRC"] = [scan_item.get_row_to_print() for scan_item in file_list]
40+
sheet_list[sheet_SRC_prefix] = [scan_item.get_row_to_print() for scan_item in file_list]
41+
if need_license:
42+
sheet_list[sheet_license_prefix] = get_license_list_to_print(lic_list)
3743
# Directory: every *.json found by os.walk gets its own sheet(s), named with
# the JSON file's base name as suffix.
elif os.path.isdir(scancode_json):
3844
for root, dirs, files in os.walk(scancode_json):
3945
for file in files:
4046
if file.endswith(".json"):
4147
try:
4248
result_file = os.path.join(root, file)
43-
file_list = get_detected_licenses_from_scancode(
44-
result_file)
49+
file_list, lic_list = get_detected_licenses_from_scancode(
50+
result_file, need_license)
4551
if len(file_list) > 0:
4652
file_name = os.path.basename(file)
4753
file_list = sorted(
4854
file_list, key=lambda row: (''.join(row.licenses)))
49-
sheet_list["SRC_" + file_name] = [scan_item.get_row_to_print() for scan_item in file_list]
55+
sheet_list[sheet_SRC_prefix +"_" + file_name] = [scan_item.get_row_to_print() for scan_item in file_list]
56+
if need_license:
57+
sheet_list[sheet_license_prefix+"_" + file_name] = get_license_list_to_print(lic_list)
5058
# A failure on one JSON file is logged as a warning instead of being raised.
except Exception as ex:
5159
logger.warning("Error parsing "+file+":" + str(ex))
5260

5361
success_to_write, writing_msg = write_excel_and_csv(excel_name, sheet_list)
5462
logger.info("Writing excel :" + str(success_to_write) + " " + writing_msg)
5563
if success_to_write:
56-
_result_log["FOSSLight Report"] = excel_name + ".xlsx"
64+
result_log["FOSSLight Report"] = excel_name + ".xlsx"
5765

5866
except Exception as ex:
5967
success = False
6068
logger.warning(str(ex))
6169

6270
scan_result_msg = str(success) if msg == "" else str(success) + "," + msg
63-
_result_log["Scan Result"] = scan_result_msg
71+
result_log["Scan Result"] = scan_result_msg
6472

6573
# Dumping the result log is best-effort: a failure is logged as a warning.
try:
66-
_str_final_result_log = yaml.safe_dump(_result_log, allow_unicode=True, sort_keys=True)
74+
_str_final_result_log = yaml.safe_dump(result_log, allow_unicode=True, sort_keys=True)
6775
logger.info(_str_final_result_log)
6876
except Exception as ex:
6977
logger.warning("Failed to print result log.: " + str(ex))
7078

7179
return file_list
7280

7381

74-
def get_detected_licenses_from_scancode(scancode_json_file, need_license=False):
    """Parse one ScanCode JSON result file.

    Args:
        scancode_json_file: Path of the ScanCode JSON result to read.
        need_license: Forwarded to ``parsing_file_item``; when true the
            matched-license mapping is collected as well.  Defaults to
            ``False`` so callers that pass only the path keep working.

    Returns:
        tuple: ``(file_list, license_list)`` as produced by
        ``parsing_file_item``.  If reading or parsing fails, the error is
        logged as a warning and ``([], {})`` is returned.
    """
    file_list = []
    license_list = {}
    try:
        logger.info("Start parsing " + scancode_json_file)
        # JSON is UTF-8 by specification; do not rely on the locale's default
        # encoding, which breaks on non-ASCII content on some platforms.
        with open(scancode_json_file, "r", encoding="utf-8") as st_json:
            st_python = json.load(st_json)
        has_error, str_error = get_error_from_header(st_python["headers"])
        _rc, file_list, msg, license_list = parsing_file_item(
            st_python["files"], has_error, need_license)
        logger.info("|---"+msg)
        if has_error:
            logger.info("|---Scan error:"+str_error)
    except Exception as error:
        # Best-effort: a broken result file must not abort the whole conversion.
        logger.warning("Parsing " + scancode_json_file + ":" + str(error))
    logger.info("|---Number of files detected: " + str(len(file_list)))
    return file_list, license_list
8998

9099

91100
def main():
@@ -95,16 +104,19 @@ def main():
95104
path_to_find_bin = os.getcwd()
96105
start_time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
97106
output_file_name = ""
107+
print_matched_text = False
98108

99109
try:
100-
opts, args = getopt.getopt(argv, 'hp:o:')
110+
opts, args = getopt.getopt(argv, 'hmp:o:')
101111
for opt, arg in opts:
102112
if opt == "-h":
103113
print_help_msg_convert()
104114
elif opt == "-p":
105115
path_to_find_bin = arg
106116
elif opt == "-o":
107117
output_file_name = arg
118+
elif opt == "-m":
119+
print_matched_text = True
108120
except Exception:
109121
print_help_msg_convert()
110122

@@ -117,7 +129,7 @@ def main():
117129

118130
logger, _result_log = init_log(os.path.join(output_dir, "fosslight_src_log_" + start_time + ".txt"),
119131
True, logging.INFO, logging.DEBUG, _PKG_NAME)
120-
convert_json_to_excel(path_to_find_bin, oss_report_name, _result_log)
132+
convert_json_to_excel(path_to_find_bin, oss_report_name, _result_log, print_matched_text)
121133

122134

123135
if __name__ == '__main__':

src/fosslight_source/run_scancode.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from ._parsing_scancode_file_item import get_error_from_header
2121
from fosslight_util.write_excel import write_excel_and_csv
2222
from ._help import print_help_msg_source
23+
from ._license_matched import get_license_list_to_print
2324

2425
logger = logging.getLogger(constant.LOGGER_NAME)
2526
warnings.filterwarnings("ignore", category=FutureWarning)
@@ -28,33 +29,35 @@
2829

2930
def main():
    """Command-line entry point: read the options, then start a scan.

    Options (getopt spec ``'hmjp:o:'``):
        -h          print the help message
        -p <path>   path to scan
        -o <name>   output file name
        -j          passed to run_scan as its JSON-file flag
        -m          passed to run_scan as its need_license flag

    Any exception raised while parsing the options results in the help message
    being printed.
    """
    cli_args = sys.argv[1:]
    scan_target = ""
    want_json = False
    report_name = ""
    want_matched_text = False

    try:
        parsed_options, _positional = getopt.getopt(cli_args, 'hmjp:o:')
        for flag, value in parsed_options:
            if flag == "-m":
                want_matched_text = True
            elif flag == "-j":
                want_json = True
            elif flag == "-o":
                report_name = value
            elif flag == "-p":
                scan_target = value
            elif flag == "-h":
                print_help_msg_source()
    except Exception:
        print_help_msg_source()

    # Start the timer thread as a daemon before the scan begins.
    watchdog = TimerThread()
    watchdog.setDaemon(True)
    watchdog.start()

    run_scan(scan_target, report_name, want_json, -1, False, want_matched_text)
5457

5558

5659
def run_scan(path_to_scan, output_file_name="",
57-
_write_json_file=False, num_cores=-1, return_results=False):
60+
_write_json_file=False, num_cores=-1, return_results=False, need_license=False):
5861
global logger
5962

6063
success = True
@@ -111,14 +114,16 @@ def run_scan(path_to_scan, output_file_name="",
111114
_result_log["Error_files"] = error_msg
112115
msg = "Failed to analyze :" + error_msg
113116
if "files" in results:
114-
rc, result_list, parsing_msg = parsing_file_item(results["files"], has_error)
117+
rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, need_license)
115118
_result_log["Parsing Log"] = parsing_msg
116119
if rc:
117120
if not success:
118121
success = True
119122
result_list = sorted(
120123
result_list, key=lambda row: (''.join(row.licenses)))
121124
sheet_list["SRC"] = [scan_item.get_row_to_print() for scan_item in result_list]
125+
if need_license:
126+
sheet_list["matched_text"] = get_license_list_to_print(license_list)
122127

123128
success_to_write, writing_msg = write_excel_and_csv(
124129
output_file, sheet_list)

tox.ini

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ filterwarnings = ignore::DeprecationWarning
2020

2121
[testenv:test_run]
2222
commands =
23-
fosslight_source -p tests/test_files -j -o test_scan/scan_result
24-
cat test_scan/scan_result.csv
23+
fosslight_source -p tests/test_files -j -o test_scan/scan_result -m
24+
cat test_scan/scan_result_SRC.csv
2525
fosslight_convert -p tests/json_result/scan_has_error.json -o test_convert/convert_result2
2626
fosslight_convert -p test_scan/scan_result.json -o test_convert/convert_result
2727
cat test_convert/convert_result_SRC.csv

0 commit comments

Comments
 (0)