Skip to content

Commit 89ff2fc

Browse files
authored
Add option for printing matched text
2 parents d8a7f07 + 00fd800 commit 89ff2fc

File tree

5 files changed

+122
-33
lines changed

5 files changed

+122
-33
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
# Copyright (c) 2021 LG Electronics Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
import logging
6+
import fosslight_util.constant as constant
7+
8+
logger = logging.getLogger(constant.LOGGER_NAME)
9+
HEADER = ['No', 'Category', 'License',
10+
'Matched Text', 'File Count', 'Files']
11+
LOW_PRIORITY = ['Permissive', 'Public Domain']
12+
13+
14+
class MatchedLicense:
    """A detected license together with the text that matched it.

    Each instance stores the license name, its category, the matched text,
    a priority derived from the category, and the list of files in which
    this license/text combination was found.
    """

    # Immutable class-level defaults. The mutable ``files`` list is created
    # per instance in ``__init__`` so it can never be shared between objects.
    license = ""
    category = ""
    matched_text = ""
    priority = 0

    def __init__(self, lic, category, text, file):
        """Create an entry for one license found in one file.

        :param lic: license name.
        :param category: license category; also determines ``priority``.
        :param text: text that matched the license.
        :param file: path of the first file in which the match was found.
        """
        self.files = [file]
        self.license = lic
        self.matched_text = text
        self.set_category(category)

    def set_license(self, value):
        """Replace the license name."""
        self.license = value

    def set_files(self, value):
        """Record another file containing this match.

        A path that is already recorded is ignored, so the file count
        reflects distinct files even when one file matches the same
        license text several times.
        """
        if value not in self.files:
            self.files.append(value)

    def set_category(self, value):
        """Set the category and recompute the priority.

        Categories listed in ``LOW_PRIORITY`` get priority 1, all others
        priority 0.
        """
        self.category = value
        self.priority = 1 if value in LOW_PRIORITY else 0

    def set_matched_text(self, value):
        """Replace the matched text."""
        self.matched_text = value

    def get_row_to_print(self):
        """Return this entry as a list of printable cell values.

        :return: ``[category, license, matched_text, file count, files]``
            where the file count is a string and the files are joined
            with commas.
        """
        return [
            self.category,
            self.license,
            self.matched_text,
            str(len(self.files)),
            ','.join(self.files),
        ]
49+
50+
51+
def get_license_list_to_print(license_list):
    """Build the printable rows for the matched-license sheet.

    :param license_list: dict whose values provide ``priority``,
        ``category`` and ``license`` attributes and a
        ``get_row_to_print()`` method.
    :return: list of rows. The first row is a copy of ``HEADER``; the rest
        are one row per license, sorted by (priority, category, license).
    """
    license_items = sorted(
        license_list.values(),
        key=lambda item: (item.priority, item.category, item.license))
    # Emit a copy of HEADER so that a caller mutating the returned rows
    # cannot alter the module-level constant shared by every sheet.
    license_rows = [list(HEADER)]
    license_rows.extend(item.get_row_to_print() for item in license_items)
    return license_rows

src/fosslight_source/_parsing_scancode_file_item.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import logging
88
import re
99
import fosslight_util.constant as constant
10+
from ._license_matched import MatchedLicense
1011

1112
logger = logging.getLogger(constant.LOGGER_NAME)
1213
_replace_word = ["-only", "-old-style", "-or-later", "licenseref-scancode-"]
@@ -116,10 +117,11 @@ def get_error_from_header(header_item):
116117
return has_error, str_error
117118

118119

119-
def parsing_file_item(scancode_file_list, has_error):
120+
def parsing_file_item(scancode_file_list, has_error, need_matched_license=False):
120121

121122
rc = True
122123
scancode_file_item = []
124+
license_list = {} # Key :[license]+[matched_text], value: MatchedLicense()
123125
msg = "TOTAL FILE COUNT: " + str(len(scancode_file_list)) + "\n"
124126

125127
prev_dir = ""
@@ -199,6 +201,18 @@ def parsing_file_item(scancode_file_list, has_error):
199201
license_value = license_value.replace(word, "")
200202
license_detected.append(license_value)
201203

204+
# Add matched licenses
205+
if need_matched_license and "category" in lic_item:
206+
lic_category = lic_item["category"]
207+
if "matched_text" in lic_item:
208+
lic_matched_text = lic_item["matched_text"]
209+
lic_matched_key = license_value + lic_matched_text
210+
if lic_matched_key in license_list:
211+
license_list[lic_matched_key].set_files(file_path)
212+
else:
213+
lic_info = MatchedLicense(license_value, lic_category, lic_matched_text, file_path)
214+
license_list[lic_matched_key] = lic_info
215+
202216
matched_rule = lic_item["matched_rule"]
203217
if matched_rule["is_license_text"]:
204218
result_item.set_is_license_text(True)
@@ -221,4 +235,4 @@ def parsing_file_item(scancode_file_list, has_error):
221235
rc = False
222236
logger.debug(msg)
223237

224-
return rc, scancode_file_item, msg.strip()
238+
return rc, scancode_file_item, msg.strip(), license_list

src/fosslight_source/convert_scancode.py

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,77 +15,88 @@
1515
from ._parsing_scancode_file_item import parsing_file_item, get_error_from_header
1616
from fosslight_util.write_excel import write_excel_and_csv
1717
from ._help import print_help_msg_convert
18+
from ._license_matched import get_license_list_to_print
1819

1920
logger = logging.getLogger(constant.LOGGER_NAME)
2021
_PKG_NAME = "fosslight_source"
2122

2223

23-
def convert_json_to_excel(scancode_json, excel_name, _result_log):
24+
def convert_json_to_excel(scancode_json, excel_name, result_log, need_license=False):
25+
sheet_license_prefix = "matched_text"
26+
sheet_SRC_prefix = "SRC"
2427
file_list = []
28+
lic_list = {}
2529
msg = ""
2630
success = True
2731

2832
try:
2933
sheet_list = {}
3034
if os.path.isfile(scancode_json):
31-
file_list = get_detected_licenses_from_scancode(
32-
scancode_json)
35+
file_list, lic_list = get_detected_licenses_from_scancode(
36+
scancode_json, need_license)
3337
if len(file_list) > 0:
3438
file_list = sorted(
3539
file_list, key=lambda row: (''.join(row.licenses)))
36-
sheet_list["SRC"] = [scan_item.get_row_to_print() for scan_item in file_list]
40+
sheet_list[sheet_SRC_prefix] = [scan_item.get_row_to_print() for scan_item in file_list]
41+
if need_license:
42+
sheet_list[sheet_license_prefix] = get_license_list_to_print(lic_list)
3743
elif os.path.isdir(scancode_json):
3844
for root, dirs, files in os.walk(scancode_json):
3945
for file in files:
4046
if file.endswith(".json"):
4147
try:
4248
result_file = os.path.join(root, file)
43-
file_list = get_detected_licenses_from_scancode(
44-
result_file)
49+
file_list, lic_list = get_detected_licenses_from_scancode(
50+
result_file, need_license)
4551
if len(file_list) > 0:
4652
file_name = os.path.basename(file)
4753
file_list = sorted(
4854
file_list, key=lambda row: (''.join(row.licenses)))
49-
sheet_list["SRC_" + file_name] = [scan_item.get_row_to_print() for scan_item in file_list]
55+
sheet_name = sheet_SRC_prefix + "_" + file_name
56+
sheet_list[sheet_name] = [scan_item.get_row_to_print() for scan_item in file_list]
57+
if need_license:
58+
lic_sheet_name = sheet_license_prefix + "_" + file_name
59+
sheet_list[lic_sheet_name] = get_license_list_to_print(lic_list)
5060
except Exception as ex:
5161
logger.warning("Error parsing "+file+":" + str(ex))
5262

5363
success_to_write, writing_msg = write_excel_and_csv(excel_name, sheet_list)
5464
logger.info("Writing excel :" + str(success_to_write) + " " + writing_msg)
5565
if success_to_write:
56-
_result_log["FOSSLight Report"] = excel_name + ".xlsx"
66+
result_log["FOSSLight Report"] = excel_name + ".xlsx"
5767

5868
except Exception as ex:
5969
success = False
6070
logger.warning(str(ex))
6171

6272
scan_result_msg = str(success) if msg == "" else str(success) + "," + msg
63-
_result_log["Scan Result"] = scan_result_msg
73+
result_log["Scan Result"] = scan_result_msg
6474

6575
try:
66-
_str_final_result_log = yaml.safe_dump(_result_log, allow_unicode=True, sort_keys=True)
76+
_str_final_result_log = yaml.safe_dump(result_log, allow_unicode=True, sort_keys=True)
6777
logger.info(_str_final_result_log)
6878
except Exception as ex:
6979
logger.warning("Failed to print result log.: " + str(ex))
7080

7181
return file_list
7282

7383

74-
def get_detected_licenses_from_scancode(scancode_json_file):
84+
def get_detected_licenses_from_scancode(scancode_json_file, need_license):
7585
file_list = []
86+
license_list = {}
7687
try:
7788
logger.info("Start parsing " + scancode_json_file)
7889
with open(scancode_json_file, "r") as st_json:
7990
st_python = json.load(st_json)
8091
has_error, str_error = get_error_from_header(st_python["headers"])
81-
rc, file_list, msg = parsing_file_item(st_python["files"], has_error)
92+
rc, file_list, msg, license_list = parsing_file_item(st_python["files"], has_error, need_license)
8293
logger.info("|---"+msg)
8394
if has_error:
8495
logger.info("|---Scan error:"+str_error)
8596
except Exception as error:
8697
logger.warning("Parsing " + scancode_json_file + ":" + str(error))
8798
logger.info("|---Number of files detected: " + str(len(file_list)))
88-
return file_list
99+
return file_list, license_list
89100

90101

91102
def main():
@@ -95,16 +106,19 @@ def main():
95106
path_to_find_bin = os.getcwd()
96107
start_time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
97108
output_file_name = ""
109+
print_matched_text = False
98110

99111
try:
100-
opts, args = getopt.getopt(argv, 'hp:o:')
112+
opts, args = getopt.getopt(argv, 'hmp:o:')
101113
for opt, arg in opts:
102114
if opt == "-h":
103115
print_help_msg_convert()
104116
elif opt == "-p":
105117
path_to_find_bin = arg
106118
elif opt == "-o":
107119
output_file_name = arg
120+
elif opt == "-m":
121+
print_matched_text = True
108122
except Exception:
109123
print_help_msg_convert()
110124

@@ -117,7 +131,7 @@ def main():
117131

118132
logger, _result_log = init_log(os.path.join(output_dir, "fosslight_src_log_" + start_time + ".txt"),
119133
True, logging.INFO, logging.DEBUG, _PKG_NAME)
120-
convert_json_to_excel(path_to_find_bin, oss_report_name, _result_log)
134+
convert_json_to_excel(path_to_find_bin, oss_report_name, _result_log, print_matched_text)
121135

122136

123137
if __name__ == '__main__':

src/fosslight_source/run_scancode.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from ._parsing_scancode_file_item import get_error_from_header
2121
from fosslight_util.write_excel import write_excel_and_csv
2222
from ._help import print_help_msg_source
23+
from ._license_matched import get_license_list_to_print
2324

2425
logger = logging.getLogger(constant.LOGGER_NAME)
2526
warnings.filterwarnings("ignore", category=FutureWarning)
@@ -28,33 +29,35 @@
2829

2930
def main():
3031
argv = sys.argv[1:]
31-
_path_to_scan = ""
32-
_write_json_file = False
33-
_output_file = ""
32+
path_to_scan = ""
33+
write_json_file = False
34+
output_file = ""
35+
print_matched_text = False
3436

3537
try:
36-
opts, args = getopt.getopt(argv, 'hjp:o:')
38+
opts, args = getopt.getopt(argv, 'hmjp:o:')
3739
for opt, arg in opts:
3840
if opt == "-h":
3941
print_help_msg_source()
4042
elif opt == "-p":
41-
_path_to_scan = arg
43+
path_to_scan = arg
4244
elif opt == "-j":
43-
_write_json_file = True
45+
write_json_file = True
4446
elif opt == "-o":
45-
_output_file = arg
46-
47+
output_file = arg
48+
elif opt == "-m":
49+
print_matched_text = True
4750
except Exception:
4851
print_help_msg_source()
4952

5053
timer = TimerThread()
5154
timer.setDaemon(True)
5255
timer.start()
53-
run_scan(_path_to_scan, _output_file, _write_json_file, -1, False)
56+
run_scan(path_to_scan, output_file, write_json_file, -1, False, print_matched_text)
5457

5558

5659
def run_scan(path_to_scan, output_file_name="",
57-
_write_json_file=False, num_cores=-1, return_results=False):
60+
_write_json_file=False, num_cores=-1, return_results=False, need_license=False):
5861
global logger
5962

6063
success = True
@@ -111,14 +114,16 @@ def run_scan(path_to_scan, output_file_name="",
111114
_result_log["Error_files"] = error_msg
112115
msg = "Failed to analyze :" + error_msg
113116
if "files" in results:
114-
rc, result_list, parsing_msg = parsing_file_item(results["files"], has_error)
117+
rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, need_license)
115118
_result_log["Parsing Log"] = parsing_msg
116119
if rc:
117120
if not success:
118121
success = True
119122
result_list = sorted(
120123
result_list, key=lambda row: (''.join(row.licenses)))
121124
sheet_list["SRC"] = [scan_item.get_row_to_print() for scan_item in result_list]
125+
if need_license:
126+
sheet_list["matched_text"] = get_license_list_to_print(license_list)
122127

123128
success_to_write, writing_msg = write_excel_and_csv(
124129
output_file, sheet_list)

tox.ini

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ filterwarnings = ignore::DeprecationWarning
2020

2121
[testenv:test_run]
2222
commands =
23-
fosslight_source -p tests/test_files -j -o test_scan/scan_result
24-
cat test_scan/scan_result.csv
23+
fosslight_source -p tests/test_files -j -o test_scan/scan_result -m
24+
cat test_scan/scan_result_SRC.csv
2525
fosslight_convert -p tests/json_result/scan_has_error.json -o test_convert/convert_result2
26-
fosslight_convert -p test_scan/scan_result.json -o test_convert/convert_result
26+
fosslight_convert -p test_scan/scan_result.json -o test_convert/convert_result -m
2727
cat test_convert/convert_result_SRC.csv
2828
python tests/cli_test.py
2929

@@ -34,10 +34,10 @@ deps =
3434
commands =
3535
fosslight_source -h
3636
fosslight_convert -h
37-
fosslight_source -p tests/test_files -j -o test_scan/scan_result
37+
fosslight_source -p tests/test_files -j -o test_scan/scan_result -m
3838
cat test_scan/scan_result_SRC.csv
3939
fosslight_convert -p tests/json_result/scan_has_error.json -o test_convert/convert_result2
40-
fosslight_convert -p test_scan/scan_result.json -o test_convert/convert_result
40+
fosslight_convert -p test_scan/scan_result.json -o test_convert/convert_result -m
4141
cat test_convert/convert_result_SRC.csv
4242
python tests/cli_test.py
4343
pytest -v --flake8

0 commit comments

Comments
 (0)