Skip to content

Commit 7f3895b

Browse files
committed
refactor(scanoss): ScanOssResultParser to improve snippets findings
* Generate multiple snippetFindings for each detected line range
* Remove duplicate licenses to optimize results
* Remove identified snippets from the summary

Signed-off-by: Agustin Isasmendi <[email protected]>
1 parent b39f4b8 commit 7f3895b

File tree

2 files changed

+77
-19
lines changed

2 files changed

+77
-19
lines changed

plugins/scanners/scanoss/src/main/kotlin/ScanOssResultParser.kt

Lines changed: 39 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,19 @@
1818
*/
1919

2020
package org.ossreviewtoolkit.plugins.scanners.scanoss
21-
21+
import com.scanoss.dto.LicenseDetails
2222
import com.scanoss.dto.ScanFileDetails
2323
import com.scanoss.dto.ScanFileResult
2424
import com.scanoss.dto.enums.MatchType
25+
import com.scanoss.dto.enums.StatusType
2526

27+
import java.lang.invoke.MethodHandles
2628
import java.time.Instant
2729

30+
import kotlin.math.min
31+
32+
import org.apache.logging.log4j.kotlin.loggerOf
33+
2834
import org.ossreviewtoolkit.downloader.VcsHost
2935
import org.ossreviewtoolkit.model.CopyrightFinding
3036
import org.ossreviewtoolkit.model.LicenseFinding
@@ -36,7 +42,8 @@ import org.ossreviewtoolkit.model.TextLocation
3642
import org.ossreviewtoolkit.utils.spdx.SpdxConstants
3743
import org.ossreviewtoolkit.utils.spdx.SpdxExpression
3844
import org.ossreviewtoolkit.utils.spdx.SpdxLicenseIdExpression
39-
import org.ossreviewtoolkit.utils.spdx.andOrNull
45+
46+
// Logger shared by the top-level functions in this file. The owning class is obtained via MethodHandles because
// top-level declarations have no enclosing class that could be referenced by name.
private val logger = loggerOf(MethodHandles.lookup().lookupClass())
4047

4148
/**
4249
* Generate a summary from the given SCANOSS [result], using [startTime], [endTime] as metadata. This variant can be
@@ -56,16 +63,23 @@ internal fun generateSummary(startTime: Instant, endTime: Instant, results: List
5663
}
5764

5865
MatchType.snippet -> {
59-
val file = requireNotNull(details.file)
60-
val lines = requireNotNull(details.lines)
61-
val sourceLocations = convertLines(file, lines)
62-
val snippets = getSnippets(details)
63-
64-
snippets.forEach { snippet ->
65-
sourceLocations.forEach { sourceLocation ->
66-
// TODO: Aggregate the snippet by source file location.
67-
snippetFindings += SnippetFinding(sourceLocation, setOf(snippet))
66+
val file = requireNotNull(result.filePath)
67+
if (details.status == StatusType.pending) {
68+
val lines = requireNotNull(details.lines)
69+
val sourceLocations = convertLines(file, lines)
70+
val snippets = getSnippets(details)
71+
72+
if (sourceLocations.size != snippets.size) {
73+
logger.warn("number of local line ranges does not match with oss lines on file '$file'")
6874
}
75+
76+
for (i in 0 until min(sourceLocations.size, snippets.size)) {
77+
snippetFindings += SnippetFinding(sourceLocations[i], setOf(snippets[i]))
78+
}
79+
} else {
80+
logger.warn("file '$file' is identified, not including on snippet findings")
81+
licenseFindings += getLicenseFindings(details)
82+
copyrightFindings += getCopyrightFindings(details)
6983
}
7084
}
7185

@@ -84,7 +98,6 @@ internal fun generateSummary(startTime: Instant, endTime: Instant, results: List
8498
licenseFindings = licenseFindings,
8599
copyrightFindings = copyrightFindings,
86100
snippetFindings = snippetFindings
87-
88101
)
89102
}
90103

@@ -138,16 +151,14 @@ private fun getCopyrightFindings(details: ScanFileDetails): List<CopyrightFindin
138151
* Get the snippet findings from the given [details]. If a snippet returned by ScanOSS contains several Purls,
139152
* several snippets are created in ORT each containing a single Purl.
140153
*/
141-
private fun getSnippets(details: ScanFileDetails): Set<Snippet> {
154+
private fun getSnippets(details: ScanFileDetails): List<Snippet> {
142155
val matched = requireNotNull(details.matched)
143156
val fileUrl = requireNotNull(details.fileUrl)
144157
val ossLines = requireNotNull(details.ossLines)
145158
val url = requireNotNull(details.url)
146159
val purls = requireNotNull(details.purls)
147160

148-
val licenses = details.licenseDetails.orEmpty().mapTo(mutableSetOf()) { license ->
149-
SpdxExpression.parse(license.name)
150-
}
161+
val license = getUniqueLicenseExpression(details.licenseDetails)
151162

152163
val score = matched.substringBeforeLast("%").toFloat()
153164
val locations = convertLines(fileUrl, ossLines)
@@ -157,11 +168,9 @@ private fun getSnippets(details: ScanFileDetails): Set<Snippet> {
157168

158169
val additionalData = mapOf("release_date" to details.releaseDate)
159170

160-
return buildSet {
171+
return buildList {
161172
purls.forEach { purl ->
162173
locations.forEach { snippetLocation ->
163-
val license = licenses.andOrNull()?.sorted() ?: SpdxLicenseIdExpression(SpdxConstants.NOASSERTION)
164-
165174
add(Snippet(score, snippetLocation, provenance, purl, license, additionalData))
166175
}
167176
}
@@ -181,3 +190,14 @@ private fun convertLines(file: String, lineRanges: String): List<TextLocation> =
181190
else -> throw IllegalArgumentException("Unsupported line range '$lineRange'.")
182191
}
183192
}
193+
194+
/**
 * Combine the license names in [licensesDetails] into a single [SpdxExpression].
 *
 * Each entry's name is parsed as an SPDX expression, all parsed expressions are joined with AND, and the result is
 * passed through [SpdxExpression.simplify] so that a license reported several times is not repeated in the returned
 * expression.
 *
 * A `null` or empty [licensesDetails] results in [SpdxConstants.NOASSERTION]. `null` is accepted because the array is
 * handed over unchanged from the SCANOSS Java DTO, which does not guarantee that license details are present.
 */
fun getUniqueLicenseExpression(licensesDetails: Array<LicenseDetails>?): SpdxExpression {
    // isNullOrEmpty() smart-casts the array to non-null for the code below.
    if (licensesDetails.isNullOrEmpty()) {
        return SpdxLicenseIdExpression(SpdxConstants.NOASSERTION)
    }

    return licensesDetails
        .map { license -> SpdxExpression.parse(license.name) }
        // reduce() is safe here: the array is known to contain at least one element.
        .reduce { acc, expr -> acc.and(expr) }
        .simplify()
}

plugins/scanners/scanoss/src/test/kotlin/ScanOssResultParserTest.kt

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
package org.ossreviewtoolkit.plugins.scanners.scanoss
2121

22+
import com.scanoss.dto.LicenseDetails
2223
import com.scanoss.utils.JsonUtils
2324

2425
import io.kotest.core.spec.style.WordSpec
@@ -28,6 +29,7 @@ import io.kotest.matchers.collections.shouldContain
2829
import io.kotest.matchers.collections.shouldContainExactly
2930
import io.kotest.matchers.collections.shouldHaveSize
3031
import io.kotest.matchers.should
32+
import io.kotest.matchers.shouldBe
3133

3234
import java.io.File
3335
import java.time.Instant
@@ -40,9 +42,45 @@ import org.ossreviewtoolkit.model.SnippetFinding
4042
import org.ossreviewtoolkit.model.TextLocation
4143
import org.ossreviewtoolkit.model.VcsInfo
4244
import org.ossreviewtoolkit.model.VcsType
45+
import org.ossreviewtoolkit.utils.spdx.SpdxConstants
4346
import org.ossreviewtoolkit.utils.spdx.SpdxExpression
47+
import org.ossreviewtoolkit.utils.spdx.SpdxLicenseIdExpression
4448

4549
class ScanOssResultParserTest : WordSpec({
50+
"getUniqueLicenseDetails()" should {
51+
52+
"deduplicate complex license expressions" {
53+
val uniqueLicenses = getUniqueLicenseExpression(
54+
arrayOf(
55+
LicenseDetails.builder().name("MIT").build(),
56+
LicenseDetails.builder().name("MIT").build(),
57+
LicenseDetails.builder().name("GPL-2.0-only").build(),
58+
LicenseDetails.builder().name("GPL-2.0-only WITH Linux-syscall-note").build(),
59+
LicenseDetails.builder().name("GPL-2.0-only AND MIT").build()
60+
)
61+
)
62+
63+
val decomposed = uniqueLicenses.decompose().toList()
64+
65+
val expressionStrings = decomposed.map { it.toString() }
66+
67+
// Check that each license appears exactly once
68+
expressionStrings.count { it == "MIT" } shouldBe 1
69+
expressionStrings.count { it == "GPL-2.0-only" } shouldBe 1
70+
expressionStrings.count { it == "GPL-2.0-only WITH Linux-syscall-note" } shouldBe 1
71+
72+
// Ensure no unexpected elements
73+
expressionStrings.size shouldBe 3
74+
}
75+
76+
"handle empty license list" {
77+
val emptyLicenses = getUniqueLicenseExpression(arrayOf())
78+
79+
// Verify empty license list returns NOASSERTION
80+
emptyLicenses shouldBe SpdxLicenseIdExpression(SpdxConstants.NOASSERTION)
81+
}
82+
}
83+
4684
"generateSummary()" should {
4785
"properly summarize JUnit 4.12 findings" {
4886
val results = File("src/test/assets/scanoss-junit-4.12.json").readText().let {

0 commit comments

Comments
 (0)