Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ import org.ossreviewtoolkit.utils.common.Options
import org.ossreviewtoolkit.utils.common.collectMessages
import org.ossreviewtoolkit.utils.ort.OkHttpClientHelper
import org.ossreviewtoolkit.utils.ort.showStackTrace
import org.ossreviewtoolkit.utils.spdx.SpdxExpression
import org.ossreviewtoolkit.utils.spdx.toSpdx
import org.ossreviewtoolkit.utils.spdx.SpdxExpression.Strictness
import org.ossreviewtoolkit.utils.spdx.toSpdxOrNull

import retrofit2.HttpException

Expand Down Expand Up @@ -149,12 +149,9 @@ class ClearlyDefinedPackageCurationProvider(
filteredCurations.forEach inner@{ (coordinates, curation) ->
val pkgId = coordinatesToIds[coordinates] ?: return@inner

val declaredLicenseParsed = curation.licensed?.declared?.let { declaredLicense ->
// Only take curations of good quality (i.e. those not using deprecated identifiers) and in
// particular none that contain "OTHER" as a license, also see
// https://github.com/clearlydefined/curated-data/issues/7836.
runCatching { declaredLicense.toSpdx(SpdxExpression.Strictness.ALLOW_CURRENT) }.getOrNull()
}
// Only take curations of good quality (i.e. those not using deprecated identifiers) and in particular none
// that contain "OTHER" as a license, also see https://github.com/clearlydefined/curated-data/issues/7836.
val declaredLicenseParsed = curation.licensed?.declared?.toSpdxOrNull(Strictness.ALLOW_CURRENT)

val sourceLocation = curation.described?.sourceLocation?.toArtifactOrVcs()

Expand Down
16 changes: 3 additions & 13 deletions utils/ort/src/main/kotlin/DeclaredLicenseProcessor.kt
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,14 @@ import com.fasterxml.jackson.annotation.JsonInclude
import com.fasterxml.jackson.annotation.JsonPropertyOrder
import com.fasterxml.jackson.databind.annotation.JsonSerialize

import org.apache.logging.log4j.kotlin.logger

import org.ossreviewtoolkit.utils.common.StringSortedSetConverter
import org.ossreviewtoolkit.utils.common.collectMessages
import org.ossreviewtoolkit.utils.common.unquote
import org.ossreviewtoolkit.utils.spdx.SpdxCompoundExpression
import org.ossreviewtoolkit.utils.spdx.SpdxConstants
import org.ossreviewtoolkit.utils.spdx.SpdxDeclaredLicenseMapping
import org.ossreviewtoolkit.utils.spdx.SpdxExpression
import org.ossreviewtoolkit.utils.spdx.SpdxOperator
import org.ossreviewtoolkit.utils.spdx.toSpdx
import org.ossreviewtoolkit.utils.spdx.toSpdxOrNull

object DeclaredLicenseProcessor {
private val urlPrefixesToRemove = listOf(
Expand Down Expand Up @@ -92,9 +89,9 @@ object DeclaredLicenseProcessor {
?: SpdxDeclaredLicenseMapping.map(strippedLicense)
?: SpdxDeclaredLicenseMapping.map(strippedLicense.unquote())
?: SpdxDeclaredLicenseMapping.map(strippedLicense.removePrefix(SpdxConstants.TAG).trim())
?: parseLicense(strippedLicense)

return mappedLicense?.normalize()?.takeIf { it.isValid() || it.toString() == SpdxConstants.NONE }
val processedLicense = mappedLicense ?: strippedLicense.toSpdxOrNull()
return processedLicense?.normalize()?.takeIf { it.isValid() || it.toString() == SpdxConstants.NONE }
}

/**
Expand Down Expand Up @@ -133,13 +130,6 @@ object DeclaredLicenseProcessor {

return ProcessedDeclaredLicense(spdxExpression, mapped, unmapped)
}

/**
* Try to parse [declaredLicense] as an SPDX expression via [toSpdx]. Return the parsed expression, or null if [toSpdx]
* throws, in which case the collected exception messages are logged at debug level.
*/
private fun parseLicense(declaredLicense: String) =
runCatching {
declaredLicense.toSpdx()
}.onFailure {
logger.debug { "Could not parse declared license '$declaredLicense': ${it.collectMessages()}" }
}.getOrNull()
}

data class ProcessedDeclaredLicense(
Expand Down
22 changes: 21 additions & 1 deletion utils/spdx/src/main/kotlin/Extensions.kt
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,19 @@
* License-Filename: LICENSE
*/

@file:Suppress("TooManyFunctions")

package org.ossreviewtoolkit.utils.spdx

import java.lang.invoke.MethodHandles

import org.apache.logging.log4j.kotlin.loggerOf

import org.ossreviewtoolkit.utils.common.collectMessages
import org.ossreviewtoolkit.utils.spdx.SpdxExpression.Strictness

private val logger = loggerOf(MethodHandles.lookup().lookupClass())

/**
* Create an [SpdxExpression] by concatenating [this][SpdxLicense] and [other] using [SpdxOperator.AND].
*/
Expand Down Expand Up @@ -79,12 +88,23 @@ fun String.isSpdxExpressionOrNotPresent(strictness: Strictness = Strictness.ALLO
SpdxConstants.isNotPresent(this) || isSpdxExpression(strictness)

/**
* Parse this string as an [SpdxExpression] of the given [strictness] and return the result on success, or throw an
* [SpdxException] if the string cannot be parsed.
*/
fun String.toSpdx(strictness: Strictness = Strictness.ALLOW_ANY): SpdxExpression =
SpdxExpression.parse(this, strictness)

/**
* Parse this string as an [SpdxExpression] of the given [strictness] and return the result on success, or null if this
* string cannot be parsed. A failure is not propagated to the caller but only logged at debug level, together with the
* messages collected from the exception.
*/
fun String.toSpdxOrNull(strictness: Strictness = Strictness.ALLOW_ANY): SpdxExpression? =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Parses -> Parse
returns -> return
maybe: the string -> this string

runCatching {
toSpdx(strictness)
}.onFailure {
logger.debug { "Could not parse '$this' as an SPDX license: ${it.collectMessages()}" }
}.getOrNull()

/**
* Convert a [String] to an SPDX "idstring" (like license IDs, package IDs, etc.) which may only contain letters,
* numbers, ".", and / or "-". If [allowPlusSuffix] is enabled, a "+" (as used in license IDs) is kept as the suffix.
Expand Down
7 changes: 3 additions & 4 deletions utils/spdx/src/main/kotlin/SpdxExpression.kt
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,9 @@ sealed class SpdxExpression {
}

/**
* Normalize all license IDs using [SpdxSimpleLicenseMapping]. If [mapDeprecated] is `true`, also deprecated IDs are
* mapped to their current counterparts. The result of this function is not guaranteed to contain only valid IDs.
* Use [validate] or [isValid] to check the returned [SpdxExpression] for validity afterwards.
*/
abstract fun normalize(mapDeprecated: Boolean = true): SpdxExpression

Expand Down
5 changes: 2 additions & 3 deletions utils/spdx/src/main/kotlin/SpdxSimpleLicenseMapping.kt
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ import com.fasterxml.jackson.module.kotlin.readValue

/**
* A mapping from simple license names to valid SPDX license IDs. This mapping only contains license strings which *can*
* be parsed by [SpdxExpression.parse] but have a corresponding valid SPDX license ID that should be used instead. See
* [SpdxDeclaredLicenseMapping] for a mapping of unparsable license strings.
*/
object SpdxSimpleLicenseMapping {
/**
Expand Down
4 changes: 4 additions & 0 deletions utils/spdx/src/main/resources/declared-license-mapping.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
# SPDX-License-Identifier: Apache-2.0
# License-Filename: LICENSE

# A mapping from license strings collected from the declared licenses of Open Source packages to SPDX expressions. This
# mapping only contains license strings which can *not* be parsed by [SpdxExpression.parse], for example because the
# license names contain whitespace. See [SpdxSimpleLicenseMapping] for a mapping of varied license names.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it be various instead of varied?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, "varied" is correct (and also used in the original text that I copied this from) as it's supposed to mean "variants of license names" and not "several license names".


# Sort the entries below via IntelliJ's "Edit" -> "Sort Lines".
# Map a declared license string to "NONE" in order to discard it.
---
Expand Down
4 changes: 4 additions & 0 deletions utils/spdx/src/main/resources/simple-license-mapping.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
# SPDX-License-Identifier: Apache-2.0
# License-Filename: LICENSE

# A mapping from simple license names to valid SPDX license IDs. This mapping only contains license strings which *can*
# be parsed by [SpdxExpression.parse] but have a corresponding valid SPDX license ID that should be used instead. See
# [SpdxDeclaredLicenseMapping] for a mapping of unparsable license strings.

# Sort the entries below via IntelliJ's "Edit" -> "Sort Lines".
---

Expand Down