diff --git a/build.sbt b/build.sbt index 3ba60f1..ecb1f7f 100644 --- a/build.sbt +++ b/build.sbt @@ -7,9 +7,9 @@ organization := "org.monarchinitiative" name := "dosdp-tools" -version := "0.19.3" +version := "0.19.4" -scalaVersion := "2.13.8" +scalaVersion := "2.13.12" scalacOptions := Seq("-unchecked", "-deprecation", "-encoding", "utf8") diff --git a/project/build.properties b/project/build.properties index c8fcab5..04267b1 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.6.2 +sbt.version=1.9.9 diff --git a/src/main/scala/org/monarchinitiative/dosdp/ExpandedDOSDP.scala b/src/main/scala/org/monarchinitiative/dosdp/ExpandedDOSDP.scala index 9c3686f..5a7f1e1 100644 --- a/src/main/scala/org/monarchinitiative/dosdp/ExpandedDOSDP.scala +++ b/src/main/scala/org/monarchinitiative/dosdp/ExpandedDOSDP.scala @@ -132,7 +132,7 @@ final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, S ZIO.collectAll(owlAnnotations).map(_.to(Set).flatten) } - def filledAnnotationAxioms(annotationBindings: Option[Bindings], logicalBindings: Option[Bindings]): ZIO[Logging, DOSDPError, Set[OWLAnnotationAssertionAxiom]] = { + def filledAnnotationAxioms(annotationBindings: Option[Bindings], logicalBindings: Option[Bindings], synonymIndex: Map[IRI, Map[IRI, Set[String]]] = Map.empty): ZIO[Logging, DOSDPError, Set[OWLAnnotationAssertionAxiom]] = { val definedTerm = (for { actualBindings <- annotationBindings SingleValue(value) <- actualBindings.get(DOSDP.DefinedClassVariable) @@ -145,7 +145,7 @@ final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, S } yield { for { normalizedAnnotationField <- allNormalizedAnns - annotation <- translateAnnotations(normalizedAnnotationField, annotationBindings, logicalBindings) + annotation <- translateAnnotations(normalizedAnnotationField, annotationBindings, logicalBindings, synonymIndex) } yield AnnotationAssertion(annotation.getAnnotations.asScala.toSet, annotation.getProperty, definedTerm, annotation.getValue) } } @@ -171,7 +171,7 @@ final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, S }.map(_.flatten.to(Set)) } - private def translateAnnotations(annotationField: NormalizedAnnotation, annotationBindings: Option[Bindings], logicalBindings: Option[Bindings]): Set[OWLAnnotation] = annotationField match { + private def translateAnnotations(annotationField: NormalizedAnnotation, annotationBindings: Option[Bindings], logicalBindings: Option[Bindings], synonymIndex: Map[IRI, Map[IRI, Set[String]]] = Map.empty): Set[OWLAnnotation] = annotationField match { case NormalizedPrintfAnnotation(prop, text, vars, multiClause, overrideColumnOpt, subAnnotations) => val valueOpts = (for { column <- overrideColumnOpt @@ -179,14 +179,14 @@ final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, S SingleValue(binding) <- bindings.get(column) trimmed = binding.trim if trimmed.nonEmpty - } yield Seq(trimmed)).orElse(Some(printAnnotation(text, vars, multiClause, annotationBindings))) - valueOpts.getOrElse(Seq.empty).toSet[String].map(value => Annotation(subAnnotations.flatMap(translateAnnotations(_, annotationBindings, logicalBindings)), prop, value)) + } yield Seq(trimmed)).orElse(Some(printAnnotation(text, vars, multiClause, annotationBindings, logicalBindings, synonymIndex))) + valueOpts.getOrElse(Seq.empty).toSet[String].map(value => Annotation(subAnnotations.flatMap(translateAnnotations(_, annotationBindings, logicalBindings, synonymIndex)), prop, value)) case NormalizedListAnnotation(prop, value, subAnnotations) => // If no variable bindings are passed in, dummy value is filled in using variable name val multiValBindingsOpt = annotationBindings.map(multiValueBindings) val bindingsMap = multiValBindingsOpt.getOrElse(Map(value -> MultiValue(Set("'$" + value + "'")))) val listValueOpt = bindingsMap.get(value) - listValueOpt.toSet[MultiValue].flatMap(listValue => listValue.value.map(v => Annotation(subAnnotations.flatMap(translateAnnotations(_, annotationBindings, logicalBindings)), prop, v))) + listValueOpt.toSet[MultiValue].flatMap(listValue => listValue.value.map(v => Annotation(subAnnotations.flatMap(translateAnnotations(_, annotationBindings, logicalBindings, synonymIndex)), prop, v))) case NormalizedIRIValueAnnotation(prop, varr, subAnnotations) => val maybeIRIValue = logicalBindings.map { actualBindings => for { @@ -195,7 +195,7 @@ final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, S } yield iri }.getOrElse(Some(DOSDP.variableToIRI(varr))) maybeIRIValue.toSet[IRI].map(iriValue => Annotation( - subAnnotations.flatMap(translateAnnotations(_, annotationBindings, logicalBindings)), + subAnnotations.flatMap(translateAnnotations(_, annotationBindings, logicalBindings, synonymIndex)), prop, iriValue)) } @@ -208,9 +208,11 @@ final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, S * @param vars annotation variables * @param multiClause annotation multiClauses * @param annotationBindings variable bindings + * @param logicalBindings logical bindings to resolve IRIs + * @param synonymIndex index of synonyms for generating permutations * @return a sequence of printed and replaced annotation texts */ - def printAnnotation(text: Option[String], vars: Option[List[String]], multiClause: Option[MultiClausePrintf], annotationBindings: Option[Bindings]): Seq[String] = { + def printAnnotation(text: Option[String], vars: Option[List[String]], multiClause: Option[MultiClausePrintf], annotationBindings: Option[Bindings], logicalBindings: Option[Bindings] = None, synonymIndex: Map[IRI, Map[IRI, Set[String]]] = Map.empty): Seq[String] = { val clauseVars = for { mc <- multiClause.toList clauses <- mc.clauses.toList @@ -221,15 +223,91 @@ final case class ExpandedDOSDP(dosdp: DOSDP, prefixes: PartialFunction[String, S val annotationRelatedMultiValueBindings = annotationBindings.getOrElse(Map.empty[String, Binding]) .view.filterKeys(variables.contains(_)).collectFirst { case (key, MultiValue(value)) => (key, value) } val singleValBindings = annotationBindings.getOrElse(Map.empty[String, Binding]).collect { case (key, SingleValue(value)) => (key, SingleValue(value)) } - annotationRelatedMultiValueBindings match { - case None => - PrintfText.replaced(text, vars, multiClause, annotationBindings.map(singleValueBindings), quote = false).toSeq - case Some(multiValuePair) => - val multiValueText = for { - value <- multiValuePair._2 - multiText <- PrintfText.replaced(None, None, multiClause, Some(singleValBindings + (multiValuePair._1 -> SingleValue(value))), quote = false) - } yield multiText - multiValueText.toSeq + + // If synonym permutations are enabled, generate all permutations + if (synonymIndex.nonEmpty && logicalBindings.isDefined) { + generateSynonymPermutations(text, vars, multiClause, singleValBindings, annotationRelatedMultiValueBindings, logicalBindings.get, synonymIndex) + } else { + // Original behavior + annotationRelatedMultiValueBindings match { + case None => + PrintfText.replaced(text, vars, multiClause, annotationBindings.map(singleValueBindings), quote = false).toSeq + case Some(multiValuePair) => + val multiValueText = for { + value <- multiValuePair._2 + multiText <- PrintfText.replaced(None, None, multiClause, Some(singleValBindings + (multiValuePair._1 -> SingleValue(value))), quote = false) + } yield multiText + multiValueText.toSeq + } + } + } + + private def generateSynonymPermutations(text: Option[String], vars: Option[List[String]], multiClause: Option[MultiClausePrintf], singleValBindings: Map[String, SingleValue], annotationRelatedMultiValueBindings: Option[(String, Set[String])], logicalBindings: Map[String, Binding], synonymIndex: Map[IRI, Map[IRI, Set[String]]]): Seq[String] = { + // Collect all synonym alternatives for each variable + val varSynonymAlternatives: Map[String, Seq[String]] = singleValBindings.flatMap { case (varName, SingleValue(value)) => + // Try to get the IRI for this binding value from logical bindings + val iriOpt = for { + logicalBinding <- logicalBindings.get(varName) + iriString <- logicalBinding match { + case SingleValue(v) => Some(v) + case _ => None + } + iri <- Prefixes.idToIRI(iriString, prefixes) + } yield iri + + // Collect all synonyms for this IRI + iriOpt.map { iri => + val allValues = synonymIndex.values.flatMap(_.get(iri)).flatten.toSeq + // Include the original label value first, then all synonyms + varName -> (value +: allValues.filterNot(_ == value)) + } + } + + // If no variables have synonyms, return original behavior + if (varSynonymAlternatives.isEmpty) { + annotationRelatedMultiValueBindings match { + case None => + PrintfText.replaced(text, vars, multiClause, Some(singleValBindings), quote = false).toSeq + case Some(multiValuePair) => + val multiValueText = for { + value <- multiValuePair._2 + multiText <- PrintfText.replaced(None, None, multiClause, Some(singleValBindings + (multiValuePair._1 -> SingleValue(value))), quote = false) + } yield multiText + multiValueText.toSeq + } + } else { + // Generate cartesian product of all synonym combinations + val varsWithSynonyms = varSynonymAlternatives.keys.toSeq + val synonymLists = varsWithSynonyms.map(varSynonymAlternatives) + + def cartesianProduct[T](lists: Seq[Seq[T]]): Seq[Seq[T]] = lists match { + case Nil => Seq(Seq.empty) + case head :: tail => + for { + item <- head + rest <- cartesianProduct(tail) + } yield item +: rest + } + + val allCombinations = cartesianProduct(synonymLists) + + // Generate text for each combination + allCombinations.flatMap { combination => + val bindingsForThisCombination = varsWithSynonyms.zip(combination).map { + case (varName, synonymValue) => varName -> SingleValue(synonymValue) + }.toMap + val updatedBindings = singleValBindings ++ bindingsForThisCombination + + annotationRelatedMultiValueBindings match { + case None => + PrintfText.replaced(text, vars, multiClause, Some(updatedBindings), quote = false).toSeq + case Some(multiValuePair) => + for { + value <- multiValuePair._2.toSeq + multiText <- PrintfText.replaced(None, None, multiClause, Some(updatedBindings + (multiValuePair._1 -> SingleValue(value))), quote = false) + } yield multiText + } + } } } diff --git a/src/main/scala/org/monarchinitiative/dosdp/cli/Config.scala b/src/main/scala/org/monarchinitiative/dosdp/cli/Config.scala index dc17f2a..d13028d 100644 --- a/src/main/scala/org/monarchinitiative/dosdp/cli/Config.scala +++ b/src/main/scala/org/monarchinitiative/dosdp/cli/Config.scala @@ -114,7 +114,10 @@ final case class GenerateConfig(@Recurse addAxiomSourceAnnotation: BoolValue = FalseValue, @HelpMessage("IRI for annotation property to use to link generated axioms to pattern IRI") @ValueDescription("IRI") - axiomSourceAnnotationProperty: String = "http://www.geneontology.org/formats/oboInOwl#source" + axiomSourceAnnotationProperty: String = "http://www.geneontology.org/formats/oboInOwl#source", + @HelpMessage("Generate synonym permutations from filler synonyms in addition to labels") + @ValueDescription("true|false") + synonymPermutations: BoolValue = FalseValue ) extends Config { override def run: ZIO[ZEnv with Logging, DOSDPError, Unit] = Generate.run(this) diff --git a/src/main/scala/org/monarchinitiative/dosdp/cli/Docs.scala b/src/main/scala/org/monarchinitiative/dosdp/cli/Docs.scala index 49ad905..477a187 100644 --- a/src/main/scala/org/monarchinitiative/dosdp/cli/Docs.scala +++ b/src/main/scala/org/monarchinitiative/dosdp/cli/Docs.scala @@ -58,10 +58,11 @@ object Docs { variableReadableIdentifiers = (dosdp.vars.getOrElse(Map.empty).map { case (k, _) => k -> s"http://dosdp.org/filler/$k" } ++ dosdp.list_vars.getOrElse(Map.empty).map { case (k, _) => k -> s"http://dosdp.org/filler/$k" }).map(e => IRI.create(e._2) -> s"`{${e._1}}`") renderer = objectRenderer(ontology, variableReadableIdentifiers) - axioms <- Generate.renderPattern(dosdp, prefixes, fillers, Some(ontology), true, true, None, false, OboInOwlSource, false, Map(RDFSLabel.getIRI -> variableReadableIdentifiers)) + extraReadableIds = Map(RDFSLabel.getIRI -> variableReadableIdentifiers.toMap) + axioms <- Generate.renderPattern(dosdp, prefixes, fillers, Some(ontology), true, true, None, false, OboInOwlSource, false, false, extraReadableIds) patternIRI = IRI.create(iri) docAxioms = findDocAxioms(patternIRI, axioms, target, config.dataLocationPrefix) - data = columns.to(List) :: rows.take(5).map(formatDataRow(_, columns.to(List), prefixes)) ::: Nil + data = columns.to(List) :: rows.take(5).map(formatDataRow(_, columns.to(List), prefixes)) markdown <- DocsMarkdown.markdown(eDOSDP, docAxioms, renderer, data) _ <- effectBlockingIO(new PrintWriter(target.outputFile, "utf-8")).bracketAuto { writer => effectBlockingIO(writer.print(markdown)) diff --git a/src/main/scala/org/monarchinitiative/dosdp/cli/Generate.scala b/src/main/scala/org/monarchinitiative/dosdp/cli/Generate.scala index d039df0..dc15b71 100644 --- a/src/main/scala/org/monarchinitiative/dosdp/cli/Generate.scala +++ b/src/main/scala/org/monarchinitiative/dosdp/cli/Generate.scala @@ -42,7 +42,7 @@ object Generate { (columns, fillers) = columnsAndFillers missingColumns = dosdp.allVars.diff(columns.to(Set)) _ <- ZIO.foreach_(missingColumns)(c => log.warn(s"Input ${target.inputFile} for pattern ${target.templateFile} is missing column for pattern variable <$c>")) - axioms <- renderPattern(dosdp, prefixes, fillers, ontologyOpt, outputLogicalAxioms, outputAnnotationAxioms, config.restrictAxiomsColumn, config.addAxiomSourceAnnotation.bool, axiomSourceProperty, config.generateDefinedClass.bool, Map.empty) + axioms <- renderPattern(dosdp, prefixes, fillers, ontologyOpt, outputLogicalAxioms, outputAnnotationAxioms, config.restrictAxiomsColumn, config.addAxiomSourceAnnotation.bool, axiomSourceProperty, config.generateDefinedClass.bool, config.synonymPermutations.bool, Map.empty) _ <- Utilities.saveAxiomsToOntology(axioms, target.outputFile) } yield () } @@ -50,17 +50,29 @@ object Generate { } yield () } - def renderPattern(dosdp: DOSDP, prefixes: PartialFunction[String, String], fillers: Map[String, String], ontOpt: Option[OWLOntology], outputLogicalAxioms: Boolean, outputAnnotationAxioms: Boolean, restrictAxiomsColumnName: Option[String], annotateAxiomSource: Boolean, axiomSourceProperty: OWLAnnotationProperty, generateDefinedClass: Boolean, extraReadableIdentifiers: Map[IRI, Map[IRI, String]]): ZIO[Logging, DOSDPError, Set[OWLAxiom]] = - renderPattern(dosdp, prefixes, List(fillers), ontOpt, outputLogicalAxioms, outputAnnotationAxioms, restrictAxiomsColumnName, annotateAxiomSource, axiomSourceProperty, generateDefinedClass, extraReadableIdentifiers) + def renderPattern(dosdp: DOSDP, prefixes: PartialFunction[String, String], fillers: Map[String, String], ontOpt: Option[OWLOntology], outputLogicalAxioms: Boolean, outputAnnotationAxioms: Boolean, restrictAxiomsColumnName: Option[String], annotateAxiomSource: Boolean, axiomSourceProperty: OWLAnnotationProperty, generateDefinedClass: Boolean, synonymPermutations: Boolean, extraReadableIdentifiers: Map[IRI, Map[IRI, String]]): ZIO[Logging, DOSDPError, Set[OWLAxiom]] = + renderPattern(dosdp, prefixes, List(fillers), ontOpt, outputLogicalAxioms, outputAnnotationAxioms, restrictAxiomsColumnName, annotateAxiomSource, axiomSourceProperty, generateDefinedClass, synonymPermutations, extraReadableIdentifiers) - def renderPattern(dosdp: DOSDP, prefixes: PartialFunction[String, String], fillers: List[Map[String, String]], ontOpt: Option[OWLOntology], outputLogicalAxioms: Boolean, outputAnnotationAxioms: Boolean, restrictAxiomsColumnName: Option[String], annotateAxiomSource: Boolean, axiomSourceProperty: OWLAnnotationProperty, generateDefinedClass: Boolean, extraReadableIdentifiers: Map[IRI, Map[IRI, String]]): ZIO[Logging, DOSDPError, Set[OWLAxiom]] = { + def renderPattern(dosdp: DOSDP, prefixes: PartialFunction[String, String], fillers: List[Map[String, String]], ontOpt: Option[OWLOntology], outputLogicalAxioms: Boolean, outputAnnotationAxioms: Boolean, restrictAxiomsColumnName: Option[String], annotateAxiomSource: Boolean, axiomSourceProperty: OWLAnnotationProperty, generateDefinedClass: Boolean, synonymPermutations: Boolean, extraReadableIdentifiers: Map[IRI, Map[IRI, String]]): ZIO[Logging, DOSDPError, Set[OWLAxiom]] = { val eDOSDP = ExpandedDOSDP(dosdp, prefixes) val knownColumns = dosdp.allVars for { readableIdentifiers <- eDOSDP.readableIdentifierProperties - initialReadableIDIndex = ontOpt.map(ont => createReadableIdentifierIndex(readableIdentifiers, eDOSDP, ont)).getOrElse(Map.empty) + synonymProperties = if (synonymPermutations) List( + AnnotationProperty("http://www.geneontology.org/formats/oboInOwl#hasExactSynonym"), + AnnotationProperty("http://www.geneontology.org/formats/oboInOwl#hasNarrowSynonym"), + AnnotationProperty("http://www.geneontology.org/formats/oboInOwl#hasBroadSynonym"), + AnnotationProperty("http://www.geneontology.org/formats/oboInOwl#hasRelatedSynonym") + ) else List.empty + allIdentifiers = readableIdentifiers ++ synonymProperties + initialReadableIDIndex = ontOpt.map(ont => createReadableIdentifierIndex(allIdentifiers, eDOSDP, ont)).getOrElse(Map.empty) extraReadableIdentifiersInSets = extraReadableIdentifiers.map { case (p, termsToLabel) => p -> termsToLabel.map { case (t, label) => t -> Set(label) } } - readableIDIndex = (initialReadableIDIndex |+| extraReadableIdentifiersInSets).map { case (p, termsToLabels) => p -> termsToLabels.map { case (t, labels) => t -> labels.toSeq.min } } + // For labels, take min. For synonyms, keep all values + readableIDIndex = (initialReadableIDIndex |+| extraReadableIdentifiersInSets).map { case (p, termsToLabels) => + p -> termsToLabels.map { case (t, labels) => t -> labels.toSeq.min } + } + // Keep full synonym index for permutation generation + synonymIndex: Map[IRI, Map[IRI, Set[String]]] = if (synonymPermutations) initialReadableIDIndex else Map.empty generatedAxioms <- ZIO.foreach(fillers) { row => val (varBindingsItems, localLabelItems) = (for { vars <- dosdp.vars.toSeq @@ -127,12 +139,12 @@ object Generate { eDOSDP.filledLogicalAxioms(Some(logicalBindingsExtended), Some(annotationBindings)) else ZIO.succeed(Set.empty) annotationAxioms <- if (localOutputAnnotationAxioms) - eDOSDP.filledAnnotationAxioms(Some(annotationBindings), Some(logicalBindingsExtended)) + eDOSDP.filledAnnotationAxioms(Some(annotationBindings), Some(logicalBindingsExtended), synonymIndex) else ZIO.succeed(Set.empty) } yield logicalAxioms ++ annotationAxioms maybeAxioms } - allAxioms = generatedAxioms.to(Set).flatten + allAxioms: Set[OWLAxiom] = generatedAxioms.to(Set).flatten res <- if (annotateAxiomSource) { ZIO.fromOption { dosdp.pattern_iri.map(IRI.create).map { patternIRI => diff --git a/src/main/scala/org/monarchinitiative/dosdp/cli/Prototype.scala b/src/main/scala/org/monarchinitiative/dosdp/cli/Prototype.scala index f2d76d9..f321be7 100644 --- a/src/main/scala/org/monarchinitiative/dosdp/cli/Prototype.scala +++ b/src/main/scala/org/monarchinitiative/dosdp/cli/Prototype.scala @@ -46,7 +46,7 @@ object Prototype { dosdp.data_vars.getOrElse(Map.empty) ++ dosdp.data_list_vars.getOrElse(Map.empty) + (DOSDP.DefinedClassVariable -> iri) - axioms <- Generate.renderPattern(dosdp, prefixes, fillers, ontologyOpt, true, true, None, false, OboInOwlSource, false, Map.empty) + axioms <- Generate.renderPattern(dosdp, prefixes, fillers, ontologyOpt, true, true, None, false, OboInOwlSource, false, false, Map.empty) maybeTitleAxiom = dosdp.pattern_name.map(name => Class(iri) Annotation(DCTTitle, name)) } yield axioms ++ maybeTitleAxiom }