|  | 
|  | 1 | +/* | 
|  | 2 | + * Licensed to the Apache Software Foundation (ASF) under one or more | 
|  | 3 | + * contributor license agreements.  See the NOTICE file distributed with | 
|  | 4 | + * this work for additional information regarding copyright ownership. | 
|  | 5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 | 
|  | 6 | + * (the "License"); you may not use this file except in compliance with | 
|  | 7 | + * the License.  You may obtain a copy of the License at | 
|  | 8 | + * | 
|  | 9 | + *    http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 10 | + * | 
|  | 11 | + * Unless required by applicable law or agreed to in writing, software | 
|  | 12 | + * distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 14 | + * See the License for the specific language governing permissions and | 
|  | 15 | + * limitations under the License. | 
|  | 16 | + */ | 
|  | 17 | + | 
|  | 18 | +package org.apache.spark.mllib.linalg | 
|  | 19 | + | 
|  | 20 | +import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV} | 
|  | 21 | +import com.github.fommil.netlib.ARPACK | 
|  | 22 | +import org.netlib.util.{intW, doubleW} | 
|  | 23 | + | 
|  | 24 | +import org.apache.spark.annotation.Experimental | 
|  | 25 | + | 
|  | 26 | +/** | 
|  | 27 | + * :: Experimental :: | 
|  | 28 | + * Compute eigen-decomposition. | 
|  | 29 | + */ | 
|  | 30 | +@Experimental | 
|  | 31 | +private[mllib] object EigenValueDecomposition { | 
|  | 32 | +  /** | 
|  | 33 | +   * Compute the leading k eigenvalues and eigenvectors on a symmetric square matrix using ARPACK. | 
|  | 34 | +   * The caller needs to ensure that the input matrix is real symmetric. This function requires | 
|  | 35 | +   * memory for `n*(4*k+4)` doubles. | 
|  | 36 | +   * | 
|  | 37 | +   * @param mul a function that multiplies the symmetric matrix with a DenseVector. | 
|  | 38 | +   * @param n dimension of the square matrix (maximum Int.MaxValue). | 
|  | 39 | +   * @param k number of leading eigenvalues required, 0 < k < n. | 
|  | 40 | +   * @param tol tolerance of the eigs computation. | 
|  | 41 | +   * @param maxIterations the maximum number of Arnoldi update iterations. | 
|  | 42 | +   * @return a dense vector of eigenvalues in descending order and a dense matrix of eigenvectors | 
|  | 43 | +   *         (columns of the matrix). | 
|  | 44 | +   * @note The number of computed eigenvalues might be smaller than k when some Ritz values do not | 
|  | 45 | +   *       satisfy the convergence criterion specified by tol (see ARPACK Users Guide, Chapter 4.6 | 
|  | 46 | +   *       for more details). The maximum number of Arnoldi update iterations is set to 300 in this | 
|  | 47 | +   *       function. | 
|  | 48 | +   */ | 
|  | 49 | +  private[mllib] def symmetricEigs( | 
|  | 50 | +      mul: BDV[Double] => BDV[Double], | 
|  | 51 | +      n: Int, | 
|  | 52 | +      k: Int, | 
|  | 53 | +      tol: Double, | 
|  | 54 | +      maxIterations: Int): (BDV[Double], BDM[Double]) = { | 
|  | 55 | +    // TODO: remove this function and use eigs in breeze when switching breeze version | 
|  | 56 | +    require(n > k, s"Number of required eigenvalues $k must be smaller than matrix dimension $n") | 
|  | 57 | + | 
|  | 58 | +    val arpack = ARPACK.getInstance() | 
|  | 59 | + | 
|  | 60 | +    // tolerance used in stopping criterion | 
|  | 61 | +    val tolW = new doubleW(tol) | 
|  | 62 | +    // number of desired eigenvalues, 0 < nev < n | 
|  | 63 | +    val nev = new intW(k) | 
|  | 64 | +    // nev Lanczos vectors are generated in the first iteration | 
|  | 65 | +    // ncv-nev Lanczos vectors are generated in each subsequent iteration | 
|  | 66 | +    // ncv must be smaller than n | 
|  | 67 | +    val ncv = math.min(2 * k, n) | 
|  | 68 | + | 
|  | 69 | +    // "I" for standard eigenvalue problem, "G" for generalized eigenvalue problem | 
|  | 70 | +    val bmat = "I" | 
|  | 71 | +    // "LM" : compute the NEV largest (in magnitude) eigenvalues | 
|  | 72 | +    val which = "LM" | 
|  | 73 | + | 
|  | 74 | +    var iparam = new Array[Int](11) | 
|  | 75 | +    // use exact shift in each iteration | 
|  | 76 | +    iparam(0) = 1 | 
|  | 77 | +    // maximum number of Arnoldi update iterations, or the actual number of iterations on output | 
|  | 78 | +    iparam(2) = maxIterations | 
|  | 79 | +    // Mode 1: A*x = lambda*x, A symmetric | 
|  | 80 | +    iparam(6) = 1 | 
|  | 81 | + | 
|  | 82 | +    var ido = new intW(0) | 
|  | 83 | +    var info = new intW(0) | 
|  | 84 | +    var resid = new Array[Double](n) | 
|  | 85 | +    var v = new Array[Double](n * ncv) | 
|  | 86 | +    var workd = new Array[Double](n * 3) | 
|  | 87 | +    var workl = new Array[Double](ncv * (ncv + 8)) | 
|  | 88 | +    var ipntr = new Array[Int](11) | 
|  | 89 | + | 
|  | 90 | +    // call ARPACK's reverse communication, first iteration with ido = 0 | 
|  | 91 | +    arpack.dsaupd(ido, bmat, n, which, nev.`val`, tolW, resid, ncv, v, n, iparam, ipntr, workd, | 
|  | 92 | +      workl, workl.length, info) | 
|  | 93 | + | 
|  | 94 | +    val w = BDV(workd) | 
|  | 95 | + | 
|  | 96 | +    // ido = 99 : done flag in reverse communication | 
|  | 97 | +    while (ido.`val` != 99) { | 
|  | 98 | +      if (ido.`val` != -1 && ido.`val` != 1) { | 
|  | 99 | +        throw new IllegalStateException("ARPACK returns ido = " + ido.`val` + | 
|  | 100 | +            " This flag is not compatible with Mode 1: A*x = lambda*x, A symmetric.") | 
|  | 101 | +      } | 
|  | 102 | +      // multiply working vector with the matrix | 
|  | 103 | +      val inputOffset = ipntr(0) - 1 | 
|  | 104 | +      val outputOffset = ipntr(1) - 1 | 
|  | 105 | +      val x = w.slice(inputOffset, inputOffset + n) | 
|  | 106 | +      val y = w.slice(outputOffset, outputOffset + n) | 
|  | 107 | +      y := mul(x) | 
|  | 108 | +      // call ARPACK's reverse communication | 
|  | 109 | +      arpack.dsaupd(ido, bmat, n, which, nev.`val`, tolW, resid, ncv, v, n, iparam, ipntr, | 
|  | 110 | +        workd, workl, workl.length, info) | 
|  | 111 | +    } | 
|  | 112 | + | 
|  | 113 | +    if (info.`val` != 0) { | 
|  | 114 | +      info.`val` match { | 
|  | 115 | +        case 1 => throw new IllegalStateException("ARPACK returns non-zero info = " + info.`val` + | 
|  | 116 | +            " Maximum number of iterations taken. (Refer ARPACK user guide for details)") | 
|  | 117 | +        case 2 => throw new IllegalStateException("ARPACK returns non-zero info = " + info.`val` + | 
|  | 118 | +            " No shifts could be applied. Try to increase NCV. " + | 
|  | 119 | +            "(Refer ARPACK user guide for details)") | 
|  | 120 | +        case _ => throw new IllegalStateException("ARPACK returns non-zero info = " + info.`val` + | 
|  | 121 | +            " Please refer ARPACK user guide for error message.") | 
|  | 122 | +      } | 
|  | 123 | +    } | 
|  | 124 | + | 
|  | 125 | +    val d = new Array[Double](nev.`val`) | 
|  | 126 | +    val select = new Array[Boolean](ncv) | 
|  | 127 | +    // copy the Ritz vectors | 
|  | 128 | +    val z = java.util.Arrays.copyOfRange(v, 0, nev.`val` * n) | 
|  | 129 | + | 
|  | 130 | +    // call ARPACK's post-processing for eigenvectors | 
|  | 131 | +    arpack.dseupd(true, "A", select, d, z, n, 0.0, bmat, n, which, nev, tol, resid, ncv, v, n, | 
|  | 132 | +      iparam, ipntr, workd, workl, workl.length, info) | 
|  | 133 | + | 
|  | 134 | +    // number of computed eigenvalues, might be smaller than k | 
|  | 135 | +    val computed = iparam(4) | 
|  | 136 | + | 
|  | 137 | +    val eigenPairs = java.util.Arrays.copyOfRange(d, 0, computed).zipWithIndex.map { r => | 
|  | 138 | +      (r._1, java.util.Arrays.copyOfRange(z, r._2 * n, r._2 * n + n)) | 
|  | 139 | +    } | 
|  | 140 | + | 
|  | 141 | +    // sort the eigen-pairs in descending order | 
|  | 142 | +    val sortedEigenPairs = eigenPairs.sortBy(- _._1) | 
|  | 143 | + | 
|  | 144 | +    // copy eigenvectors in descending order of eigenvalues | 
|  | 145 | +    val sortedU = BDM.zeros[Double](n, computed) | 
|  | 146 | +    sortedEigenPairs.zipWithIndex.foreach { r => | 
|  | 147 | +      val b = r._2 * n | 
|  | 148 | +      var i = 0 | 
|  | 149 | +      while (i < n) { | 
|  | 150 | +        sortedU.data(b + i) = r._1._2(i) | 
|  | 151 | +        i += 1 | 
|  | 152 | +      } | 
|  | 153 | +    } | 
|  | 154 | + | 
|  | 155 | +    (BDV[Double](sortedEigenPairs.map(_._1)), sortedU) | 
|  | 156 | +  } | 
|  | 157 | +} | 
0 commit comments