12 lat temu · b22d063075
--- a/internal/ceres/compressed_col_sparse_matrix_utils.h
+++ b/internal/ceres/compressed_col_sparse_matrix_utils.h
@@ -61,6 +61,81 @@ void BlockOrderingToScalarOrdering(const vector<int>& blocks,
 
				                                    const vector<int>& block_ordering,
			
 
				                                    vector<int>* scalar_ordering);
			
 
				 
			
 
				// Solve the linear system
				//
				//   R * solution = rhs
				//
				// where R is an upper triangular matrix stored in compressed column
				// form.
				//
				//   num_cols          Number of columns of R.
				//   rows              Row index of every stored entry.
				//   cols              Column pointers: the entries of column c occupy
				//                     the index range [cols[c], cols[c + 1]), so
				//                     cols[num_cols] is read as well.
				//   values            Numerical value of every stored entry.
				//   rhs_and_solution  Contains rhs on entry and is overwritten with
				//                     the solution.
				//
				// The last stored entry of each column is read as that column's
				// diagonal entry and is used as a divisor.
				template <typename IntegerType>
				void SolveUpperTriangularInPlace(IntegerType num_cols,
				                                 const IntegerType* rows,
				                                 const IntegerType* cols,
				                                 const double* values,
				                                 double* rhs_and_solution) {
				  // Back substitution, sweeping the columns from last to first. The
				  // countdown is written as "c-- > 0" instead of "c >= 0" so that it
				  // also terminates when IntegerType is an unsigned type.
				  for (IntegerType c = num_cols; c-- > 0;) {
				    const IntegerType diagonal = cols[c + 1] - 1;
				    rhs_and_solution[c] /= values[diagonal];

				    // Now that entry c of the solution is known, eliminate it from
				    // the equations of the rows above the diagonal.
				    for (IntegerType idx = cols[c]; idx < diagonal; ++idx) {
				      rhs_and_solution[rows[idx]] -= values[idx] * rhs_and_solution[c];
				    }
				  }
				}
			
 
				+
			
 
				// Solve the linear system
				//
				//   R' * solution = rhs
				//
				// where R is an upper triangular matrix stored in compressed column
				// form.
				//
				//   num_cols          Number of columns of R.
				//   rows              Row index of every stored entry.
				//   cols              Column pointers: the entries of column c occupy
				//                     the index range [cols[c], cols[c + 1]), so
				//                     cols[num_cols] is read as well.
				//   values            Numerical value of every stored entry.
				//   rhs_and_solution  Contains rhs on entry and is overwritten with
				//                     the solution.
				//
				// The last stored entry of each column is read as that column's
				// diagonal entry and is used as a divisor.
				template <typename IntegerType>
				void SolveUpperTriangularTransposeInPlace(IntegerType num_cols,
				                                          const IntegerType* rows,
				                                          const IntegerType* cols,
				                                          const double* values,
				                                          double* rhs_and_solution) {
				  // Column c of R is row c of R', so a forward sweep over the
				  // columns of R performs forward substitution on R'.
				  for (IntegerType c = 0; c < num_cols; ++c) {
				    const IntegerType diagonal = cols[c + 1] - 1;

				    // Subtract the contribution of the off-diagonal entries of the
				    // column, which multiply already computed solution entries.
				    for (IntegerType idx = cols[c]; idx < diagonal; ++idx) {
				      rhs_and_solution[c] -= values[idx] * rhs_and_solution[rows[idx]];
				    }

				    rhs_and_solution[c] /= values[diagonal];
				  }
				}
			
 
				+
			
 
				// Given an upper triangular matrix R in compressed column form, solve
				// the linear system
				//
				//   R'R x = b
				//
				// where b is all zeros except for the entry rhs_nonzero_index, which
				// is equal to one.
				//
				// The function exploits this knowledge to reduce the number of
				// floating point operations.
				//
				//   num_cols           Number of columns of R.
				//   rows               Row index of every stored entry.
				//   cols               Column pointers: the entries of column c occupy
				//                      the index range [cols[c], cols[c + 1]).
				//   values             Numerical value of every stored entry.
				//   rhs_nonzero_index  Index of the single non-zero entry of b.
				//   solution           Output array with num_cols entries; its
				//                      previous contents are overwritten.
				//
				// The last stored entry of each column is read as that column's
				// diagonal entry and is used as a divisor.
				template <typename IntegerType>
				void SolveRTRWithSparseRHS(IntegerType num_cols,
				                           const IntegerType* rows,
				                           const IntegerType* cols,
				                           const double* values,
				                           const int rhs_nonzero_index,
				                           double* solution) {
				  std::fill(solution, solution + num_cols, 0.0);

				  // Forward substitution for R' y = b. The entries of y before
				  // rhs_nonzero_index stay zero, so the sweep starts at the non-zero
				  // entry of b, whose value of one is divided by the diagonal entry
				  // of that column.
				  solution[rhs_nonzero_index] = 1.0 / values[cols[rhs_nonzero_index + 1] - 1];

				  for (IntegerType c = rhs_nonzero_index + 1; c < num_cols; ++c) {
				    const IntegerType diagonal = cols[c + 1] - 1;
				    for (IntegerType idx = cols[c]; idx < diagonal; ++idx) {
				      const IntegerType r = rows[idx];
				      // solution[r] is still zero here, so the update would be a
				      // no-op.
				      if (r < rhs_nonzero_index) continue;
				      solution[c] -= values[idx] * solution[r];
				    }
				    solution[c] /= values[diagonal];
				  }

				  // Back substitution for R x = y.
				  SolveUpperTriangularInPlace(num_cols, rows, cols, values, solution);
				}
			
 
				+
			
 
				+
			
 
				 }  // namespace internal
			
 
				 }  // namespace ceres
			
 
				 
			
--- a/internal/ceres/compressed_col_sparse_matrix_utils_test.cc
+++ b/internal/ceres/compressed_col_sparse_matrix_utils_test.cc
@@ -193,5 +193,92 @@ TEST(_, ScalarMatrixToBlockMatrix) {
 
				   ss.Free(ccsm.release());
			
 
				 }
			
 
				 
			
 
				+class SolveUpperTriangularTest : public ::testing::Test {
			
 
				+ protected:
			
 
				+  void SetUp() {
			
 
				+    cols.resize(5);
			
 
				+    rows.resize(7);
			
 
				+    values.resize(7);
			
 
				+
			
 
				+    cols[0] = 0;
			
 
				+    rows[0] = 0;
			
 
				+    values[0] = 0.50754;
			
 
				+
			
 
				+    cols[1] = 1;
			
 
				+    rows[1] = 1;
			
 
				+    values[1] = 0.80483;
			
 
				+
			
 
				+    cols[2] = 2;
			
 
				+    rows[2] = 1;
			
 
				+    values[2] = 0.14120;
			
 
				+    rows[3] = 2;
			
 
				+    values[3] = 0.3;
			
 
				+
			
 
				+    cols[3] = 4;
			
 
				+    rows[4] = 0;
			
 
				+    values[4] = 0.77696;
			
 
				+    rows[5] = 1;
			
 
				+    values[5] = 0.41860;
			
 
				+    rows[6] = 3;
			
 
				+    values[6] = 0.88979;
			
 
				+
			
 
				+    cols[4] = 7;
			
 
				+  }
			
 
				+
			
 
				+  vector<int> cols;
			
 
				+  vector<int> rows;
			
 
				+  vector<double> values;
			
 
				+};
			
 
				+
			
 
				+TEST_F(SolveUpperTriangularTest, SolveInPlace) {
			
 
				+  double rhs_and_solution[] = {1.0, 1.0, 2.0, 2.0};
			
 
				+  const double expected[] = { -1.4706, -1.0962, 6.6667, 2.2477};
			
 
				+
			
 
				+  SolveUpperTriangularInPlace<int>(cols.size() - 1,
			
 
				+                                   &rows[0],
			
 
				+                                   &cols[0],
			
 
				+                                   &values[0],
			
 
				+                                   rhs_and_solution);
			
 
				+
			
 
				+  for (int i = 0; i < 4; ++i) {
			
 
				+    EXPECT_NEAR(rhs_and_solution[i], expected[i], 1e-4) << i;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+TEST_F(SolveUpperTriangularTest, TransposeSolveInPlace) {
			
 
				+  double rhs_and_solution[] = {1.0, 1.0, 2.0, 2.0};
			
 
				+  double expected[] = {1.970288,  1.242498,  6.081864, -0.057255};
			
 
				+
			
 
				+  SolveUpperTriangularTransposeInPlace<int>(cols.size() - 1,
			
 
				+                                            &rows[0],
			
 
				+                                            &cols[0],
			
 
				+                                            &values[0],
			
 
				+                                            rhs_and_solution);
			
 
				+
			
 
				+  for (int i = 0; i < 4; ++i) {
			
 
				+    EXPECT_NEAR(rhs_and_solution[i], expected[i], 1e-4) << i;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+TEST_F(SolveUpperTriangularTest, RTRSolveWithSparseRHS) {
			
 
				+  double solution[4];
			
 
				+  double expected[] = { 6.8420e+00,   1.0057e+00,  -1.4907e-16,  -1.9335e+00,
			
 
				+                        1.0057e+00,   2.2275e+00,  -1.9493e+00,  -6.5693e-01,
			
 
				+                       -1.4907e-16,  -1.9493e+00,   1.1111e+01,   9.7381e-17,
			
 
				+                       -1.9335e+00,  -6.5693e-01,   9.7381e-17,   1.2631e+00 };
			
 
				+
			
 
				+  for (int i = 0; i < 4; ++i) {
			
 
				+    SolveRTRWithSparseRHS<int>(cols.size() - 1,
			
 
				+                               &rows[0],
			
 
				+                               &cols[0],
			
 
				+                               &values[0],
			
 
				+                               i,
			
 
				+                               solution);
			
 
				+    for (int j = 0; j < 4; ++j) {
			
 
				+      EXPECT_NEAR(solution[j], expected[4 * i + j], 1e-3) << i;
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				 }  // namespace internal
			
 
				 }  // namespace ceres
			
--- a/internal/ceres/covariance_impl.cc
+++ b/internal/ceres/covariance_impl.cc
@@ -38,6 +38,7 @@
 
				 #include <utility>
			
 
				 #include <vector>
			
 
				 #include "Eigen/SVD"
			
 
				+#include "ceres/compressed_col_sparse_matrix_utils.h"
			
 
				 #include "ceres/compressed_row_sparse_matrix.h"
			
 
				 #include "ceres/covariance.h"
			
 
				 #include "ceres/crs_matrix.h"
			
@@ -55,6 +56,7 @@ namespace {
 
				 
			
 
				 // Per thread storage for SuiteSparse.
			
 
				 #ifndef CERES_NO_SUITESPARSE
			
 
				+
			
 
				 struct PerThreadContext {
			
 
				   explicit PerThreadContext(int num_rows)
			
 
				       : solution(NULL),
			
@@ -80,6 +82,7 @@ struct PerThreadContext {
 
				   cholmod_dense* rhs;
			
 
				   SuiteSparse ss;
			
 
				 };
			
 
				+
			
 
				 #endif
			
 
				 
			
 
				 }  // namespace
			
@@ -605,7 +608,6 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() {
 
				 
			
 
				   vector<SuiteSparse_long> transpose_rows(num_cols + 1, 0);
			
 
				   vector<SuiteSparse_long> transpose_cols(num_nonzeros, 0);
			
 
				-
			
 
				   vector<double> transpose_values(num_nonzeros, 0);
			
 
				 
			
 
				   for (int idx = 0; idx < num_nonzeros; ++idx) {
			
@@ -650,23 +652,49 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() {
 
				   cholmod_common cc;
			
 
				   cholmod_l_start(&cc);
			
 
				 
			
 
				-  SuiteSparseQR_factorization<double>* factor =
			
 
				-      SuiteSparseQR_factorize<double>(SPQR_ORDERING_BESTAMD,
			
 
				-                                      SPQR_DEFAULT_TOL,
			
 
				-                                      &cholmod_jacobian,
			
 
				-                                      &cc);
			
 
				+  cholmod_sparse* R = NULL;
			
 
				+  SuiteSparse_long* permutation = NULL;
			
 
				+
			
 
				+  // Compute a Q-less QR factorization of the Jacobian. Since we are
			
 
				+  // only interested in inverting J'J = R'R, we do not need Q. This
			
 
				+  // saves memory and gives us R as a permuted compressed column
			
 
				+  // sparse matrix.
			
 
				+  //
			
 
				+  // TODO(sameeragarwal): Currently the symbolic factorization and the
			
 
				+  // numeric factorization are done at the same time, and this does not
			
 
				+  // explicitly account for the block column and row structure in the
			
 
				+  // matrix. When using AMD, we have observed in the past that
			
 
				+  // computing the ordering with the block matrix is significantly
			
 
				+  // more efficient, both in runtime as well as the quality of
			
 
				+  // ordering computed. So, it may be worth doing that analysis
			
 
				+  // separately.
			
 
				+  const SuiteSparse_long rank =
			
 
				+      SuiteSparseQR<double>(SPQR_ORDERING_BESTAMD,
			
 
				+                            SPQR_DEFAULT_TOL,
			
 
				+                            cholmod_jacobian.ncol,
			
 
				+                            &cholmod_jacobian,
			
 
				+                            &R,
			
 
				+                            &permutation,
			
 
				+                            &cc);
			
 
				   event_logger.AddEvent("Numeric Factorization");
			
 
				+  CHECK_NOTNULL(permutation);
			
 
				+  CHECK_NOTNULL(R);
			
 
				 
			
 
				-  const int rank = cc.SPQR_istat[4];
			
 
				   if (rank < cholmod_jacobian.ncol) {
			
 
				     LOG(WARNING) << "Jacobian matrix is rank deficient."
			
 
				                  << "Number of columns: " << cholmod_jacobian.ncol
			
 
				                  << " rank: " << rank;
			
 
				-    SuiteSparseQR_free(&factor, &cc);
			
 
				+    delete []permutation;
			
 
				+    cholmod_l_free_sparse(&R, &cc);
			
 
				     cholmod_l_finish(&cc);
			
 
				     return false;
			
 
				   }
			
 
				 
			
 
				+  vector<int> inverse_permutation(num_cols);
			
 
				+  for (SuiteSparse_long i = 0; i < num_cols; ++i) {
			
 
				+    inverse_permutation[permutation[i]] = i;
			
 
				+  }
			
 
				+
			
 
				   const int* rows = covariance_matrix_->rows();
			
 
				   const int* cols = covariance_matrix_->cols();
			
 
				   double* values = covariance_matrix_->mutable_values();
			
@@ -680,35 +708,39 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() {
 
				   //
			
 
				   // Since the covariance matrix is symmetric, the i^th row and column
			
 
				   // are equal.
			
 
				+  const int num_threads = options_.num_threads;
			
 
				+  scoped_array<double> workspace(new double[num_threads * num_cols]);
			
 
				 
			
 
				-  cholmod_dense* rhs = cholmod_l_zeros(num_cols, 1, CHOLMOD_REAL, &cc);
			
 
				-  double* rhs_x = reinterpret_cast<double*>(rhs->x);
			
 
				-
			
 
				+#pragma omp parallel for num_threads(num_threads) schedule(dynamic)
			
 
				   for (int r = 0; r < num_cols; ++r) {
			
 
				-    int row_begin = rows[r];
			
 
				-    int row_end = rows[r + 1];
			
 
				+    const int row_begin = rows[r];
			
 
				+    const int row_end = rows[r + 1];
			
 
				     if (row_end == row_begin) {
			
 
				       continue;
			
 
				     }
			
 
				 
			
 
				-    rhs_x[r] = 1.0;
			
 
				-
			
 
				-    cholmod_dense* y1 = SuiteSparseQR_solve<double>(SPQR_RTX_EQUALS_ETB, factor, rhs, &cc);
			
 
				-    cholmod_dense* solution = SuiteSparseQR_solve<double>(SPQR_RETX_EQUALS_B, factor, y1, &cc);
			
 
				+#  ifdef CERES_USE_OPENMP
			
 
				+    int thread_id = omp_get_thread_num();
			
 
				+#  else
			
 
				+    int thread_id = 0;
			
 
				+#  endif
			
 
				 
			
 
				-    double* solution_x = reinterpret_cast<double*>(solution->x);
			
 
				+    double* solution = workspace.get() + thread_id * num_cols;
			
 
				+    SolveRTRWithSparseRHS<SuiteSparse_long>(
			
 
				+        num_cols,
			
 
				+        static_cast<SuiteSparse_long*>(R->i),
			
 
				+        static_cast<SuiteSparse_long*>(R->p),
			
 
				+        static_cast<double*>(R->x),
			
 
				+        inverse_permutation[r],
			
 
				+        solution);
			
 
				     for (int idx = row_begin; idx < row_end; ++idx) {
			
 
				-      const int c = cols[idx];
			
 
				-      values[idx] = solution_x[c];
			
 
				+     const int c = cols[idx];
			
 
				+     values[idx] = solution[inverse_permutation[c]];
			
 
				     }
			
 
				-
			
 
				-    cholmod_l_free_dense(&y1, &cc);
			
 
				-    cholmod_l_free_dense(&solution, &cc);
			
 
				-    rhs_x[r] = 0.0;
			
 
				   }
			
 
				 
			
 
				-  cholmod_l_free_dense(&rhs, &cc);
			
 
				-  SuiteSparseQR_free(&factor, &cc);
			
 
				+  delete []permutation;
			
 
				+  cholmod_l_free_sparse(&R, &cc);
			
 
				   cholmod_l_finish(&cc);
			
 
				   event_logger.AddEvent("Inversion");
			
 
				   return true;
			
--- a/internal/ceres/covariance_test.cc
+++ b/internal/ceres/covariance_test.cc
@@ -499,6 +499,9 @@ TEST_F(CovarianceTest, ConstantParameterBlock) {
 
				 #ifndef CERES_NO_SUITESPARSE
			
 
				   options.algorithm_type = SPARSE_CHOLESKY;
			
 
				   ComputeAndCompareCovarianceBlocks(options, expected_covariance);
			
 
				+
			
 
				+  options.algorithm_type = SPARSE_QR;
			
 
				+  ComputeAndCompareCovarianceBlocks(options, expected_covariance);
			
 
				 #endif
			
 
				 
			
 
				   options.algorithm_type = DENSE_SVD;
			
@@ -552,6 +555,9 @@ TEST_F(CovarianceTest, LocalParameterization) {
 
				 #ifndef CERES_NO_SUITESPARSE
			
 
				   options.algorithm_type = SPARSE_CHOLESKY;
			
 
				   ComputeAndCompareCovarianceBlocks(options, expected_covariance);
			
 
				+
			
 
				+  options.algorithm_type = SPARSE_QR;
			
 
				+  ComputeAndCompareCovarianceBlocks(options, expected_covariance);
			
 
				 #endif
			
 
				 
			
 
				   options.algorithm_type = DENSE_SVD;
			
@@ -776,6 +782,7 @@ class LargeScaleCovarianceTest : public ::testing::Test {
 
				 
			
 
				 TEST_F(LargeScaleCovarianceTest, Parallel) {
			
 
				   ComputeAndCompare(SPARSE_CHOLESKY, 4);
			
 
				+  ComputeAndCompare(SPARSE_QR, 4);
			
 
				 }
			
 
				 
			
 
				 #endif  // !defined(CERES_NO_SUITESPARSE) && defined(CERES_USE_OPENMP)