vor 8 Jahren · 07dbf31eca
--- a/internal/ceres/compressed_row_jacobian_writer.cc
+++ b/internal/ceres/compressed_row_jacobian_writer.cc
@@ -121,6 +121,13 @@ SparseMatrix* CompressedRowJacobianWriter::CreateJacobian() const {
 
				   // seems to be the only way to construct it without doing a memory copy.
			
 
				   int* rows = jacobian->mutable_rows();
			
 
				   int* cols = jacobian->mutable_cols();
			
 
				+
			
 
				+  // Initialize crsb rows and cols.
			
 
				+  std::vector<int>& crsb_rows = *jacobian->mutable_crsb_rows();
			
 
				+  std::vector<int>& crsb_cols = *jacobian->mutable_crsb_cols();
			
 
				+  crsb_rows.resize(residual_blocks.size() + 1);
			
 
				+  crsb_rows[0] = 0;
			
 
				+
			
 
				   int row_pos = 0;
			
 
				   rows[0] = 0;
			
 
				   for (int i = 0; i < residual_blocks.size(); ++i) {
			
@@ -157,6 +164,11 @@ SparseMatrix* CompressedRowJacobianWriter::CreateJacobian() const {
 
				                  << "Parameter Blocks: " << parameter_block_description;
			
 
				     }
			
 
				 
			
 
				+    // Populate crsb rows and cols.
			
 
				+    crsb_rows[i + 1] = crsb_rows[i] + parameter_indices.size();
			
 
				+    std::copy(parameter_indices.begin(), parameter_indices.end(),
			
 
				+              std::back_inserter(crsb_cols));
			
 
				+
			
 
				     // Update the row indices.
			
 
				     const int num_residuals = residual_block->NumResiduals();
			
 
				     for (int j = 0; j < num_residuals; ++j) {
			
--- a/internal/ceres/compressed_row_sparse_matrix.cc
+++ b/internal/ceres/compressed_row_sparse_matrix.cc
@@ -69,6 +69,39 @@ struct RowColLessThan {
 
				   const int* cols;
			
 
				 };
			
 
				 
			
 
				+void TransposeForCompressedRowSparseStructure(const int num_rows,
			
 
				+                                              const int num_cols,
			
 
				+                                              const int num_nonzeros,
			
 
				+                                              const int *rows,
			
 
				+                                              const int *cols,
			
 
				+                                              const double *values,
			
 
				+                                              int* transpose_rows,
			
 
				+                                              int *transpose_cols,
			
 
				+                                              double *transpose_values) {
			
 
				+  for (int idx = 0; idx < num_nonzeros; ++idx) {
			
 
				+    ++transpose_rows[cols[idx] + 1];
			
 
				+  }
			
 
				+
			
 
				+  for (int i = 1; i < num_cols + 1; ++i) {
			
 
				+    transpose_rows[i] += transpose_rows[i - 1];
			
 
				+  }
			
 
				+
			
 
				+  for (int r = 0; r < num_rows; ++r) {
			
 
				+    for (int idx = rows[r]; idx < rows[r + 1]; ++idx) {
			
 
				+      const int c = cols[idx];
			
 
				+      const int transpose_idx = transpose_rows[c]++;
			
 
				+      transpose_cols[transpose_idx] = r;
			
 
				+      if (values) {
			
 
				+         transpose_values[transpose_idx] = values[idx];
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  for (int i = num_cols - 1; i > 0 ; --i) {
			
 
				+    transpose_rows[i] = transpose_rows[i - 1];
			
 
				+  }
			
 
				+  transpose_rows[0] = 0;
			
 
				+}
			
 
				 }  // namespace
			
 
				 
			
 
				 // This constructor gives you a semi-initialized CompressedRowSparseMatrix.
			
@@ -220,6 +253,16 @@ void CompressedRowSparseMatrix::DeleteRows(int delta_rows) {
 
				   num_rows_ -= delta_rows;
			
 
				   rows_.resize(num_rows_ + 1);
			
 
				 
			
 
				+  // The rest of the code update block information.
			
 
				+  // Immediately return in case of no block information.
			
 
				+  if (row_blocks_.empty()) {
			
 
				+    return;
			
 
				+  }
			
 
				+
			
 
				+  // Sanity check for compressed row sparse block information
			
 
				+  CHECK_EQ(crsb_rows_.size(), row_blocks_.size() + 1);
			
 
				+  CHECK_EQ(crsb_rows_.back(), crsb_cols_.size());
			
 
				+
			
 
				   // Walk the list of row blocks until we reach the new number of rows
			
 
				   // and the drop the rest of the row blocks.
			
 
				   int num_row_blocks = 0;
			
@@ -230,12 +273,18 @@ void CompressedRowSparseMatrix::DeleteRows(int delta_rows) {
 
				   }
			
 
				 
			
 
				   row_blocks_.resize(num_row_blocks);
			
 
				+
			
 
				+  // Update compressed row sparse block (crsb) information.
			
 
				+  CHECK_EQ(num_rows, num_rows_);
			
 
				+  crsb_rows_.resize(num_row_blocks + 1);
			
 
				+  crsb_cols_.resize(crsb_rows_[num_row_blocks]);
			
 
				 }
			
 
				 
			
 
				 void CompressedRowSparseMatrix::AppendRows(const CompressedRowSparseMatrix& m) {
			
 
				   CHECK_EQ(m.num_cols(), num_cols_);
			
 
				 
			
 
				-  CHECK(row_blocks_.size() == 0 || m.row_blocks().size() !=0)
			
 
				+  CHECK((row_blocks_.size() == 0 && m.row_blocks().size() == 0) ||
			
 
				+        (row_blocks_.size() != 0 && m.row_blocks().size() != 0))
			
 
				       << "Cannot append a matrix with row blocks to one without and vice versa."
			
 
				       << "This matrix has : " << row_blocks_.size() << " row blocks."
			
 
				       << "The matrix being appended has: " << m.row_blocks().size()
			
@@ -270,9 +319,38 @@ void CompressedRowSparseMatrix::AppendRows(const CompressedRowSparseMatrix& m) {
 
				   }
			
 
				 
			
 
				   num_rows_ += m.num_rows();
			
 
				+
			
 
				+  // The rest of the code update block information.
			
 
				+  // Immediately return in case of no block information.
			
 
				+  if (row_blocks_.empty()) {
			
 
				+    return;
			
 
				+  }
			
 
				+
			
 
				+  // Sanity check for compressed row sparse block information
			
 
				+  CHECK_EQ(crsb_rows_.size(), row_blocks_.size() + 1);
			
 
				+  CHECK_EQ(crsb_rows_.back(), crsb_cols_.size());
			
 
				+
			
 
				   row_blocks_.insert(row_blocks_.end(),
			
 
				                      m.row_blocks().begin(),
			
 
				                      m.row_blocks().end());
			
 
				+
			
 
				+  // The rest of the code update compressed row sparse block (crsb) information.
			
 
				+  const int num_crsb_nonzeros = crsb_cols_.size();
			
 
				+  const int m_num_crsb_nonzeros = m.crsb_cols_.size();
			
 
				+  crsb_cols_.resize(num_crsb_nonzeros + m_num_crsb_nonzeros);
			
 
				+  std::copy(&m.crsb_cols()[0], &m.crsb_cols()[0] + m_num_crsb_nonzeros,
			
 
				+            &crsb_cols_[num_crsb_nonzeros]);
			
 
				+
			
 
				+  const int num_crsb_rows = crsb_rows_.size() - 1;
			
 
				+  const int m_num_crsb_rows = m.crsb_rows_.size() - 1;
			
 
				+  crsb_rows_.resize(num_crsb_rows + m_num_crsb_rows + 1);
			
 
				+  std::fill(crsb_rows_.begin() + num_crsb_rows,
			
 
				+            crsb_rows_.begin() + num_crsb_rows + m_num_crsb_rows + 1,
			
 
				+            crsb_rows_[num_crsb_rows]);
			
 
				+
			
 
				+  for (int r = 0; r < m_num_crsb_rows + 1; ++r) {
			
 
				+    crsb_rows_[num_crsb_rows + r] += m.crsb_rows()[r];
			
 
				+  }
			
 
				 }
			
 
				 
			
 
				 void CompressedRowSparseMatrix::ToTextFile(FILE* file) const {
			
@@ -364,6 +442,15 @@ CompressedRowSparseMatrix* CompressedRowSparseMatrix::CreateBlockDiagonalMatrix(
 
				   *matrix->mutable_row_blocks() = blocks;
			
 
				   *matrix->mutable_col_blocks() = blocks;
			
 
				 
			
 
				+  // Fill compressed row sparse block (crsb) information.
			
 
				+  vector<int>& crsb_rows = *matrix->mutable_crsb_rows();
			
 
				+  vector<int>& crsb_cols = *matrix->mutable_crsb_cols();
			
 
				+  for (int i = 0; i < blocks.size(); ++i) {
			
 
				+    crsb_rows.push_back(i);
			
 
				+    crsb_cols.push_back(i);
			
 
				+  }
			
 
				+  crsb_rows.push_back(blocks.size());
			
 
				+
			
 
				   CHECK_EQ(idx_cursor, num_nonzeros);
			
 
				   CHECK_EQ(col_cursor, num_rows);
			
 
				   return matrix;
			
@@ -373,40 +460,44 @@ CompressedRowSparseMatrix* CompressedRowSparseMatrix::Transpose() const {
 
				   CompressedRowSparseMatrix* transpose =
			
 
				       new CompressedRowSparseMatrix(num_cols_, num_rows_, num_nonzeros());
			
 
				 
			
 
				-  int* transpose_rows = transpose->mutable_rows();
			
 
				-  int* transpose_cols = transpose->mutable_cols();
			
 
				-  double* transpose_values = transpose->mutable_values();
			
 
				+  TransposeForCompressedRowSparseStructure(
			
 
				+      num_rows(), num_cols(), num_nonzeros(),
			
 
				+      rows(), cols(), values(),
			
 
				+      transpose->mutable_rows(),
			
 
				+      transpose->mutable_cols(),
			
 
				+      transpose->mutable_values());
			
 
				 
			
 
				-  for (int idx = 0; idx < num_nonzeros(); ++idx) {
			
 
				-    ++transpose_rows[cols_[idx] + 1];
			
 
				+  // The rest of the code update block information.
			
 
				+  // Immediately return in case of no block information.
			
 
				+  if (row_blocks_.empty()) {
			
 
				+    return transpose;
			
 
				   }
			
 
				 
			
 
				-  for (int i = 1; i < transpose->num_rows() + 1; ++i) {
			
 
				-    transpose_rows[i] += transpose_rows[i - 1];
			
 
				-  }
			
 
				-
			
 
				-  for (int r = 0; r < num_rows(); ++r) {
			
 
				-    for (int idx = rows_[r]; idx < rows_[r + 1]; ++idx) {
			
 
				-      const int c = cols_[idx];
			
 
				-      const int transpose_idx = transpose_rows[c]++;
			
 
				-      transpose_cols[transpose_idx] = r;
			
 
				-      transpose_values[transpose_idx] = values_[idx];
			
 
				-    }
			
 
				-  }
			
 
				-
			
 
				-  for (int i = transpose->num_rows() - 1; i > 0 ; --i) {
			
 
				-    transpose_rows[i] = transpose_rows[i - 1];
			
 
				-  }
			
 
				-  transpose_rows[0] = 0;
			
 
				+  // Sanity check for compressed row sparse block information
			
 
				+  CHECK_EQ(crsb_rows_.size(), row_blocks_.size() + 1);
			
 
				+  CHECK_EQ(crsb_rows_.back(), crsb_cols_.size());
			
 
				 
			
 
				   *(transpose->mutable_row_blocks()) = col_blocks_;
			
 
				   *(transpose->mutable_col_blocks()) = row_blocks_;
			
 
				 
			
 
				+  // The rest of the code update compressed row sparse block (crsb) information.
			
 
				+  vector<int>& transpose_crsb_rows = *transpose->mutable_crsb_rows();
			
 
				+  vector<int>& transpose_crsb_cols = *transpose->mutable_crsb_cols();
			
 
				+
			
 
				+  transpose_crsb_rows.resize(col_blocks_.size() + 1);
			
 
				+  std::fill(transpose_crsb_rows.begin(), transpose_crsb_rows.end(), 0);
			
 
				+  transpose_crsb_cols.resize(crsb_cols_.size());
			
 
				+
			
 
				+  TransposeForCompressedRowSparseStructure(
			
 
				+      row_blocks().size(), col_blocks().size(), crsb_cols().size(),
			
 
				+      crsb_rows().data(), crsb_cols().data(), NULL,
			
 
				+      transpose_crsb_rows.data(), transpose_crsb_cols.data(), NULL);
			
 
				+
			
 
				   return transpose;
			
 
				 }
			
 
				 
			
 
				 namespace {
			
 
				-// A ProductTerm is a term in the outer product of a matrix with
			
 
				+// A ProductTerm is a term in the block outer product of a matrix with
			
 
				 // itself.
			
 
				 struct ProductTerm {
			
 
				   ProductTerm(const int row, const int col, const int index)
			
@@ -428,72 +519,176 @@ struct ProductTerm {
 
				   int index;
			
 
				 };
			
 
				 
			
 
				+// Create outerproduct matrix based on the block product information.
			
 
				+// The input block product is already sorted.  This function does not
			
 
				+// set the sparse rows/cols information.  Instead, it only collects the
			
 
				+// nonzeros for each compressed row and puts in row_nnz.
			
 
				+// The caller of this function will traverse the block product in a second
			
 
				+// round to generate the sparse rows/cols information.
			
 
				+// This function also computes the block offset information for
			
 
				+// the outerproduct matrix, which is used in outer product computation.
			
 
				 CompressedRowSparseMatrix*
			
 
				-CompressAndFillProgram(const int num_rows,
			
 
				-                       const int num_cols,
			
 
				-                       const vector<ProductTerm>& product,
			
 
				-                       vector<int>* program) {
			
 
				-  CHECK_GT(product.size(), 0);
			
 
				+CreateOuterProductMatrix(const int num_cols,
			
 
				+                         const vector<int>& blocks,
			
 
				+                         const vector<ProductTerm>& product,
			
 
				+                         vector<int>* row_nnz) {
			
 
				 
			
 
				   // Count the number of unique product term, which in turn is the
			
 
				   // number of non-zeros in the outer product.
			
 
				-  int num_nonzeros = 1;
			
 
				+  // Also count the number of non-zeros in each row.
			
 
				+  row_nnz->resize(blocks.size());
			
 
				+  std::fill(row_nnz->begin(), row_nnz->end(), 0);
			
 
				+  (*row_nnz)[product[0].row] = blocks[product[0].col];
			
 
				+  int num_nonzeros = blocks[product[0].row] * blocks[product[0].col];
			
 
				   for (int i = 1; i < product.size(); ++i) {
			
 
				+    // Each (row, col) block counts only once.
			
 
				+    // This check depends on product sorted on (row, col).
			
 
				     if (product[i].row != product[i - 1].row ||
			
 
				         product[i].col != product[i - 1].col) {
			
 
				-      ++num_nonzeros;
			
 
				+      (*row_nnz)[product[i].row] += blocks[product[i].col];
			
 
				+      num_nonzeros += blocks[product[i].row] * blocks[product[i].col];
			
 
				     }
			
 
				   }
			
 
				 
			
 
				   CompressedRowSparseMatrix* matrix =
			
 
				-      new CompressedRowSparseMatrix(num_rows, num_cols, num_nonzeros);
			
 
				+      new CompressedRowSparseMatrix(num_cols, num_cols, num_nonzeros);
			
 
				+
			
 
				+  // Compute block offsets for outer product matrix, which is used
			
 
				+  // in ComputeOuterProduct.
			
 
				+  vector<int>* block_offsets = matrix->mutable_block_offsets();
			
 
				+  block_offsets->resize(blocks.size() + 1);
			
 
				+  (*block_offsets)[0] = 0;
			
 
				+  for (int i = 0; i < blocks.size(); ++i) {
			
 
				+    (*block_offsets)[i + 1] = (*block_offsets)[i] + blocks[i];
			
 
				+  }
			
 
				+
			
 
				+  return matrix;
			
 
				+}
			
 
				+
			
 
				+CompressedRowSparseMatrix*
			
 
				+CompressAndFillProgram(const int num_cols,
			
 
				+                       const vector<int>& blocks,
			
 
				+                       const vector<ProductTerm>& product,
			
 
				+                       vector<int>* program) {
			
 
				+  CHECK_GT(product.size(), 0);
			
 
				+
			
 
				+  vector<int> row_nnz;
			
 
				+  CompressedRowSparseMatrix* matrix =
			
 
				+      CreateOuterProductMatrix(num_cols, blocks, product, &row_nnz);
			
 
				+
			
 
				+  const vector<int>& block_offsets = matrix->block_offsets();
			
 
				 
			
 
				   int* crsm_rows = matrix->mutable_rows();
			
 
				-  std::fill(crsm_rows, crsm_rows + num_rows + 1, 0);
			
 
				+  std::fill(crsm_rows, crsm_rows + num_cols + 1, 0);
			
 
				   int* crsm_cols = matrix->mutable_cols();
			
 
				-  std::fill(crsm_cols, crsm_cols + num_nonzeros, 0);
			
 
				+  std::fill(crsm_cols, crsm_cols + matrix->num_nonzeros(), 0);
			
 
				 
			
 
				   CHECK_NOTNULL(program)->clear();
			
 
				   program->resize(product.size());
			
 
				 
			
 
				-  // Iterate over the sorted product terms. This means each row is
			
 
				-  // filled one at a time, and we are able to assign a position in the
			
 
				-  // values array to each term.
			
 
				+  // Non zero elements are not stored consecutively across rows in a block.
			
 
				+  // We seperate nonzero into three categories:
			
 
				+  //   nonzeros in all previous row blocks counted in nnz
			
 
				+  //   nonzeros in current row counted in row_nnz
			
 
				+  //   nonzeros in previous col blocks of current row counted in col_nnz
			
 
				   //
			
 
				-  // If terms repeat, i.e., they contribute to the same entry in the
			
 
				-  // result matrix), then they do not affect the sparsity structure of
			
 
				-  // the result matrix.
			
 
				+  // Give an element (j, k) within a block such that j and k
			
 
				+  // represent the relative position to the starting row and starting col of
			
 
				+  // the block, the row and col for the element is
			
 
				+  //   block_offsets[current.row] + j
			
 
				+  //   block_offsets[current.col] + k
			
 
				+  // The total number of nonzero to the element is
			
 
				+  //   nnz + row_nnz[current.row] * j + col_nnz + k
			
 
				+  //
			
 
				+  // program keeps col_nnz for block product, which is used later for
			
 
				+  // outerproduct computation.
			
 
				+  //
			
 
				+  // There is no special handling for diagonal blocks as we generate
			
 
				+  // BLOCK triangular matrix (diagonal block is full block) instead of
			
 
				+  // standard triangular matrix.
			
 
				   int nnz = 0;
			
 
				-  crsm_cols[0] = product[0].col;
			
 
				-  crsm_rows[product[0].row + 1]++;
			
 
				-  (*program)[product[0].index] = nnz;
			
 
				+  int col_nnz = 0;
			
 
				+
			
 
				+  // Process first product term.
			
 
				+  for (int j = 0; j < blocks[product[0].row]; ++j) {
			
 
				+    crsm_rows[block_offsets[product[0].row] + j + 1] =
			
 
				+        row_nnz[product[0].row];
			
 
				+    for (int k = 0; k < blocks[product[0].col]; ++k) {
			
 
				+      crsm_cols[row_nnz[product[0].row] * j + k]
			
 
				+          = block_offsets[product[0].col] + k;
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  (*program)[product[0].index] = 0;
			
 
				+
			
 
				+  // Process rest product terms.
			
 
				   for (int i = 1; i < product.size(); ++i) {
			
 
				     const ProductTerm& previous = product[i - 1];
			
 
				     const ProductTerm& current = product[i];
			
 
				 
			
 
				     // Sparsity structure is updated only if the term is not a repeat.
			
 
				     if (previous.row != current.row || previous.col != current.col) {
			
 
				-      crsm_cols[++nnz] = current.col;
			
 
				-      crsm_rows[current.row + 1]++;
			
 
				+      col_nnz += blocks[previous.col];
			
 
				+      if (previous.row != current.row) {
			
 
				+        nnz += col_nnz * blocks[previous.row];
			
 
				+        col_nnz = 0;
			
 
				+
			
 
				+        for (int j = 0; j < blocks[current.row]; ++j) {
			
 
				+          crsm_rows[block_offsets[current.row] + j + 1] =
			
 
				+              row_nnz[current.row];
			
 
				+        }
			
 
				+      }
			
 
				+
			
 
				+      for (int j = 0; j < blocks[current.row]; ++j) {
			
 
				+        for (int k = 0; k < blocks[current.col]; ++k) {
			
 
				+          crsm_cols[nnz + row_nnz[current.row] * j + col_nnz + k]
			
 
				+              = block_offsets[current.col] + k;
			
 
				+        }
			
 
				+      }
			
 
				     }
			
 
				 
			
 
				-    // All terms get assigned the position in the values array where
			
 
				-    // their value is accumulated.
			
 
				-    (*program)[current.index] = nnz;
			
 
				+    (*program)[current.index] = col_nnz;
			
 
				   }
			
 
				 
			
 
				-  for (int i = 1; i < num_rows + 1; ++i) {
			
 
				+  for (int i = 1; i < num_cols + 1; ++i) {
			
 
				     crsm_rows[i] += crsm_rows[i - 1];
			
 
				   }
			
 
				 
			
 
				   return matrix;
			
 
				 }
			
 
				 
			
 
				+// input is a matrix of dimesion <row_block_size, input_cols>
			
 
				+// output is a matrix of dimension <col_block1_size, output_cols>
			
 
				+//
			
 
				+// Implement block multiplication O = I1' * I2.
			
 
				+// I1 is block(0, col_block1_begin, row_block_size, col_block1_size) of input
			
 
				+// I2 is block(0, col_block2_begin, row_block_size, col_block2_size) of input
			
 
				+// O is block(0, 0, col_block1_size, col_block2_size) of output
			
 
				+void ComputeBlockMultiplication(const int row_block_size,
			
 
				+                                const int col_block1_size,
			
 
				+                                const int col_block2_size,
			
 
				+                                const int col_block1_begin,
			
 
				+                                const int col_block2_begin,
			
 
				+                                const int input_cols,
			
 
				+                                const double *input,
			
 
				+                                const int output_cols,
			
 
				+                                double *output) {
			
 
				+  for (int r = 0; r < row_block_size; ++r) {
			
 
				+    for (int idx1 = 0; idx1 < col_block1_size; ++idx1) {
			
 
				+      for (int idx2 = 0; idx2 < col_block2_size; ++idx2) {
			
 
				+        output[output_cols * idx1 + idx2] +=
			
 
				+            input[input_cols * r + col_block1_begin + idx1] *
			
 
				+            input[input_cols * r + col_block2_begin + idx2];
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+}
			
 
				 }  // namespace
			
 
				 
			
 
				 CompressedRowSparseMatrix*
			
 
				 CompressedRowSparseMatrix::CreateOuterProductMatrixAndProgram(
			
 
				       const CompressedRowSparseMatrix& m,
			
 
				+      const int stype,
			
 
				       vector<int>* program) {
			
 
				   CHECK_NOTNULL(program)->clear();
			
 
				   CHECK_GT(m.num_nonzeros(), 0)
			
@@ -501,60 +696,152 @@ CompressedRowSparseMatrix::CreateOuterProductMatrixAndProgram(
 
				                 << "you found a bug in Ceres. Please report it.";
			
 
				 
			
 
				   vector<ProductTerm> product;
			
 
				-  const vector<int>& row_blocks = m.row_blocks();
			
 
				-  int row_block_begin = 0;
			
 
				-  // Iterate over row blocks
			
 
				-  for (int row_block = 0; row_block < row_blocks.size(); ++row_block) {
			
 
				-    const int row_block_end = row_block_begin + row_blocks[row_block];
			
 
				-    // Compute the outer product terms for just one row per row block.
			
 
				-    const int r = row_block_begin;
			
 
				-    // Compute the lower triangular part of the product.
			
 
				-    for (int idx1 = m.rows()[r]; idx1 < m.rows()[r + 1]; ++idx1) {
			
 
				-      for (int idx2 = m.rows()[r]; idx2 <= idx1; ++idx2) {
			
 
				-        product.push_back(ProductTerm(m.cols()[idx1],
			
 
				-                                      m.cols()[idx2],
			
 
				-                                      product.size()));
			
 
				+  const vector<int>& col_blocks = m.col_blocks();
			
 
				+  const vector<int>& crsb_rows = m.crsb_rows();
			
 
				+  const vector<int>& crsb_cols = m.crsb_cols();
			
 
				+
			
 
				+  // Give input matrix m in Compressed Row Sparse Block format
			
 
				+  //     (row_block, col_block)
			
 
				+  // represent each block multiplication
			
 
				+  //     (row_block, col_block1)' X (row_block, col_block2)
			
 
				+  // by its product term index and sort the product terms
			
 
				+  //     (col_block1, col_block2, index)
			
 
				+  //
			
 
				+  // Due to the compression on rows, col_block is accessed through idx to
			
 
				+  // crsb_cols.  So col_block is accessed as crsb_cols[idx] in the code.
			
 
				+  for (int row_block = 1; row_block < crsb_rows.size(); ++row_block) {
			
 
				+    for (int idx1 = crsb_rows[row_block - 1]; idx1 < crsb_rows[row_block];
			
 
				+         ++idx1) {
			
 
				+      if (stype > 0) { // Lower triangular matrix.
			
 
				+        for (int idx2 = crsb_rows[row_block - 1]; idx2 <= idx1; ++idx2) {
			
 
				+          product.push_back(ProductTerm(crsb_cols[idx1], crsb_cols[idx2],
			
 
				+                                        product.size()));
			
 
				+        }
			
 
				+      }
			
 
				+      else { // Upper triangular matrix.
			
 
				+        for (int idx2 = idx1; idx2 < crsb_rows[row_block]; ++idx2) {
			
 
				+          product.push_back(ProductTerm(crsb_cols[idx1], crsb_cols[idx2],
			
 
				+                                        product.size()));
			
 
				+        }
			
 
				       }
			
 
				     }
			
 
				-    row_block_begin = row_block_end;
			
 
				   }
			
 
				-  CHECK_EQ(row_block_begin, m.num_rows());
			
 
				+
			
 
				   sort(product.begin(), product.end());
			
 
				-  return CompressAndFillProgram(m.num_cols(), m.num_cols(), product, program);
			
 
				+  return CompressAndFillProgram(m.num_cols(), col_blocks, product, program);
			
 
				 }
			
 
				 
			
 
				+// Give input matrix m in Compressed Row Sparse Block format
			
 
				+//     (row_block, col_block)
			
 
				+// compute outerproduct m' * m as sum of block multiplications
			
 
				+//     (row_block, col_block1)' X (row_block, col_block2)
			
 
				+//
			
 
				+// Given row_block of the input matrix m, we use m_row_begin to represent
			
 
				+// the starting row of the row block and m_row_nnz to represent number of
			
 
				+// nonzero in each row of the row block, then the rows belonging to
			
 
				+// the row block can be represented as a dense matrix starting at
			
 
				+//     m.values() + m.rows()[m_row_begin]
			
 
				+// with dimension
			
 
				+//     <m.row_blocks()[row_block], m_row_nnz>
			
 
				+//
			
 
				+// Then each input matrix block (row_block, col_block) can be represented as
			
 
				+// a block of above dense matrix starting at position
			
 
				+//     (0, m_col_nnz)
			
 
				+// with size
			
 
				+//     <m.row_blocks()[row_block], m.col_blocks()[col_block]>
			
 
				+// where m_col_nnz is the number of nonzero before col_block in each row.
			
 
				+//
			
 
				+// The outerproduct block is represented similarly with m_row_begin,
			
 
				+// m_row_nnz, m_col_nnz, etc. replaced by row_begin, row_nnz, col_nnz, etc.
			
 
				+// The difference is, m_row_begin and m_col_nnz is counted during the
			
 
				+// traverse of block multiplication, while row_begin and col_nnz are got
			
 
				+// from pre-computed block_offsets and program.
			
 
				+//
			
 
				+// Due to the compression on rows, col_block is accessed through
			
 
				+// idx to crsb_col vector. So col_block is accessed as crsb_col[idx]
			
 
				+// in the code.
			
 
				+//
			
 
				+// Note this function produces a triangular matrix in block unit (i.e.
			
 
				+// diagonal block is a normal block) instead of standard triangular matrix.
			
 
				+// So there is no special handling for diagonal blocks.
			
 
				 void CompressedRowSparseMatrix::ComputeOuterProduct(
			
 
				     const CompressedRowSparseMatrix& m,
			
 
				+    const int stype,
			
 
				     const vector<int>& program,
			
 
				     CompressedRowSparseMatrix* result) {
			
 
				   result->SetZero();
			
 
				   double* values = result->mutable_values();
			
 
				-  const vector<int>& row_blocks = m.row_blocks();
			
 
				+  const int* rows = result->rows();
			
 
				+  const vector<int>& block_offsets = result->block_offsets();
			
 
				 
			
 
				   int cursor = 0;
			
 
				-  int row_block_begin = 0;
			
 
				   const double* m_values = m.values();
			
 
				   const int* m_rows = m.rows();
			
 
				-  // Iterate over row blocks.
			
 
				-  for (int row_block = 0; row_block < row_blocks.size(); ++row_block) {
			
 
				-    const int row_block_end = row_block_begin + row_blocks[row_block];
			
 
				-    const int saved_cursor = cursor;
			
 
				-    for (int r = row_block_begin; r < row_block_end; ++r) {
			
 
				-      // Reuse the program segment for each row in this row block.
			
 
				-      cursor = saved_cursor;
			
 
				-      const int row_begin = m_rows[r];
			
 
				-      const int row_end = m_rows[r + 1];
			
 
				-      for (int idx1 = row_begin; idx1 < row_end; ++idx1) {
			
 
				-        const double v1 =  m_values[idx1];
			
 
				-        for (int idx2 = row_begin; idx2 <= idx1; ++idx2, ++cursor) {
			
 
				-          values[program[cursor]] += v1 * m_values[idx2];
			
 
				+  const vector<int>& row_blocks = m.row_blocks();
			
 
				+  const vector<int>& col_blocks = m.col_blocks();
			
 
				+  const vector<int>& crsb_rows = m.crsb_rows();
			
 
				+  const vector<int>& crsb_cols = m.crsb_cols();
			
 
				+
			
 
				+#define COL_BLOCK1 (crsb_cols[idx1])
			
 
				+#define COL_BLOCK2 (crsb_cols[idx2])
			
 
				+
			
 
				+  // Iterate row blocks.
			
 
				+  for (int row_block = 0, m_row_begin = 0; row_block < row_blocks.size();
			
 
				+       m_row_begin += row_blocks[row_block++]) {
			
 
				+    // Non zeros are not stored consecutively across rows in a block.
			
 
				+    // The gaps between rows is the number of nonzeros of the
			
 
				+    // input matrix compressed row.
			
 
				+    const int m_row_nnz =
			
 
				+        m_rows[m_row_begin + 1] - m_rows[m_row_begin];
			
 
				+
			
 
				+    // Iterate (col_block1 x col_block2).
			
 
				+    for (int idx1 = crsb_rows[row_block], m_col_nnz1 = 0;
			
 
				+         idx1 < crsb_rows[row_block + 1];
			
 
				+         m_col_nnz1 += col_blocks[COL_BLOCK1], ++idx1) {
			
 
				+      // Non zeros are not stored consecutively across rows in a block.
			
 
				+      // The gaps between rows is the number of nonzeros of the
			
 
				+      // outerproduct matrix compressed row.
			
 
				+      const int row_begin = block_offsets[COL_BLOCK1];
			
 
				+      const int row_nnz = rows[row_begin + 1] - rows[row_begin];
			
 
				+
			
 
				+      if (stype > 0) {  // Lower triangular matrix.
			
 
				+        for (int idx2 = crsb_rows[row_block], m_col_nnz2 = 0;
			
 
				+             idx2 <= idx1;
			
 
				+             m_col_nnz2 += col_blocks[COL_BLOCK2], ++idx2, ++cursor) {
			
 
				+          int col_nnz = program[cursor];
			
 
				+          ComputeBlockMultiplication(row_blocks[row_block],
			
 
				+                                     col_blocks[COL_BLOCK1],
			
 
				+                                     col_blocks[COL_BLOCK2],
			
 
				+                                     m_col_nnz1,
			
 
				+                                     m_col_nnz2,
			
 
				+                                     m_row_nnz,
			
 
				+                                     m_values + m_rows[m_row_begin],
			
 
				+                                     row_nnz,
			
 
				+                                     values + rows[row_begin] + col_nnz);
			
 
				+        }
			
 
				+      }
			
 
				+      else { // Upper triangular matrix.
			
 
				+        for (int idx2 = idx1, m_col_nnz2 = m_col_nnz1;
			
 
				+             idx2 < crsb_rows[row_block + 1];
			
 
				+             m_col_nnz2 += col_blocks[COL_BLOCK2], ++idx2, ++cursor) {
			
 
				+          int col_nnz = program[cursor];
			
 
				+          ComputeBlockMultiplication(row_blocks[row_block],
			
 
				+                                     col_blocks[COL_BLOCK1],
			
 
				+                                     col_blocks[COL_BLOCK2],
			
 
				+                                     m_col_nnz1,
			
 
				+                                     m_col_nnz2,
			
 
				+                                     m_row_nnz,
			
 
				+                                     m_values + m_rows[m_row_begin],
			
 
				+                                     row_nnz,
			
 
				+                                     values + rows[row_begin] + col_nnz);
			
 
				         }
			
 
				       }
			
 
				     }
			
 
				-    row_block_begin = row_block_end;
			
 
				   }
			
 
				 
			
 
				-  CHECK_EQ(row_block_begin, m.num_rows());
			
 
				+#undef COL_BLOCK1
			
 
				+#undef COL_BLOCK2
			
 
				+
			
 
				   CHECK_EQ(cursor, program.size());
			
 
				 }
			
 
				 
			
--- a/internal/ceres/compressed_row_sparse_matrix.h
+++ b/internal/ceres/compressed_row_sparse_matrix.h
@@ -115,6 +115,15 @@ class CompressedRowSparseMatrix : public SparseMatrix {
 
				   const std::vector<int>& col_blocks() const { return col_blocks_; }
			
 
				   std::vector<int>* mutable_col_blocks() { return &col_blocks_; }
			
 
				 
			
 
				+  const std::vector<int>& block_offsets() const { return block_offsets_; }
			
 
				+  std::vector<int>* mutable_block_offsets() { return &block_offsets_; }
			
 
				+
			
 
				+  const std::vector<int>& crsb_rows() const { return crsb_rows_; }
			
 
				+  std::vector<int>* mutable_crsb_rows() { return &crsb_rows_; }
			
 
				+
			
 
				+  const std::vector<int>& crsb_cols() const { return crsb_cols_; }
			
 
				+  std::vector<int>* mutable_crsb_cols() { return &crsb_cols_; }
			
 
				+
			
 
				   // Destructive array resizing method.
			
 
				   void SetMaxNumNonZeros(int num_nonzeros);
			
 
				 
			
@@ -135,8 +144,8 @@ class CompressedRowSparseMatrix : public SparseMatrix {
 
				   // and create a CompressedRowSparseMatrix corresponding to it.
			
 
				   //
			
 
				   // Also compute a "program" vector, which for every term in the
			
 
				-  // outer product points to the entry in the values array of the
			
 
				-  // result matrix where it should be accumulated.
			
 
				+  // block outer product provides the information for the entry
			
 
				+  // in the values array of the result matrix where it should be accumulated.
			
 
				   //
			
 
				   // This program is used by the ComputeOuterProduct function below to
			
 
				   // compute the outer product.
			
@@ -145,8 +154,13 @@ class CompressedRowSparseMatrix : public SparseMatrix {
 
				   // same sparsity structure, the program only stores the result for
			
 
				   // one row per row block. The ComputeOuterProduct function reuses
			
 
				   // this information for each row in the row block.
			
 
				+  //
			
 
				+  // stype controls the result matrix in upper or lower triangular matrix
			
 
				+  //    stype = 1:  lower triangular matrix
			
 
				+  //    stype = -1: higher triangular matrix
			
 
				   static CompressedRowSparseMatrix* CreateOuterProductMatrixAndProgram(
			
 
				       const CompressedRowSparseMatrix& m,
			
 
				+      const int stype,
			
 
				       std::vector<int>* program);
			
 
				 
			
 
				   // Compute the values array for the expression m.transpose() * m,
			
@@ -154,6 +168,7 @@ class CompressedRowSparseMatrix : public SparseMatrix {
 
				   // created using the CreateOuterProductMatrixAndProgram function
			
 
				   // above.
			
 
				   static void ComputeOuterProduct(const CompressedRowSparseMatrix& m,
			
 
				+                                  const int stype,
			
 
				                                   const std::vector<int>& program,
			
 
				                                   CompressedRowSparseMatrix* result);
			
 
				 
			
@@ -172,6 +187,18 @@ class CompressedRowSparseMatrix : public SparseMatrix {
 
				   std::vector<int> row_blocks_;
			
 
				   std::vector<int> col_blocks_;
			
 
				 
			
 
				+  // For outerproduct matrix (J' * J), we pre-compute its block offsets
			
 
				+  // information here for fast outerproduct computation in block unit.
			
 
				+  // Since the outerproduct matrix is symmetric, we do not need to
			
 
				+  // distinguish row or col block. In another word, this is the prefix
			
 
				+  // sum of row_blocks_/col_blocks_.
			
 
				+  std::vector<int> block_offsets_;
			
 
				+
			
 
				+  // If the matrix has an underlying block structure, then it can also
			
 
				+  // carry with it compressed row sparse block information.
			
 
				+  std::vector<int> crsb_rows_;
			
 
				+  std::vector<int> crsb_cols_;
			
 
				+
			
 
				   CERES_DISALLOW_COPY_AND_ASSIGN(CompressedRowSparseMatrix);
			
 
				 };
			
 
				 
			
--- a/internal/ceres/compressed_row_sparse_matrix_test.cc
+++ b/internal/ceres/compressed_row_sparse_matrix_test.cc
@@ -89,6 +89,13 @@ class CompressedRowSparseMatrixTest : public ::testing::Test {
 
				     vector<int>* col_blocks = crsm->mutable_col_blocks();
			
 
				     col_blocks->resize(num_cols);
			
 
				     std::fill(col_blocks->begin(), col_blocks->end(), 1);
			
 
				+
			
 
				+    // With all blocks of size 1, crsb_rows and crsb_cols are equivalent to
			
 
				+    // rows and cols.
			
 
				+    std::copy(crsm->rows(), crsm->rows() + crsm->num_rows() + 1,
			
 
				+              std::back_inserter(*crsm->mutable_crsb_rows()));
			
 
				+    std::copy(crsm->cols(), crsm->cols() + crsm->num_nonzeros(),
			
 
				+              std::back_inserter(*crsm->mutable_crsb_cols()));
			
 
				   }
			
 
				 
			
 
				   int num_rows;
			
@@ -142,6 +149,9 @@ TEST_F(CompressedRowSparseMatrixTest, DeleteRows) {
 
				   // Clear the row and column blocks as these are purely scalar tests.
			
 
				   crsm->mutable_row_blocks()->clear();
			
 
				   crsm->mutable_col_blocks()->clear();
			
 
				+  crsm->mutable_crsb_rows()->clear();
			
 
				+  crsm->mutable_crsb_cols()->clear();
			
 
				+
			
 
				   for (int i = 0; i < num_rows; ++i) {
			
 
				     tsm->Resize(num_rows - i, num_cols);
			
 
				     crsm->DeleteRows(crsm->num_rows() - tsm->num_rows());
			
@@ -153,6 +163,8 @@ TEST_F(CompressedRowSparseMatrixTest, AppendRows) {
 
				   // Clear the row and column blocks as these are purely scalar tests.
			
 
				   crsm->mutable_row_blocks()->clear();
			
 
				   crsm->mutable_col_blocks()->clear();
			
 
				+  crsm->mutable_crsb_rows()->clear();
			
 
				+  crsm->mutable_crsb_cols()->clear();
			
 
				 
			
 
				   for (int i = 0; i < num_rows; ++i) {
			
 
				     TripletSparseMatrix tsm_appendage(*tsm);
			
@@ -182,6 +194,9 @@ TEST_F(CompressedRowSparseMatrixTest, AppendAndDeleteBlockDiagonalMatrix) {
 
				   const vector<int> pre_row_blocks = crsm->row_blocks();
			
 
				   const vector<int> pre_col_blocks = crsm->col_blocks();
			
 
				 
			
 
				+  const vector<int> pre_crsb_rows = crsm->crsb_rows();
			
 
				+  const vector<int> pre_crsb_cols = crsm->crsb_cols();
			
 
				+
			
 
				   scoped_ptr<CompressedRowSparseMatrix> appendage(
			
 
				       CompressedRowSparseMatrix::CreateBlockDiagonalMatrix(
			
 
				           diagonal.get(), row_and_column_blocks));
			
@@ -202,9 +217,23 @@ TEST_F(CompressedRowSparseMatrixTest, AppendAndDeleteBlockDiagonalMatrix) {
 
				   EXPECT_EQ(expected_row_blocks, crsm->row_blocks());
			
 
				   EXPECT_EQ(expected_col_blocks, crsm->col_blocks());
			
 
				 
			
 
				+  EXPECT_EQ(crsm->crsb_cols().size(),
			
 
				+             pre_crsb_cols.size() + row_and_column_blocks.size());
			
 
				+  EXPECT_EQ(crsm->crsb_rows().size(),
			
 
				+             pre_crsb_rows.size() + row_and_column_blocks.size());
			
 
				+  for (int i = 0; i < row_and_column_blocks.size(); ++i) {
			
 
				+    EXPECT_EQ(crsm->crsb_rows()[i + pre_crsb_rows.size()],
			
 
				+              pre_crsb_rows.back() + i + 1);
			
 
				+    EXPECT_EQ(crsm->crsb_cols()[i + pre_crsb_cols.size()], i);
			
 
				+  }
			
 
				+
			
 
				   crsm->DeleteRows(num_diagonal_rows);
			
 
				   EXPECT_EQ(crsm->row_blocks(), pre_row_blocks);
			
 
				   EXPECT_EQ(crsm->col_blocks(), pre_col_blocks);
			
 
				+
			
 
				+  EXPECT_EQ(crsm->crsb_rows(), pre_crsb_rows);
			
 
				+  EXPECT_EQ(crsm->crsb_cols(), pre_crsb_cols);
			
 
				+
			
 
				 }
			
 
				 
			
 
				 TEST_F(CompressedRowSparseMatrixTest, ToDenseMatrix) {
			
@@ -357,6 +386,14 @@ TEST(CompressedRowSparseMatrix, Transpose) {
 
				   matrix.mutable_col_blocks()->push_back(4);
			
 
				   matrix.mutable_col_blocks()->push_back(2);
			
 
				 
			
 
				+  matrix.mutable_crsb_rows()->push_back(0);
			
 
				+  matrix.mutable_crsb_rows()->push_back(2);
			
 
				+  matrix.mutable_crsb_rows()->push_back(4);
			
 
				+  matrix.mutable_crsb_cols()->push_back(0);
			
 
				+  matrix.mutable_crsb_cols()->push_back(1);
			
 
				+  matrix.mutable_crsb_cols()->push_back(0);
			
 
				+  matrix.mutable_crsb_cols()->push_back(1);
			
 
				+
			
 
				   rows[0] = 0;
			
 
				   cols[0] = 1;
			
 
				   cols[1] = 3;
			
@@ -440,10 +477,16 @@ CompressedRowSparseMatrix* CreateRandomCompressedRowSparseMatrix(
 
				   vector<int> cols;
			
 
				   vector<double> values;
			
 
				 
			
 
				+  vector<int> crsb_rows;
			
 
				+  vector<int> crsb_cols;
			
 
				+
			
 
				   while (values.size() == 0) {
			
 
				     int row_block_begin = 0;
			
 
				+    crsb_rows.clear();
			
 
				+    crsb_cols.clear();
			
 
				     for (int r = 0; r < options.num_row_blocks; ++r) {
			
 
				       int col_block_begin = 0;
			
 
				+      crsb_rows.push_back(crsb_cols.size());
			
 
				       for (int c = 0; c < options.num_col_blocks; ++c) {
			
 
				         if (RandDouble() <= options.block_density) {
			
 
				           for (int i = 0; i < row_blocks[r]; ++i) {
			
@@ -453,11 +496,13 @@ CompressedRowSparseMatrix* CreateRandomCompressedRowSparseMatrix(
 
				               values.push_back(RandNormal());
			
 
				             }
			
 
				           }
			
 
				+          crsb_cols.push_back(c);
			
 
				         }
			
 
				         col_block_begin += col_blocks[c];
			
 
				       }
			
 
				       row_block_begin += row_blocks[r];
			
 
				     }
			
 
				+    crsb_rows.push_back(crsb_cols.size());
			
 
				   }
			
 
				 
			
 
				   const int num_rows = std::accumulate(row_blocks.begin(), row_blocks.end(), 0);
			
@@ -472,6 +517,8 @@ CompressedRowSparseMatrix* CreateRandomCompressedRowSparseMatrix(
 
				   CompressedRowSparseMatrix* matrix = new CompressedRowSparseMatrix(tsm);
			
 
				   (*matrix->mutable_row_blocks())  = row_blocks;
			
 
				   (*matrix->mutable_col_blocks())  = col_blocks;
			
 
				+  (*matrix->mutable_crsb_rows())  = crsb_rows;
			
 
				+  (*matrix->mutable_crsb_cols())  = crsb_cols;
			
 
				   return matrix;
			
 
				 }
			
 
				 
			
@@ -534,11 +581,14 @@ TEST(CompressedRowSparseMatrix, ComputeOuterProduct) {
 
				         cs_di* expected_outer_product =
			
 
				             cxsparse.MatrixMatrixMultiply(&cs_matrix_transpose, cs_matrix);
			
 
				 
			
 
				+        // Use compressed row lower triangular matrix for cxsparse.
			
 
				+        const int stype = 1;
			
 
				         vector<int> program;
			
 
				         scoped_ptr<CompressedRowSparseMatrix> outer_product(
			
 
				             CompressedRowSparseMatrix::CreateOuterProductMatrixAndProgram(
			
 
				-                *matrix, &program));
			
 
				+                *matrix, stype, &program));
			
 
				         CompressedRowSparseMatrix::ComputeOuterProduct(*matrix,
			
 
				+                                                       stype,
			
 
				                                                        program,
			
 
				                                                        outer_product.get());
			
 
				 
			
@@ -556,6 +606,7 @@ TEST(CompressedRowSparseMatrix, ComputeOuterProduct) {
 
				         expected_matrix.triangularView<Eigen::StrictlyLower>().setZero();
			
 
				 
			
 
				         ToDenseMatrix(&actual_outer_product, &actual_matrix);
			
 
				+        actual_matrix.triangularView<Eigen::StrictlyLower>().setZero();
			
 
				         const double diff_norm =
			
 
				             (actual_matrix - expected_matrix).norm() / expected_matrix.norm();
			
 
				         ASSERT_NEAR(diff_norm, 0.0, kTolerance)
			
--- a/internal/ceres/sparse_normal_cholesky_solver.cc
+++ b/internal/ceres/sparse_normal_cholesky_solver.cc
@@ -275,14 +275,21 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImplUsingEigen(
 
				                                &event_logger);
			
 
				   }
			
 
				 
			
 
				+  // Compute outerproduct to compressed row lower triangular matrix.
			
 
				+  // Eigen SimplicialLDLT default uses lower triangular part of matrix.
			
 
				+  // This can change to upper triangular matrix if specifying
			
 
				+  //    Eigen::SimplicialLDLT< _MatrixType, _UpLo, _Ordering >
			
 
				+  // with _UpLo = Upper.
			
 
				+  const int stype = 1;
			
 
				+
			
 
				   if (outer_product_.get() == NULL) {
			
 
				     outer_product_.reset(
			
 
				         CompressedRowSparseMatrix::CreateOuterProductMatrixAndProgram(
			
 
				-            *A, &pattern_));
			
 
				+            *A, stype, &pattern_));
			
 
				   }
			
 
				 
			
 
				   CompressedRowSparseMatrix::ComputeOuterProduct(
			
 
				-      *A, pattern_, outer_product_.get());
			
 
				+      *A, stype, pattern_, outer_product_.get());
			
 
				 
			
 
				   // Map to an upper triangular column major matrix.
			
 
				   //
			
@@ -362,20 +369,21 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImplUsingCXSparse(
 
				   summary.termination_type = LINEAR_SOLVER_SUCCESS;
			
 
				   summary.message = "Success.";
			
 
				 
			
 
				+  // Compute outerproduct to compressed row lower triangular matrix.
			
 
				+  // CXSparse Cholesky factorization uses lower triangular part of the matrix.
			
 
				+  const int stype = 1;
			
 
				+
			
 
				   // Compute the normal equations. J'J delta = J'f and solve them
			
 
				-  // using a sparse Cholesky factorization. Notice that when compared
			
 
				-  // to SuiteSparse we have to explicitly compute the normal equations
			
 
				-  // before they can be factorized. CHOLMOD/SuiteSparse on the other
			
 
				-  // hand can just work off of Jt to compute the Cholesky
			
 
				-  // factorization of the normal equations.
			
 
				+  // using a sparse Cholesky factorization. Notice that we explicitly
			
 
				+  // compute the normal equations before they can be factorized.
			
 
				   if (outer_product_.get() == NULL) {
			
 
				     outer_product_.reset(
			
 
				         CompressedRowSparseMatrix::CreateOuterProductMatrixAndProgram(
			
 
				-            *A, &pattern_));
			
 
				+            *A, stype, &pattern_));
			
 
				   }
			
 
				 
			
 
				   CompressedRowSparseMatrix::ComputeOuterProduct(
			
 
				-      *A, pattern_, outer_product_.get());
			
 
				+      *A, stype, pattern_, outer_product_.get());
			
 
				   cs_di lhs =
			
 
				       cxsparse_.CreateSparseMatrixTransposeView(outer_product_.get());
			
 
				 
			
@@ -431,8 +439,26 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
 
				   summary.num_iterations = 1;
			
 
				   summary.message = "Success.";
			
 
				 
			
 
				+  // Compute outerproduct to compressed row upper triangular matrix.
			
 
				+  // This is the fastest option for the our default natural ordering
			
 
				+  // (see comment in cholmod_factorize.c:205 in SuiteSparse).
			
 
				+  const int stype = -1;
			
 
				+
			
 
				+  // Compute the normal equations. J'J delta = J'f and solve them
			
 
				+  // using a sparse Cholesky factorization. Notice that we explicitly
			
 
				+  // compute the normal equations before they can be factorized.
			
 
				+  if (outer_product_.get() == NULL) {
			
 
				+    outer_product_.reset(
			
 
				+        CompressedRowSparseMatrix::CreateOuterProductMatrixAndProgram(
			
 
				+            *A, stype, &pattern_));
			
 
				+  }
			
 
				+
			
 
				+  CompressedRowSparseMatrix::ComputeOuterProduct(
			
 
				+      *A, stype, pattern_, outer_product_.get());
			
 
				+
			
 
				   const int num_cols = A->num_cols();
			
 
				-  cholmod_sparse lhs = ss_.CreateSparseMatrixTransposeView(A);
			
 
				+  cholmod_sparse lhs =
			
 
				+      ss_.CreateSparseMatrixTransposeView(outer_product_.get(), stype);
			
 
				   event_logger.AddEvent("Setup");
			
 
				 
			
 
				   if (options_.dynamic_sparsity) {
			
@@ -443,7 +469,7 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
 
				     if (options_.use_postordering) {
			
 
				       factor_ = ss_.BlockAnalyzeCholesky(&lhs,
			
 
				                                          A->col_blocks(),
			
 
				-                                         A->row_blocks(),
			
 
				+                                         A->col_blocks(),
			
 
				                                          &summary.message);
			
 
				     } else {
			
 
				       if (options_.dynamic_sparsity) {
			
--- a/internal/ceres/suitesparse.cc
+++ b/internal/ceres/suitesparse.cc
@@ -96,7 +96,7 @@ cholmod_sparse* SuiteSparse::CreateSparseMatrixTranspose(
 
				 }
			
 
				 
			
 
				 cholmod_sparse SuiteSparse::CreateSparseMatrixTransposeView(
			
 
				-    CompressedRowSparseMatrix* A) {
			
 
				+    CompressedRowSparseMatrix* A, const int stype) {
			
 
				   cholmod_sparse m;
			
 
				   m.nrow = A->num_cols();
			
 
				   m.ncol = A->num_rows();
			
@@ -106,7 +106,7 @@ cholmod_sparse SuiteSparse::CreateSparseMatrixTransposeView(
 
				   m.i = reinterpret_cast<void*>(A->mutable_cols());
			
 
				   m.x = reinterpret_cast<void*>(A->mutable_values());
			
 
				   m.z = NULL;
			
 
				-  m.stype = 0;  // Matrix is not symmetric.
			
 
				+  m.stype = stype;
			
 
				   m.itype = CHOLMOD_INT;
			
 
				   m.xtype = CHOLMOD_REAL;
			
 
				   m.dtype = CHOLMOD_DOUBLE;
			
--- a/internal/ceres/suitesparse.h
+++ b/internal/ceres/suitesparse.h
@@ -96,8 +96,11 @@ class SuiteSparse {
 
				 
			
 
				   // Create a cholmod_sparse wrapper around the contents of A. This is
			
 
				   // a shallow object, which refers to the contents of A and does not
			
 
				-  // use the SuiteSparse machinery to allocate memory.
			
 
				-  cholmod_sparse CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A);
			
 
				+  // use the SuiteSparse machinery to allocate memory. This can
			
 
				+  // create a choldmod_sparse structure with different stype (upper
			
 
				+  // triangular: stype= -1 or lower triangular: stype = 1).
			
 
				+  cholmod_sparse CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A,
			
 
				+                                                 const int stype);
			
 
				 
			
 
				   // Given a vector x, build a cholmod_dense vector of size out_size
			
 
				   // with the first in_size entries copied from x. If x is NULL, then
			
--- a/internal/ceres/unsymmetric_linear_solver_test.cc
+++ b/internal/ceres/unsymmetric_linear_solver_test.cc
@@ -78,6 +78,15 @@ class UnsymmetricLinearSolverTest : public ::testing::Test {
 
				       for (int i = 0; i < A_->num_cols(); ++i) {
			
 
				         crsm->mutable_col_blocks()->push_back(1);
			
 
				       }
			
 
				+
			
 
				+      // With all blocks of size 1, crsb_rows and crsb_cols are equivalent to
			
 
				+      // rows and cols.
			
 
				+      std::copy(crsm->rows(), crsm->rows() + crsm->num_rows() + 1,
			
 
				+                std::back_inserter(*crsm->mutable_crsb_rows()));
			
 
				+
			
 
				+      std::copy(crsm->cols(), crsm->cols() + crsm->num_nonzeros(),
			
 
				+                std::back_inserter(*crsm->mutable_crsb_cols()));
			
 
				+
			
 
				       transformed_A.reset(crsm);
			
 
				     } else {
			
 
				       LOG(FATAL) << "Unknown linear solver : " << options.type;