| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419 | // Copyright 2018 The Abseil Authors.//// Licensed under the Apache License, Version 2.0 (the "License");// you may not use this file except in compliance with the License.// You may obtain a copy of the License at////      http://www.apache.org/licenses/LICENSE-2.0//// Unless required by applicable law or agreed to in writing, software// distributed under the License is distributed on an "AS IS" BASIS,// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.// See the License for the specific language governing permissions and// limitations under the License.#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_#include <algorithm>#include <cstdint>#include <iostream>#include <string>#include "absl/strings/ascii.h"#include "absl/strings/internal/charconv_parse.h"#include "absl/strings/string_view.h"namespace absl {namespace strings_internal {// The largest power that 5 that can be raised to, and still fit in a uint32_t.constexpr int kMaxSmallPowerOfFive = 13;// The largest power that 10 that can be raised to, and still fit in a uint32_t.constexpr int kMaxSmallPowerOfTen = 9;extern const uint32_t kFiveToNth[kMaxSmallPowerOfFive + 1];extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1];// Large, fixed-width unsigned integer.//// Exact rounding for decimal-to-binary floating point conversion requires very// large integer math, but a design goal of absl::from_chars is to avoid// allocating memory.  The integer precision needed for decimal-to-binary// conversions is large but bounded, so a huge fixed-width integer class// suffices.//// This is an intentionally limited big integer class.  Only needed operations// are implemented.  All storage lives in an array data member, and all// arithmetic is done in-place, to avoid requiring separate storage for operand// and result.//// This is an internal class.  Some methods live in the .cc file, and are// instantiated only for the values of max_words we need.template <int max_words>class BigUnsigned { public:  static_assert(max_words == 4 || max_words == 84,                "unsupported max_words value");  BigUnsigned() : size_(0), words_{} {}  explicit constexpr BigUnsigned(uint64_t v)      : size_((v >> 32) ? 2 : v ? 1 : 0),        words_{static_cast<uint32_t>(v & 0xffffffffu),               static_cast<uint32_t>(v >> 32)} {}  // Constructs a BigUnsigned from the given string_view containing a decimal  // value.  If the input std::string is not a decimal integer, constructs a 0  // instead.  explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} {    // Check for valid input, returning a 0 otherwise.  This is reasonable    // behavior only because this constructor is for unit tests.    if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() ||        sv.empty()) {      return;    }    int exponent_adjust =        ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1);    if (exponent_adjust > 0) {      MultiplyByTenToTheNth(exponent_adjust);    }  }  // Loads the mantissa value of a previously-parsed float.  //  // Returns the associated decimal exponent.  The value of the parsed float is  // exactly *this * 10**exponent.  int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits);  // Returns the number of decimal digits of precision this type provides.  All  // numbers with this many decimal digits or fewer are representable by this  // type.  //  // Analagous to std::numeric_limits<BigUnsigned>::digits10.  static constexpr int Digits10() {    // 9975007/1035508 is very slightly less than log10(2**32).    return static_cast<uint64_t>(max_words) * 9975007 / 1035508;  }  // Shifts left by the given number of bits.  void ShiftLeft(int count) {    if (count > 0) {      const int word_shift = count / 32;      if (word_shift >= max_words) {        SetToZero();        return;      }      size_ = (std::min)(size_ + word_shift, max_words);      count %= 32;      if (count == 0) {        std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_);      } else {        for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) {          words_[i] = (words_[i - word_shift] << count) |                      (words_[i - word_shift - 1] >> (32 - count));        }        words_[word_shift] = words_[0] << count;        // Grow size_ if necessary.        if (size_ < max_words && words_[size_]) {          ++size_;        }      }      std::fill(words_, words_ + word_shift, 0u);    }  }  // Multiplies by v in-place.  void MultiplyBy(uint32_t v) {    if (size_ == 0 || v == 1) {      return;    }    if (v == 0) {      SetToZero();      return;    }    const uint64_t factor = v;    uint64_t window = 0;    for (int i = 0; i < size_; ++i) {      window += factor * words_[i];      words_[i] = window & 0xffffffff;      window >>= 32;    }    // If carry bits remain and there's space for them, grow size_.    if (window && size_ < max_words) {      words_[size_] = window & 0xffffffff;      ++size_;    }  }  void MultiplyBy(uint64_t v) {    uint32_t words[2];    words[0] = static_cast<uint32_t>(v);    words[1] = static_cast<uint32_t>(v >> 32);    if (words[1] == 0) {      MultiplyBy(words[0]);    } else {      MultiplyBy(2, words);    }  }  // Multiplies in place by 5 to the power of n.  n must be non-negative.  void MultiplyByFiveToTheNth(int n) {    while (n >= kMaxSmallPowerOfFive) {      MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]);      n -= kMaxSmallPowerOfFive;    }    if (n > 0) {      MultiplyBy(kFiveToNth[n]);    }  }  // Multiplies in place by 10 to the power of n.  n must be non-negative.  void MultiplyByTenToTheNth(int n) {    if (n > kMaxSmallPowerOfTen) {      // For large n, raise to a power of 5, then shift left by the same amount.      // (10**n == 5**n * 2**n.)  This requires fewer multiplications overall.      MultiplyByFiveToTheNth(n);      ShiftLeft(n);    } else if (n > 0) {      // We can do this more quickly for very small N by using a single      // multiplication.      MultiplyBy(kTenToNth[n]);    }  }  // Returns the value of 5**n, for non-negative n.  This implementation uses  // a lookup table, and is faster then seeding a BigUnsigned with 1 and calling  // MultiplyByFiveToTheNth().  static BigUnsigned FiveToTheNth(int n);  // Multiplies by another BigUnsigned, in-place.  template <int M>  void MultiplyBy(const BigUnsigned<M>& other) {    MultiplyBy(other.size(), other.words());  }  void SetToZero() {    std::fill(words_, words_ + size_, 0u);    size_ = 0;  }  // Returns the value of the nth word of this BigUnsigned.  This is  // range-checked, and returns 0 on out-of-bounds accesses.  uint32_t GetWord(int index) const {    if (index < 0 || index >= size_) {      return 0;    }    return words_[index];  }  // Returns this integer as a decimal std::string.  This is not used in the decimal-  // to-binary conversion; it is intended to aid in testing.  std::string ToString() const;  int size() const { return size_; }  const uint32_t* words() const { return words_; } private:  // Reads the number between [begin, end), possibly containing a decimal point,  // into this BigUnsigned.  //  // Callers are required to ensure [begin, end) contains a valid number, with  // one or more decimal digits and at most one decimal point.  This routine  // will behave unpredictably if these preconditions are not met.  //  // Only the first `significant_digits` digits are read.  Digits beyond this  // limit are "sticky": If the final significant digit is 0 or 5, and if any  // dropped digit is nonzero, then that final significant digit is adjusted up  // to 1 or 6.  This adjustment allows for precise rounding.  //  // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to  // account for the decimal point and for dropped significant digits.  After  // this function returns,  //   actual_value_of_parsed_string ~= *this * 10**exponent_adjustment.  int ReadDigits(const char* begin, const char* end, int significant_digits);  // Performs a step of big integer multiplication.  This computes the full  // (64-bit-wide) values that should be added at the given index (step), and  // adds to that location in-place.  //  // Because our math all occurs in place, we must multiply starting from the  // highest word working downward.  (This is a bit more expensive due to the  // extra carries involved.)  //  // This must be called in steps, for each word to be calculated, starting from  // the high end and working down to 0.  The first value of `step` should be  //   `std::min(original_size + other.size_ - 2, max_words - 1)`.  // The reason for this expression is that multiplying the i'th word from one  // multiplicand and the j'th word of another multiplicand creates a  // two-word-wide value to be stored at the (i+j)'th element.  The highest  // word indices we will access are `original_size - 1` from this object, and  // `other.size_ - 1` from our operand.  Therefore,  // `original_size + other.size_ - 2` is the first step we should calculate,  // but limited on an upper bound by max_words.  // Working from high-to-low ensures that we do not overwrite the portions of  // the initial value of *this which are still needed for later steps.  //  // Once called with step == 0, *this contains the result of the  // multiplication.  //  // `original_size` is the size_ of *this before the first call to  // MultiplyStep().  `other_words` and `other_size` are the contents of our  // operand.  `step` is the step to perform, as described above.  void MultiplyStep(int original_size, const uint32_t* other_words,                    int other_size, int step);  void MultiplyBy(int other_size, const uint32_t* other_words) {    const int original_size = size_;    const int first_step =        (std::min)(original_size + other_size - 2, max_words - 1);    for (int step = first_step; step >= 0; --step) {      MultiplyStep(original_size, other_words, other_size, step);    }  }  // Adds a 32-bit value to the index'th word, with carry.  void AddWithCarry(int index, uint32_t value) {    if (value) {      while (index < max_words && value > 0) {        words_[index] += value;        // carry if we overflowed in this word:        if (value > words_[index]) {          value = 1;          ++index;        } else {          value = 0;        }      }      size_ = (std::min)(max_words, (std::max)(index + 1, size_));    }  }  void AddWithCarry(int index, uint64_t value) {    if (value && index < max_words) {      uint32_t high = value >> 32;      uint32_t low = value & 0xffffffff;      words_[index] += low;      if (words_[index] < low) {        ++high;        if (high == 0) {          // Carry from the low word caused our high word to overflow.          // Short circuit here to do the right thing.          AddWithCarry(index + 2, static_cast<uint32_t>(1));          return;        }      }      if (high > 0) {        AddWithCarry(index + 1, high);      } else {        // Normally 32-bit AddWithCarry() sets size_, but since we don't call        // it when `high` is 0, do it ourselves here.        size_ = (std::min)(max_words, (std::max)(index + 1, size_));      }    }  }  // Divide this in place by a constant divisor.  Returns the remainder of the  // division.  template <uint32_t divisor>  uint32_t DivMod() {    uint64_t accumulator = 0;    for (int i = size_ - 1; i >= 0; --i) {      accumulator <<= 32;      accumulator += words_[i];      // accumulator / divisor will never overflow an int32_t in this loop      words_[i] = static_cast<uint32_t>(accumulator / divisor);      accumulator = accumulator % divisor;    }    while (size_ > 0 && words_[size_ - 1] == 0) {      --size_;    }    return static_cast<uint32_t>(accumulator);  }  // The number of elements in words_ that may carry significant values.  // All elements beyond this point are 0.  //  // When size_ is 0, this BigUnsigned stores the value 0.  // When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is  // nonzero.  This can occur due to overflow truncation.  // In particular, x.size_ != y.size_ does *not* imply x != y.  int size_;  uint32_t words_[max_words];};// Compares two big integer instances.//// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs.template <int N, int M>int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {  int limit = (std::max)(lhs.size(), rhs.size());  for (int i = limit - 1; i >= 0; --i) {    const uint32_t lhs_word = lhs.GetWord(i);    const uint32_t rhs_word = rhs.GetWord(i);    if (lhs_word < rhs_word) {      return -1;    } else if (lhs_word > rhs_word) {      return 1;    }  }  return 0;}template <int N, int M>bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {  int limit = (std::max)(lhs.size(), rhs.size());  for (int i = 0; i < limit; ++i) {    if (lhs.GetWord(i) != rhs.GetWord(i)) {      return false;    }  }  return true;}template <int N, int M>bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {  return !(lhs == rhs);}template <int N, int M>bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {  return Compare(lhs, rhs) == -1;}template <int N, int M>bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {  return rhs < lhs;}template <int N, int M>bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {  return !(rhs < lhs);}template <int N, int M>bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {  return !(lhs < rhs);}// Output operator for BigUnsigned, for testing purposes only.template <int N>std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) {  return os << num.ToString();}// Explicit instantiation declarations for the sizes of BigUnsigned that we// are using.//// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is// still bigger than an int128, and 84 is a large value we will want to use// in the from_chars implementation.//// Comments justifying the use of 84 belong in the from_chars implementation,// and will be added in a follow-up CL.extern template class BigUnsigned<4>;extern template class BigUnsigned<84>;}  // namespace strings_internal}  // namespace absl#endif  // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
 |