| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335 | #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_#define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_#include <limits.h>#include <stddef.h>#include <stdlib.h>#include <cassert>#include <cstdint>#include <initializer_list>#include <iosfwd>#include <iterator>#include <memory>#include <string>#include <vector>#include "absl/strings/internal/str_format/checker.h"#include "absl/strings/internal/str_format/extension.h"namespace absl {ABSL_NAMESPACE_BEGINnamespace str_format_internal {enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none };std::string LengthModToString(LengthMod v);// The analyzed properties of a single specified conversion.struct UnboundConversion {  UnboundConversion()      : flags() /* This is required to zero all the fields of flags. */ {    flags.basic = true;  }  class InputValue {   public:    void set_value(int value) {      assert(value >= 0);      value_ = value;    }    int value() const { return value_; }    // Marks the value as "from arg". aka the '*' format.    // Requires `value >= 1`.    // When set, is_from_arg() return true and get_from_arg() returns the    // original value.    // `value()`'s return value is unspecfied in this state.    void set_from_arg(int value) {      assert(value > 0);      value_ = -value - 1;    }    bool is_from_arg() const { return value_ < -1; }    int get_from_arg() const {      assert(is_from_arg());      return -value_ - 1;    }   private:    int value_ = -1;  };  // No need to initialize. It will always be set in the parser.  int arg_position;  InputValue width;  InputValue precision;  Flags flags;  LengthMod length_mod = LengthMod::none;  FormatConversionChar conv = FormatConversionCharInternal::kNone;};// Consume conversion spec prefix (not including '%') of [p, end) if valid.// Examples of valid specs would be e.g.: "s", "d", "-12.6f".// If valid, it returns the first character following the conversion spec,// and the spec part is broken down and returned in 'conv'.// If invalid, returns nullptr.const char* ConsumeUnboundConversion(const char* p, const char* end,                                     UnboundConversion* conv, int* next_arg);// Helper tag class for the table below.// It allows fast `char -> ConversionChar/LengthMod` checking and// conversions.class ConvTag { public:  constexpr ConvTag(ConversionChar conversion_char)  // NOLINT      : tag_(static_cast<int8_t>(conversion_char)) {}  // We invert the length modifiers to make them negative so that we can easily  // test for them.  constexpr ConvTag(LengthMod length_mod)  // NOLINT      : tag_(~static_cast<std::int8_t>(length_mod)) {}  // Everything else is -128, which is negative to make is_conv() simpler.  constexpr ConvTag() : tag_(-128) {}  bool is_conv() const { return tag_ >= 0; }  bool is_length() const { return tag_ < 0 && tag_ != -128; }  ConversionChar as_conv() const {    assert(is_conv());    return static_cast<ConversionChar>(tag_);  }  LengthMod as_length() const {    assert(is_length());    return static_cast<LengthMod>(~tag_);  } private:  std::int8_t tag_;};extern const ConvTag kTags[256];// Keep a single table for all the conversion chars and length modifiers.inline ConvTag GetTagForChar(char c) {  return kTags[static_cast<unsigned char>(c)];}// Parse the format string provided in 'src' and pass the identified items into// 'consumer'.// Text runs will be passed by calling//   Consumer::Append(string_view);// ConversionItems will be passed by calling//   Consumer::ConvertOne(UnboundConversion, string_view);// In the case of ConvertOne, the string_view that is passed is the// portion of the format string corresponding to the conversion, not including// the leading %. On success, it returns true. On failure, it stops and returns// false.template <typename Consumer>bool ParseFormatString(string_view src, Consumer consumer) {  int next_arg = 0;  const char* p = src.data();  const char* const end = p + src.size();  while (p != end) {    const char* percent = static_cast<const char*>(memchr(p, '%', end - p));    if (!percent) {      // We found the last substring.      return consumer.Append(string_view(p, end - p));    }    // We found a percent, so push the text run then process the percent.    if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) {      return false;    }    if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;    auto tag = GetTagForChar(percent[1]);    if (tag.is_conv()) {      if (ABSL_PREDICT_FALSE(next_arg < 0)) {        // This indicates an error in the format string.        // The only way to get `next_arg < 0` here is to have a positional        // argument first which sets next_arg to -1 and then a non-positional        // argument.        return false;      }      p = percent + 2;      // Keep this case separate from the one below.      // ConvertOne is more efficient when the compiler can see that the `basic`      // flag is set.      UnboundConversion conv;      conv.conv = tag.as_conv();      conv.arg_position = ++next_arg;      if (ABSL_PREDICT_FALSE(              !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {        return false;      }    } else if (percent[1] != '%') {      UnboundConversion conv;      p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg);      if (ABSL_PREDICT_FALSE(p == nullptr)) return false;      if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(          conv, string_view(percent + 1, p - (percent + 1))))) {        return false;      }    } else {      if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;      p = percent + 2;      continue;    }  }  return true;}// Always returns true, or fails to compile in a constexpr context if s does not// point to a constexpr char array.constexpr bool EnsureConstexpr(string_view s) {  return s.empty() || s[0] == s[0];}class ParsedFormatBase { public:  explicit ParsedFormatBase(      string_view format, bool allow_ignored,      std::initializer_list<FormatConversionCharSet> convs);  ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }  ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); }  ParsedFormatBase& operator=(const ParsedFormatBase& other) {    if (this == &other) return *this;    has_error_ = other.has_error_;    items_ = other.items_;    size_t text_size = items_.empty() ? 0 : items_.back().text_end;    data_.reset(new char[text_size]);    memcpy(data_.get(), other.data_.get(), text_size);    return *this;  }  ParsedFormatBase& operator=(ParsedFormatBase&& other) {    if (this == &other) return *this;    has_error_ = other.has_error_;    data_ = std::move(other.data_);    items_ = std::move(other.items_);    // Reset the vector to make sure the invariants hold.    other.items_.clear();    return *this;  }  template <typename Consumer>  bool ProcessFormat(Consumer consumer) const {    const char* const base = data_.get();    string_view text(base, 0);    for (const auto& item : items_) {      const char* const end = text.data() + text.size();      text = string_view(end, (base + item.text_end) - end);      if (item.is_conversion) {        if (!consumer.ConvertOne(item.conv, text)) return false;      } else {        if (!consumer.Append(text)) return false;      }    }    return !has_error_;  }  bool has_error() const { return has_error_; } private:  // Returns whether the conversions match and if !allow_ignored it verifies  // that all conversions are used by the format.  bool MatchesConversions(      bool allow_ignored,      std::initializer_list<FormatConversionCharSet> convs) const;  struct ParsedFormatConsumer;  struct ConversionItem {    bool is_conversion;    // Points to the past-the-end location of this element in the data_ array.    size_t text_end;    UnboundConversion conv;  };  bool has_error_;  std::unique_ptr<char[]> data_;  std::vector<ConversionItem> items_;};// A value type representing a preparsed format.  These can be created, copied// around, and reused to speed up formatting loops.// The user must specify through the template arguments the conversion// characters used in the format. This will be checked at compile time.//// This class uses Conv enum values to specify each argument.// This allows for more flexibility as you can specify multiple possible// conversion characters for each argument.// ParsedFormat<char...> is a simplified alias for when the user only// needs to specify a single conversion character for each argument.//// Example://   // Extended format supports multiple characters per argument://   using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;//   MyFormat GetFormat(bool use_hex) {//     if (use_hex) return MyFormat("foo %x bar");//     return MyFormat("foo %d bar");//   }//   // 'format' can be used with any value that supports 'd' and 'x',//   // like `int`.//   auto format = GetFormat(use_hex);//   value = StringF(format, i);//// This class also supports runtime format checking with the ::New() and// ::NewAllowIgnored() factory functions.// This is the only API that allows the user to pass a runtime specified format// string. These factory functions will return NULL if the format does not match// the conversions requested by the user.template <str_format_internal::Conv... C>class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase { public:  explicit ExtendedParsedFormat(string_view format)#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER      __attribute__((          enable_if(str_format_internal::EnsureConstexpr(format),                    "Format string is not constexpr."),          enable_if(str_format_internal::ValidFormatImpl<C...>(format),                    "Format specified does not match the template arguments.")))#endif  // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER      : ExtendedParsedFormat(format, false) {  }  // ExtendedParsedFormat factory function.  // The user still has to specify the conversion characters, but they will not  // be checked at compile time. Instead, it will be checked at runtime.  // This delays the checking to runtime, but allows the user to pass  // dynamically sourced formats.  // It returns NULL if the format does not match the conversion characters.  // The user is responsible for checking the return value before using it.  //  // The 'New' variant will check that all the specified arguments are being  // consumed by the format and return NULL if any argument is being ignored.  // The 'NewAllowIgnored' variant will not verify this and will allow formats  // that ignore arguments.  static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {    return New(format, false);  }  static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(      string_view format) {    return New(format, true);  } private:  static std::unique_ptr<ExtendedParsedFormat> New(string_view format,                                                   bool allow_ignored) {    std::unique_ptr<ExtendedParsedFormat> conv(        new ExtendedParsedFormat(format, allow_ignored));    if (conv->has_error()) return nullptr;    return conv;  }  ExtendedParsedFormat(string_view s, bool allow_ignored)      : ParsedFormatBase(s, allow_ignored, {C...}) {}};}  // namespace str_format_internalABSL_NAMESPACE_END}  // namespace absl#endif  // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
 |