//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations
under the License.//// -----------------------------------------------------------------------------// File: str_split.h// -----------------------------------------------------------------------------//// This file contains functions for splitting strings. It defines the main// `StrSplit()` function, several delimiters for determining the boundaries on// which to split the string, and predicates for filtering delimited results.// `StrSplit()` adapts the returned collection to the type specified by the// caller.//// Example:////   // Splits the given string on commas. Returns the results in a//   // vector of strings.//   std::vector<std::string> v = absl::StrSplit("a,b,c", ',');//   // Can also use ","//   // v[0] == "a", v[1] == "b", v[2] == "c"//// See StrSplit() below for more information.#ifndef ABSL_STRINGS_STR_SPLIT_H_#define ABSL_STRINGS_STR_SPLIT_H_#include <algorithm>#include <cstddef>#include <map>#include <set>#include <string>#include <utility>#include <vector>#include "absl/base/internal/raw_logging.h"#include "absl/strings/internal/str_split_internal.h"#include "absl/strings/string_view.h"#include "absl/strings/strip.h"namespace absl {//------------------------------------------------------------------------------// Delimiters//------------------------------------------------------------------------------//// `StrSplit()` uses delimiters to define the boundaries between elements in the// provided input. Several `Delimiter` types are defined below. 
If a string// (`const char*`, `std::string`, or `absl::string_view`) is passed in place of// an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it// were passed a `ByString` delimiter.//// A `Delimiter` is an object with a `Find()` function that knows how to find// the first occurrence of itself in a given `absl::string_view`.//// The following `Delimiter` types are available for use within `StrSplit()`:////   - `ByString` (default for string arguments)//   - `ByChar` (default for a char argument)//   - `ByAnyChar`//   - `ByLength`//   - `MaxSplits`////// A Delimiter's Find() member function will be passed the input text that is to// be split and the position to begin searching for the next delimiter in the// input text. The returned absl::string_view should refer to the next// occurrence (after pos) of the represented delimiter; this returned// absl::string_view represents the next location where the input string should// be broken. The returned absl::string_view may be zero-length if the Delimiter// does not represent a part of the string (e.g., a fixed-length delimiter). If// no delimiter is found in the given text, a zero-length absl::string_view// referring to text.end() should be returned (e.g.,// absl::string_view(text.end(), 0)). 
It is important that the returned// absl::string_view always be within the bounds of input text given as an// argument--it must not refer to a string that is physically located outside of// the given string.//// The following example is a simple Delimiter object that is created with a// single char and will look for that char in the text passed to the Find()// function:////   struct SimpleDelimiter {//     const char c_;//     explicit SimpleDelimiter(char c) : c_(c) {}//     absl::string_view Find(absl::string_view text, size_t pos) {//       auto found = text.find(c_, pos);//       if (found == absl::string_view::npos)//         return absl::string_view(text.end(), 0);////       return absl::string_view(text, found, 1);//     }//   };// ByString//// A sub-string delimiter. If `StrSplit()` is passed a string in place of a// `Delimiter` object, the string will be implicitly converted into a// `ByString` delimiter.//// Example:////   // Because a string literal is converted to an `absl::ByString`,//   // the following two splits are equivalent.////   std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", ");////   using absl::ByString;//   std::vector<std::string> v2 = absl::StrSplit("a, b, c",//                                                ByString(", "));//   // v[0] == "a", v[1] == "b", v[2] == "c"class ByString { public:  explicit ByString(absl::string_view sp);  absl::string_view Find(absl::string_view text, size_t pos) const; private:  const std::string delimiter_;};// ByChar//// A single character delimiter. 
`ByChar` is functionally equivalent to a// 1-char string within a `ByString` delimiter, but slightly more// efficient.//// Example:////   // Because a char literal is converted to a absl::ByChar,//   // the following two splits are equivalent.//   std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');//   using absl::ByChar;//   std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(','));//   // v[0] == "a", v[1] == "b", v[2] == "c"//// `ByChar` is also the default delimiter if a single character is given// as the delimiter to `StrSplit()`. For example, the following calls are// equivalent:////   std::vector<std::string> v = absl::StrSplit("a-b", '-');////   using absl::ByChar;//   std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-'));//class ByChar { public:  explicit ByChar(char c) : c_(c) {}  absl::string_view Find(absl::string_view text, size_t pos) const; private:  char c_;};// ByAnyChar//// A delimiter that will match any of the given byte-sized characters within// its provided string.//// Note: this delimiter works with single-byte string data, but does not work// with variable-width encodings, such as UTF-8.//// Example:////   using absl::ByAnyChar;//   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));//   // v[0] == "a", v[1] == "b", v[2] == "c"//// If `ByAnyChar` is given the empty string, it behaves exactly like// `ByString` and matches each individual character in the input string.//class ByAnyChar { public:  explicit ByAnyChar(absl::string_view sp);  absl::string_view Find(absl::string_view text, size_t pos) const; private:  const std::string delimiters_;};// ByLength//// A delimiter for splitting into equal-length strings. 
The length argument to// the constructor must be greater than 0.//// Note: this delimiter works with single-byte string data, but does not work// with variable-width encodings, such as UTF-8.//// Example:////   using absl::ByLength;//   std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3));//   // v[0] == "123", v[1] == "456", v[2] == "789"//// Note that the string does not have to be a multiple of the fixed split// length. In such a case, the last substring will be shorter.////   using absl::ByLength;//   std::vector<std::string> v = absl::StrSplit("12345", ByLength(2));////   // v[0] == "12", v[1] == "34", v[2] == "5"class ByLength { public:  explicit ByLength(ptrdiff_t length);  absl::string_view Find(absl::string_view text, size_t pos) const; private:  const ptrdiff_t length_;};namespace strings_internal {// A traits-like metafunction for selecting the default Delimiter object type// for a particular Delimiter type. The base case simply exposes type Delimiter// itself as the delimiter's Type. 
However, there are specializations for// string-like objects that map them to the ByString delimiter object.// This allows functions like absl::StrSplit() and absl::MaxSplits() to accept// string-like objects (e.g., ',') as delimiter arguments but they will be// treated as if a ByString delimiter was given.template <typename Delimiter>struct SelectDelimiter {  using type = Delimiter;};template <>struct SelectDelimiter<char> {  using type = ByChar;};template <>struct SelectDelimiter<char*> {  using type = ByString;};template <>struct SelectDelimiter<const char*> {  using type = ByString;};template <>struct SelectDelimiter<absl::string_view> {  using type = ByString;};template <>struct SelectDelimiter<std::string> {  using type = ByString;};// Wraps another delimiter and sets a max number of matches for that delimiter.template <typename Delimiter>class MaxSplitsImpl { public:  MaxSplitsImpl(Delimiter delimiter, int limit)      : delimiter_(delimiter), limit_(limit), count_(0) {}  absl::string_view Find(absl::string_view text, size_t pos) {    if (count_++ == limit_) {      return absl::string_view(text.end(), 0);  // No more matches.    }    return delimiter_.Find(text, pos);  } private:  Delimiter delimiter_;  const int limit_;  int count_;};}  // namespace strings_internal// MaxSplits()//// A delimiter that limits the number of matches which can occur to the passed// `limit`. 
The last element in the returned collection will contain all// remaining unsplit pieces, which may contain instances of the delimiter.// The collection will contain at most `limit` + 1 elements.// Example:////   using absl::MaxSplits;//   std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1));////   // v[0] == "a", v[1] == "b,c"template <typename Delimiter>inline strings_internal::MaxSplitsImpl<    typename strings_internal::SelectDelimiter<Delimiter>::type>MaxSplits(Delimiter delimiter, int limit) {  typedef      typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType;  return strings_internal::MaxSplitsImpl<DelimiterType>(      DelimiterType(delimiter), limit);}//------------------------------------------------------------------------------// Predicates//------------------------------------------------------------------------------//// Predicates filter the results of a `StrSplit()` by determining whether or not// a resultant element is included in the result set. A predicate may be passed// as an optional third argument to the `StrSplit()` function.//// Predicates are unary functions (or functors) that take a single// `absl::string_view` argument and return a bool indicating whether the// argument should be included (`true`) or excluded (`false`).//// Predicates are useful when filtering out empty substrings. By default, empty// substrings may be returned by `StrSplit()`, which is similar to the way split// functions work in other programming languages.// AllowEmpty()//// Always returns `true`, indicating that all strings--including empty// strings--should be included in the split output. 
This predicate is not// strictly needed because this is the default behavior of `StrSplit()`;// however, it might be useful at some call sites to make the intent explicit.//// Example:////  std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty());////  // v[0] == " a ", v[1] == " ", v[2] == "", v[3] = "b", v[4] == ""struct AllowEmpty {  bool operator()(absl::string_view) const { return true; }};// SkipEmpty()//// Returns `false` if the given `absl::string_view` is empty, indicating that// `StrSplit()` should omit the empty string.//// Example:////   std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty());////   // v[0] == "a", v[1] == "b"//// Note: `SkipEmpty()` does not consider a string containing only whitespace// to be empty. To skip such whitespace as well, use the `SkipWhitespace()`// predicate.struct SkipEmpty {  bool operator()(absl::string_view sp) const { return !sp.empty(); }};// SkipWhitespace()//// Returns `false` if the given `absl::string_view` is empty *or* contains only// whitespace, indicating that `StrSplit()` should omit the string.//// Example:////   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",//                                               ',', SkipWhitespace());//   // v[0] == " a ", v[1] == "b"////   // SkipEmpty() would return whitespace elements//   std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty());//   // v[0] == " a ", v[1] == " ", v[2] == "b"struct SkipWhitespace {  bool operator()(absl::string_view sp) const {    sp = absl::StripAsciiWhitespace(sp);    return !sp.empty();  }};//------------------------------------------------------------------------------//                                  StrSplit()//------------------------------------------------------------------------------// StrSplit()//// Splits a given string based on the provided `Delimiter` object, returning the// elements within the type specified by the caller. 
// Optionally, you may pass a
// `Predicate` to `StrSplit()` indicating whether to include or exclude the
// resulting element within the final result set. (See the overviews for
// Delimiters and Predicates above.)
//
// Example:
//
//   std::vector<std::string> v = absl::StrSplit("a,b,c,d", ',');
//   // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
//
// You can also provide an explicit `Delimiter` object:
//
// Example:
//
//   using absl::ByAnyChar;
//   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
//   // v[0] == "a", v[1] == "b", v[2] == "c"
//
// See above for more information on delimiters.
//
// By default, empty strings are included in the result set. You can optionally
// include a third `Predicate` argument to apply a test for whether the
// resultant element should be included in the result set:
//
// Example:
//
//   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
//                                               ',', SkipWhitespace());
//   // v[0] == " a ", v[1] == "b"
//
// See above for more information on predicates.
//
//------------------------------------------------------------------------------
// StrSplit() Return Types
//------------------------------------------------------------------------------
//
// The `StrSplit()` function adapts the returned collection to the collection
// specified by the caller (e.g. `std::vector` above). The returned collections
// may contain `std::string`, `absl::string_view` (in which case the original
// string being split must ensure that it outlives the collection), or any
// object that can be explicitly created from an `absl::string_view`. This
// behavior works for:
//
// 1) All standard STL containers including `std::vector`, `std::list`,
//    `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap`
// 2) `std::pair` (which is not actually a container). See below.
//
// Example:
//
//   // The results are returned as `absl::string_view` objects.
//   // Note that we
//   // have to ensure that the input string outlives any results.
//   std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
//
//   // Stores results in a std::set<std::string>, which also performs
//   // de-duplication and orders the elements in ascending order.
//   std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ',');
//   // a == {"a", "b", "c"}
//
//   // `StrSplit()` can be used within a range-based for loop, in which case
//   // each element will be of type `absl::string_view`.
//   std::vector<std::string> v;
//   for (const auto sv : absl::StrSplit("a,b,c", ',')) {
//     if (sv != "b") v.emplace_back(sv);
//   }
//   // v[0] == "a", v[1] == "c"
//
//   // Stores results in a map. The map implementation assumes that the input
//   // is provided as a series of key/value pairs. For example, the 0th element
//   // resulting from the split will be stored as a key to the 1st element. If
//   // an odd number of elements are resolved, the last element is paired with
//   // a default-constructed value (e.g., empty string).
//   std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ',');
//   // m["a"] == "b", m["c"] == ""     // last component value equals ""
//
// Splitting to `std::pair` is an interesting case because it can hold only two
// elements and is not a collection type. When splitting to a `std::pair` the
// first two split strings become the `std::pair` `.first` and `.second`
// members, respectively. The remaining split substrings are discarded.
If there// are less than two split substrings, the empty string is used for the// corresponding// `std::pair` member.//// Example:////   // Stores first two split strings as the members in a std::pair.//   std::pair<string, string> p = absl::StrSplit("a,b,c", ',');//   // p.first == "a", p.second == "b"       // "c" is omitted.//// The `StrSplit()` function can be used multiple times to perform more// complicated splitting logic, such as intelligently parsing key-value pairs.//// Example:////   // The input string "a=b=c,d=e,f=,g" becomes//   // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }//   std::map<string, string> m;//   for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {//     m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));//   }//   EXPECT_EQ("b=c", m.find("a")->second);//   EXPECT_EQ("e", m.find("d")->second);//   EXPECT_EQ("", m.find("f")->second);//   EXPECT_EQ("", m.find("g")->second);//// WARNING: Due to a legacy bug that is maintained for backward compatibility,// splitting the following empty string_views produces different results:////   absl::StrSplit(absl::string_view(""), '-');  // {""}//   absl::StrSplit(absl::string_view(), '-');    // {}, but should be {""}//// Try not to depend on this distinction because the bug may one day be fixed.template <typename Delimiter>strings_internal::Splitter<    typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty>StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) {  using DelimiterType =      typename strings_internal::SelectDelimiter<Delimiter>::type;  return strings_internal::Splitter<DelimiterType, AllowEmpty>(      std::move(text), DelimiterType(d), AllowEmpty());}template <typename Delimiter, typename Predicate>strings_internal::Splitter<    typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate>StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d,         Predicate p) {  using DelimiterType =      typename 
strings_internal::SelectDelimiter<Delimiter>::type;  return strings_internal::Splitter<DelimiterType, Predicate>(      std::move(text), DelimiterType(d), std::move(p));}}  // namespace absl#endif  // ABSL_STRINGS_STR_SPLIT_H_
 |