// Copyright 2021 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_

#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <type_traits>

#include "absl/base/config.h"
#include "absl/base/internal/invoke.h"
#include "absl/base/optimization.h"
#include "absl/container/internal/compressed_tuple.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {

// Default feature enable states for cord ring buffers
enum CordFeatureDefaults {
  kCordEnableRingBufferDefault = false,
  kCordShallowSubcordsDefault = false
};

extern std::atomic<bool> cord_ring_buffer_enabled;
extern std::atomic<bool> shallow_subcords_enabled;

inline void enable_cord_ring_buffer(bool enable) {
  cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed);
}

inline void enable_shallow_subcords(bool enable) {
  shallow_subcords_enabled.store(enable, std::memory_order_relaxed);
}

enum Constants {
  // The inlined size to use with absl::InlinedVector.
  //
  // Note: The InlinedVectors in this file (and in cord.h) do not need to use
  // the same value for their inlined size. The fact that they do is historical.
  // It may be desirable for each to use a different inlined size optimized for
  // that InlinedVector's usage.
  //
  // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
  // the inlined vector size (47 exists for backward compatibility).
  kInlinedVectorSize = 47,

  // Prefer copying blocks of at most this size, otherwise reference count.
  kMaxBytesToCopy = 511
};

// Wraps std::atomic for reference counting.
class Refcount {
 public:
  constexpr Refcount() : count_{kRefIncrement} {}
  struct Immortal {};
  explicit constexpr Refcount(Immortal) : count_(kImmortalTag) {}

  // Increments the reference count. Imposes no memory ordering.
  inline void Increment() {
    count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
  }

  // Asserts that the current refcount is greater than 0. If the refcount is
  // greater than 1, decrements the reference count.
  //
  // Returns false if there are no references outstanding; true otherwise.
  // Inserts barriers to ensure that state written before this method returns
  // false will be visible to a thread that just observed this method returning
  // false.
  inline bool Decrement() {
    int32_t refcount = count_.load(std::memory_order_acquire);
    assert(refcount > 0 || refcount & kImmortalTag);
    return refcount != kRefIncrement &&
           count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) !=
               kRefIncrement;
  }

  // Same as Decrement but expect that refcount is greater than 1.
  inline bool DecrementExpectHighRefcount() {
    int32_t refcount =
        count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
    assert(refcount > 0 || refcount & kImmortalTag);
    return refcount != kRefIncrement;
  }

  // Returns the current reference count using acquire semantics.
  inline int32_t Get() const {
    return count_.load(std::memory_order_acquire) >> kImmortalShift;
  }

  // Returns whether the atomic integer is 1.
  // If the reference count is used in the conventional way, a
  // reference count of 1 implies that the current thread owns the
  // reference and no other thread shares it.
  // This call performs the test for a reference count of one, and
  // performs the memory barrier needed for the owning thread
  // to act on the object, knowing that it has exclusive access to the
  // object.
  inline bool IsOne() {
    return count_.load(std::memory_order_acquire) == kRefIncrement;
  }

  bool IsImmortal() const {
    return (count_.load(std::memory_order_relaxed) & kImmortalTag) != 0;
  }

 private:
  // We reserve the bottom bit to tag a reference count as immortal.
  // By making it `1` we ensure that we never reach `0` when adding/subtracting
  // `2`, thus it never looks as if it should be destroyed.
  // These are used for the StringConstant constructor where we do not increase
  // the refcount at construction time (due to constinit requirements) but we
  // will still decrease it at destruction time to avoid branching on Unref.
  enum {
    kImmortalShift = 1,
    kRefIncrement = 1 << kImmortalShift,
    kImmortalTag = kRefIncrement - 1
  };

  std::atomic<int32_t> count_;
};

// The overhead of a vtable is too much for Cord, so we roll our own subclasses
// using only a single byte to differentiate classes from each other - the "tag"
// byte.  Define the subclasses first so we can provide downcasting helper
// functions in the base class.

struct CordRepConcat;
struct CordRepExternal;
struct CordRepFlat;
struct CordRepSubstring;
class CordRepRing;

// Various representations that we allow
enum CordRepKind {
  CONCAT = 0,
  EXTERNAL = 1,
  SUBSTRING = 2,
  RING = 3,

  // We have different tags for different sized flat arrays,
  // starting with FLAT, and limited to MAX_FLAT_TAG. The 224 value is based on
  // the current 'size to tag' encoding of 8 / 32 bytes. If a new tag is needed
  // in the future, then 'FLAT' and 'MAX_FLAT_TAG' should be adjusted as well
  // as the Tag <---> Size logic so that FLAT still represents the minimum flat
  // allocation size. (32 bytes as of now).
  FLAT = 4,
  MAX_FLAT_TAG = 224
};
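
// Illustrative sanity checks, not part of the upstream header: every non-flat
// kind must sort below FLAT so that `tag < FLAT` identifies the node type, and
// the largest flat tag must still fit in the single `tag` byte. Assuming the
// 8-byte 'size to tag' granularity mentioned above, FLAT (4) lines up with the
// 32 byte minimum flat allocation (4 * 8 == 32).
static_assert(RING < FLAT, "non-flat kinds must stay below FLAT");
static_assert(MAX_FLAT_TAG <= 255, "flat tags must fit in the uint8_t tag byte");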

struct CordRep {
  CordRep() = default;
  constexpr CordRep(Refcount::Immortal immortal, size_t l)
      : length(l), refcount(immortal), tag(EXTERNAL), storage{} {}

  // The following three fields have to be less than 32 bytes since
  // that is the smallest supported flat node size.
  size_t length;
  Refcount refcount;
  // If tag < FLAT, it represents CordRepKind and indicates the type of node.
  // Otherwise, the node type is CordRepFlat and the tag is the encoded size.
  uint8_t tag;
  char storage[1];  // Starting point for flat array: MUST BE LAST FIELD

  inline CordRepRing* ring();
  inline const CordRepRing* ring() const;
  inline CordRepConcat* concat();
  inline const CordRepConcat* concat() const;
  inline CordRepSubstring* substring();
  inline const CordRepSubstring* substring() const;
  inline CordRepExternal* external();
  inline const CordRepExternal* external() const;
  inline CordRepFlat* flat();
  inline const CordRepFlat* flat() const;

  // --------------------------------------------------------------------
  // Memory management

  // This internal routine is called from the cold path of Unref below. Keeping
  // it in a separate routine allows good inlining of Unref into many profitable
  // call sites. However, the call to this function can be highly disruptive to
  // the register pressure in those callers. To minimize the cost to callers, we
  // use a special LLVM calling convention that preserves most registers. This
  // allows the call to this routine in cold paths to not disrupt the caller's
  // register pressure. This calling convention is not available on all
  // platforms; we intentionally allow LLVM to ignore the attribute rather than
  // attempting to hardcode the list of supported platforms.
#if defined(__clang__) && !defined(__i386__)
#if !(defined(ABSL_HAVE_MEMORY_SANITIZER) ||  \
      defined(ABSL_HAVE_THREAD_SANITIZER) ||  \
      defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
      defined(UNDEFINED_BEHAVIOR_SANITIZER))
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wattributes"
  __attribute__((preserve_most))
#pragma clang diagnostic pop
#endif  // *_SANITIZER
#endif
  static void Destroy(CordRep* rep);

  // Increments the reference count of `rep`.
  // Requires `rep` to be a non-null pointer value.
  static inline CordRep* Ref(CordRep* rep);

  // Decrements the reference count of `rep`. Destroys rep if count reaches
  // zero. Requires `rep` to be a non-null pointer value.
  static inline void Unref(CordRep* rep);
};

struct CordRepConcat : public CordRep {
  CordRep* left;
  CordRep* right;

  uint8_t depth() const { return static_cast<uint8_t>(storage[0]); }
  void set_depth(uint8_t depth) { storage[0] = static_cast<char>(depth); }
};
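
// A minimal illustration, not part of the upstream header, of how the single
// `tag` byte stands in for a vtable: callers branch on `tag` and then use the
// checked downcasting helpers declared on CordRep. The helper name
// `ConcatChildrenLengthSketch` is hypothetical and exists only for this
// example.
inline size_t ConcatChildrenLengthSketch(const CordRep* rep) {
  assert(rep->tag == CONCAT);  // concat() asserts the same invariant.
  const CordRepConcat* concat = rep->concat();
  // For a well-formed concat node, `length` equals the sum of the children.
  return concat->left->length + concat->right->length;
}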

struct CordRepSubstring : public CordRep {
  size_t start;  // Starting offset of substring in child
  CordRep* child;
};

// Type for function pointer that will invoke the releaser function and also
// delete the `CordRepExternalImpl` corresponding to the passed in
// `CordRepExternal`.
using ExternalReleaserInvoker = void (*)(CordRepExternal*);

// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
struct CordRepExternal : public CordRep {
  CordRepExternal() = default;
  explicit constexpr CordRepExternal(absl::string_view str)
      : CordRep(Refcount::Immortal{}, str.size()),
        base(str.data()),
        releaser_invoker(nullptr) {}

  const char* base;
  // Pointer to function that knows how to call and destroy the releaser.
  ExternalReleaserInvoker releaser_invoker;

  // Deletes (releases) the external rep.
  // Requires rep != nullptr and rep->tag == EXTERNAL
  static void Delete(CordRep* rep);
};

struct Rank1 {};
struct Rank0 : Rank1 {};

template <typename Releaser, typename = ::absl::base_internal::invoke_result_t<
                                 Releaser, absl::string_view>>
void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view data) {
  ::absl::base_internal::invoke(std::forward<Releaser>(releaser), data);
}

template <typename Releaser,
          typename = ::absl::base_internal::invoke_result_t<Releaser>>
void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view) {
  ::absl::base_internal::invoke(std::forward<Releaser>(releaser));
}

// We use CompressedTuple so that we can benefit from EBCO.
template <typename Releaser>
struct CordRepExternalImpl
    : public CordRepExternal,
      public ::absl::container_internal::CompressedTuple<Releaser> {
  // The extra int arg is so that we can avoid interfering with copy/move
  // constructors while still benefitting from perfect forwarding.
  template <typename T>
  CordRepExternalImpl(T&& releaser, int)
      : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) {
    this->releaser_invoker = &Release;
  }

  ~CordRepExternalImpl() {
    InvokeReleaser(Rank0{}, std::move(this->template get<0>()),
                   absl::string_view(base, length));
  }

  static void Release(CordRepExternal* rep) {
    delete static_cast<CordRepExternalImpl*>(rep);
  }
};

inline void CordRepExternal::Delete(CordRep* rep) {
  assert(rep != nullptr && rep->tag == EXTERNAL);
  auto* rep_external = static_cast<CordRepExternal*>(rep);
  assert(rep_external->releaser_invoker != nullptr);
  rep_external->releaser_invoker(rep_external);
}
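
// A minimal sketch, not part of the upstream header, of how the pieces above
// fit together: the creator of an external rep allocates a CordRepExternalImpl
// holding the caller's releaser, points `base`/`length` at the externally
// owned bytes, and relies on `releaser_invoker` to run the releaser and delete
// the impl when the last reference goes away. The helper name
// `MakeExternalSketch` is hypothetical.
template <typename Releaser>
CordRepExternal* MakeExternalSketch(absl::string_view data,
                                    Releaser&& releaser) {
  using ReleaserType = absl::decay_t<Releaser>;
  auto* rep = new CordRepExternalImpl<ReleaserType>(
      std::forward<Releaser>(releaser), /*unused int arg*/ 0);
  rep->length = data.size();
  rep->tag = EXTERNAL;
  rep->base = data.data();
  return rep;  // Destroyed via CordRepExternal::Delete on the final Unref.
}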

template <typename Str>
struct ConstInitExternalStorage {
  ABSL_CONST_INIT static CordRepExternal value;
};

template <typename Str>
CordRepExternal ConstInitExternalStorage<Str>::value(Str::value);

enum {
  kMaxInline = 15,
};

constexpr char GetOrNull(absl::string_view data, size_t pos) {
  return pos < data.size() ? data[pos] : '\0';
}

// We store cordz_info as 64 bit pointer value in big endian format. This
// guarantees that the least significant byte of cordz_info matches the last
// byte of the inline data representation in as_chars_, which holds the inlined
// size or the 'is_tree' bit.
using cordz_info_t = int64_t;

// Assert that the `cordz_info` pointer value perfectly overlaps the last half
// of `as_chars_` and can hold a pointer value.
static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");

// BigEndianByte() creates a big endian representation of 'value', i.e.: a big
// endian value where the last byte in the host's representation holds `value`,
// with all other bytes being 0.
static constexpr cordz_info_t BigEndianByte(unsigned char value) {
#if defined(ABSL_IS_BIG_ENDIAN)
  return value;
#else
  return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
#endif
}

class InlineData {
 public:
  // kNullCordzInfo holds the big endian representation of intptr_t(1)
  // This is the 'null' / initial value of 'cordz_info'. The null value
  // is specifically big endian 1 as with 64-bit pointers, the last
  // byte of cordz_info overlaps with the last byte holding the tag.
  static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1);

  // kFakeCordzInfo holds a 'fake', non-null cordz-info value we use to
  // emulate the previous 'kProfiled' tag logic in 'set_profiled' until
  // cord code is changed to store cordz_info values in InlineData.
  static constexpr cordz_info_t kFakeCordzInfo = BigEndianByte(9);

  constexpr InlineData() : as_chars_{0} {}
  explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {}
  explicit constexpr InlineData(absl::string_view chars)
      : as_chars_{
            GetOrNull(chars, 0),  GetOrNull(chars, 1),
            GetOrNull(chars, 2),  GetOrNull(chars, 3),
            GetOrNull(chars, 4),  GetOrNull(chars, 5),
            GetOrNull(chars, 6),  GetOrNull(chars, 7),
            GetOrNull(chars, 8),  GetOrNull(chars, 9),
            GetOrNull(chars, 10), GetOrNull(chars, 11),
            GetOrNull(chars, 12), GetOrNull(chars, 13),
            GetOrNull(chars, 14), static_cast<char>((chars.size() << 1))} {}

  // Returns true if the current instance is empty.
  // The 'empty value' is an inlined data value of zero length.
  bool is_empty() const { return tag() == 0; }

  // Returns true if the current instance holds a tree value.
  bool is_tree() const { return (tag() & 1) != 0; }

  // Returns true if the current instance holds a cordz_info value.
  // Requires the current instance to hold a tree value.
  bool is_profiled() const {
    assert(is_tree());
    return as_tree_.cordz_info != kNullCordzInfo;
  }

  // Returns a read only pointer to the character data inside this instance.
  // Requires the current instance to hold inline data.
  const char* as_chars() const {
    assert(!is_tree());
    return as_chars_;
  }
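
  // Descriptive note added for illustration (not upstream documentation): the
  // 16 byte InlineData layout packs either up to 15 inlined characters plus a
  // final tag byte, or a CordRep* in the first 8 bytes plus cordz_info in the
  // last 8. For an inlined value of size 5 the tag byte holds (5 << 1) == 10,
  // so inline_size() recovers 5 via `tag() >> 1` and the low 'is_tree' bit
  // stays 0; tree values always have the low bit of the last byte set.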

  // Returns a mutable pointer to the character data inside this instance.
  // Should be used for 'write only' operations setting an inlined value.
  // Applications can set the value of inlined data either before or after
  // setting the inlined size, i.e., both of the below are valid:
  //
  //   // Set inlined data and inline size
  //   memcpy(data_.as_chars(), data, size);
  //   data_.set_inline_size(size);
  //
  //   // Set inlined size and inline data
  //   data_.set_inline_size(size);
  //   memcpy(data_.as_chars(), data, size);
  //
  // It's an error to read from the returned pointer without a preceding write
  // if the current instance does not hold inline data, i.e.: is_tree() == true.
  char* as_chars() { return as_chars_; }

  // Returns the tree value of this value.
  // Requires the current instance to hold a tree value.
  CordRep* as_tree() const {
    assert(is_tree());
    return as_tree_.rep;
  }

  // Initialize this instance to holding the tree value `rep`,
  // initializing the cordz_info to null, i.e.: 'not profiled'.
  void make_tree(CordRep* rep) {
    as_tree_.rep = rep;
    as_tree_.cordz_info = kNullCordzInfo;
  }

  // Set the tree value of this instance to `rep`.
  // Requires the current instance to already hold a tree value.
  // Does not affect the value of cordz_info.
  void set_tree(CordRep* rep) {
    assert(is_tree());
    as_tree_.rep = rep;
  }

  // Returns the size of the inlined character data inside this instance.
  // Requires the current instance to hold inline data.
  size_t inline_size() const {
    assert(!is_tree());
    return tag() >> 1;
  }

  // Sets the size of the inlined character data inside this instance.
  // Requires `size` to be <= kMaxInline.
  // See the documentation on 'as_chars()' for more information and examples.
  void set_inline_size(size_t size) {
    ABSL_ASSERT(size <= kMaxInline);
    tag() = static_cast<char>(size << 1);
  }

  // Sets or unsets the 'is_profiled' state of this instance.
  // Requires the current instance to hold a tree value.
  void set_profiled(bool profiled) {
    assert(is_tree());
    as_tree_.cordz_info = profiled ? kFakeCordzInfo : kNullCordzInfo;
  }

 private:
  // See cordz_info_t for forced alignment and size of `cordz_info` details.
  struct AsTree {
    explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
        : rep(tree), cordz_info(kNullCordzInfo) {}
    // This union uses up extra space so that whether rep is 32 or 64 bits,
    // cordz_info will still start at the eighth byte, and the last
    // byte of cordz_info will still be the last byte of InlineData.
    union {
      absl::cord_internal::CordRep* rep;
      cordz_info_t unused_aligner;
    };
    cordz_info_t cordz_info;
  };

  char& tag() { return reinterpret_cast<char*>(this)[kMaxInline]; }
  char tag() const { return reinterpret_cast<const char*>(this)[kMaxInline]; }

  // If the data has length <= kMaxInline, we store it in `as_chars_`, and
  // store the size in the last char of `as_chars_` shifted left by 1.
  // Else we store it in a tree and store a pointer to that tree in
  // `as_tree_.rep` and store a tag in `tagged_size`.
  union {
    char as_chars_[kMaxInline + 1];
    AsTree as_tree_;
  };
};

static_assert(sizeof(InlineData) == kMaxInline + 1, "");

inline CordRepConcat* CordRep::concat() {
  assert(tag == CONCAT);
  return static_cast<CordRepConcat*>(this);
}

inline const CordRepConcat* CordRep::concat() const {
  assert(tag == CONCAT);
  return static_cast<const CordRepConcat*>(this);
}

inline CordRepSubstring* CordRep::substring() {
  assert(tag == SUBSTRING);
  return static_cast<CordRepSubstring*>(this);
}

inline const CordRepSubstring* CordRep::substring() const {
  assert(tag == SUBSTRING);
  return static_cast<const CordRepSubstring*>(this);
}

inline CordRepExternal* CordRep::external() {
  assert(tag == EXTERNAL);
  return static_cast<CordRepExternal*>(this);
}

inline const CordRepExternal* CordRep::external() const {
  assert(tag == EXTERNAL);
  return static_cast<const CordRepExternal*>(this);
}

inline CordRep* CordRep::Ref(CordRep* rep) {
  assert(rep != nullptr);
  rep->refcount.Increment();
  return rep;
}
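
// Usage sketch, added for illustration only (hypothetical variable names):
//
//   CordRep* extra = CordRep::Ref(rep);  // take an additional reference
//   ...
//   CordRep::Unref(extra);               // drop it; Destroy() runs at zero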

inline void CordRep::Unref(CordRep* rep) {
  assert(rep != nullptr);
  // Expect refcount to be 0. Avoiding the cost of an atomic decrement should
  // typically outweigh the cost of an extra branch checking for ref == 1.
  if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) {
    Destroy(rep);
  }
}

}  // namespace cord_internal

ABSL_NAMESPACE_END
}  // namespace absl
#endif  // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_