| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224 | 
							- // Copyright 2017 The Abseil Authors.
 
- //
 
- // Licensed under the Apache License, Version 2.0 (the "License");
 
- // you may not use this file except in compliance with the License.
 
- // You may obtain a copy of the License at
 
- //
 
- //      https://www.apache.org/licenses/LICENSE-2.0
 
- //
 
- // Unless required by applicable law or agreed to in writing, software
 
- // distributed under the License is distributed on an "AS IS" BASIS,
 
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 
- // See the License for the specific language governing permissions and
 
- // limitations under the License.
 
- #include <cstdint>
 
- #include <mutex>  // NOLINT(build/c++11)
 
- #include <vector>
 
- #include "absl/base/config.h"
 
- #include "absl/base/internal/cycleclock.h"
 
- #include "absl/base/internal/spinlock.h"
 
- #include "absl/synchronization/blocking_counter.h"
 
- #include "absl/synchronization/internal/thread_pool.h"
 
- #include "absl/synchronization/mutex.h"
 
- #include "benchmark/benchmark.h"
 
- namespace {
 
- void BM_Mutex(benchmark::State& state) {
 
-   static absl::Mutex* mu = new absl::Mutex;
 
-   for (auto _ : state) {
 
-     absl::MutexLock lock(mu);
 
-   }
 
- }
 
- BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu();
 
- static void DelayNs(int64_t ns, int* data) {
 
-   int64_t end = absl::base_internal::CycleClock::Now() +
 
-                 ns * absl::base_internal::CycleClock::Frequency() / 1e9;
 
-   while (absl::base_internal::CycleClock::Now() < end) {
 
-     ++(*data);
 
-     benchmark::DoNotOptimize(*data);
 
-   }
 
- }
 
// Scoped lock guard used by the contention benchmarks: acquires the mutex in
// the constructor and releases it in the destructor.
//
// The primary template works with any mutex type that exposes Lock() and
// Unlock() member functions. The guard is non-copyable, because a copy would
// release the same mutex a second time when it is destroyed.
template <typename MutexType>
class RaiiLocker {
 public:
  // Locks `*mu`; `mu` must be non-null and outlive this guard.
  explicit RaiiLocker(MutexType* mu) : mu_(mu) { mu_->Lock(); }
  ~RaiiLocker() { mu_->Unlock(); }

  RaiiLocker(const RaiiLocker&) = delete;
  RaiiLocker& operator=(const RaiiLocker&) = delete;

 private:
  MutexType* mu_;
};

// Specialization for std::mutex, whose member functions are spelled
// lock()/unlock() rather than Lock()/Unlock(). Non-copyable for the same
// reason as the primary template.
template <>
class RaiiLocker<std::mutex> {
 public:
  // Locks `*mu`; `mu` must be non-null and outlive this guard.
  explicit RaiiLocker(std::mutex* mu) : mu_(mu) { mu_->lock(); }
  ~RaiiLocker() { mu_->unlock(); }

  RaiiLocker(const RaiiLocker&) = delete;
  RaiiLocker& operator=(const RaiiLocker&) = delete;

 private:
  std::mutex* mu_;
};
 
- template <typename MutexType>
 
- void BM_Contended(benchmark::State& state) {
 
-   struct Shared {
 
-     MutexType mu;
 
-     int data = 0;
 
-   };
 
-   static auto* shared = new Shared;
 
-   int local = 0;
 
-   for (auto _ : state) {
 
-     // Here we model both local work outside of the critical section as well as
 
-     // some work inside of the critical section. The idea is to capture some
 
-     // more or less realisitic contention levels.
 
-     // If contention is too low, the benchmark won't measure anything useful.
 
-     // If contention is unrealistically high, the benchmark will favor
 
-     // bad mutex implementations that block and otherwise distract threads
 
-     // from the mutex and shared state for as much as possible.
 
-     // To achieve this amount of local work is multiplied by number of threads
 
-     // to keep ratio between local work and critical section approximately
 
-     // equal regardless of number of threads.
 
-     DelayNs(100 * state.threads, &local);
 
-     RaiiLocker<MutexType> locker(&shared->mu);
 
-     DelayNs(state.range(0), &shared->data);
 
-   }
 
- }
 
- BENCHMARK_TEMPLATE(BM_Contended, absl::Mutex)
 
-     ->UseRealTime()
 
-     // ThreadPerCpu poorly handles non-power-of-two CPU counts.
 
-     ->Threads(1)
 
-     ->Threads(2)
 
-     ->Threads(4)
 
-     ->Threads(6)
 
-     ->Threads(8)
 
-     ->Threads(12)
 
-     ->Threads(16)
 
-     ->Threads(24)
 
-     ->Threads(32)
 
-     ->Threads(48)
 
-     ->Threads(64)
 
-     ->Threads(96)
 
-     ->Threads(128)
 
-     ->Threads(192)
 
-     ->Threads(256)
 
-     // Some empirically chosen amounts of work in critical section.
 
-     // 1 is low contention, 200 is high contention and few values in between.
 
-     ->Arg(1)
 
-     ->Arg(20)
 
-     ->Arg(50)
 
-     ->Arg(200);
 
- BENCHMARK_TEMPLATE(BM_Contended, absl::base_internal::SpinLock)
 
-     ->UseRealTime()
 
-     // ThreadPerCpu poorly handles non-power-of-two CPU counts.
 
-     ->Threads(1)
 
-     ->Threads(2)
 
-     ->Threads(4)
 
-     ->Threads(6)
 
-     ->Threads(8)
 
-     ->Threads(12)
 
-     ->Threads(16)
 
-     ->Threads(24)
 
-     ->Threads(32)
 
-     ->Threads(48)
 
-     ->Threads(64)
 
-     ->Threads(96)
 
-     ->Threads(128)
 
-     ->Threads(192)
 
-     ->Threads(256)
 
-     // Some empirically chosen amounts of work in critical section.
 
-     // 1 is low contention, 200 is high contention and few values in between.
 
-     ->Arg(1)
 
-     ->Arg(20)
 
-     ->Arg(50)
 
-     ->Arg(200);
 
- BENCHMARK_TEMPLATE(BM_Contended, std::mutex)
 
-     ->UseRealTime()
 
-     // ThreadPerCpu poorly handles non-power-of-two CPU counts.
 
-     ->Threads(1)
 
-     ->Threads(2)
 
-     ->Threads(4)
 
-     ->Threads(6)
 
-     ->Threads(8)
 
-     ->Threads(12)
 
-     ->Threads(16)
 
-     ->Threads(24)
 
-     ->Threads(32)
 
-     ->Threads(48)
 
-     ->Threads(64)
 
-     ->Threads(96)
 
-     ->Threads(128)
 
-     ->Threads(192)
 
-     ->Threads(256)
 
-     // Some empirically chosen amounts of work in critical section.
 
-     // 1 is low contention, 200 is high contention and few values in between.
 
-     ->Arg(1)
 
-     ->Arg(20)
 
-     ->Arg(50)
 
-     ->Arg(200);
 
- // Measure the overhead of conditions on mutex release (when they must be
 
- // evaluated).  Mutex has (some) support for equivalence classes allowing
 
- // Conditions with the same function/argument to potentially not be multiply
 
- // evaluated.
 
- //
 
- // num_classes==0 is used for the special case of every waiter being distinct.
 
- void BM_ConditionWaiters(benchmark::State& state) {
 
-   int num_classes = state.range(0);
 
-   int num_waiters = state.range(1);
 
-   struct Helper {
 
-     static void Waiter(absl::BlockingCounter* init, absl::Mutex* m, int* p) {
 
-       init->DecrementCount();
 
-       m->LockWhen(absl::Condition(
 
-           static_cast<bool (*)(int*)>([](int* v) { return *v == 0; }), p));
 
-       m->Unlock();
 
-     }
 
-   };
 
-   if (num_classes == 0) {
 
-     // No equivalence classes.
 
-     num_classes = num_waiters;
 
-   }
 
-   absl::BlockingCounter init(num_waiters);
 
-   absl::Mutex mu;
 
-   std::vector<int> equivalence_classes(num_classes, 1);
 
-   // Must be declared last to be destroyed first.
 
-   absl::synchronization_internal::ThreadPool pool(num_waiters);
 
-   for (int i = 0; i < num_waiters; i++) {
 
-     // Mutex considers Conditions with the same function and argument
 
-     // to be equivalent.
 
-     pool.Schedule([&, i] {
 
-       Helper::Waiter(&init, &mu, &equivalence_classes[i % num_classes]);
 
-     });
 
-   }
 
-   init.Wait();
 
-   for (auto _ : state) {
 
-     mu.Lock();
 
-     mu.Unlock();  // Each unlock requires Condition evaluation for our waiters.
 
-   }
 
-   mu.Lock();
 
-   for (int i = 0; i < num_classes; i++) {
 
-     equivalence_classes[i] = 0;
 
-   }
 
-   mu.Unlock();
 
- }
 
- // Some configurations have higher thread limits than others.
 
- #if defined(__linux__) && !defined(ABSL_HAVE_THREAD_SANITIZER)
 
- constexpr int kMaxConditionWaiters = 8192;
 
- #else
 
- constexpr int kMaxConditionWaiters = 1024;
 
- #endif
 
- BENCHMARK(BM_ConditionWaiters)->RangePair(0, 2, 1, kMaxConditionWaiters);
 
- }  // namespace
 
 
  |