| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426 | // Copyright 2017 The Abseil Authors.//// Licensed under the Apache License, Version 2.0 (the "License");// you may not use this file except in compliance with the License.// You may obtain a copy of the License at////      https://www.apache.org/licenses/LICENSE-2.0//// Unless required by applicable law or agreed to in writing, software// distributed under the License is distributed on an "AS IS" BASIS,// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.// See the License for the specific language governing permissions and// limitations under the License.#include "absl/random/exponential_distribution.h"#include <algorithm>#include <cmath>#include <cstddef>#include <cstdint>#include <iterator>#include <limits>#include <random>#include <sstream>#include <string>#include <type_traits>#include <vector>#include "gmock/gmock.h"#include "gtest/gtest.h"#include "absl/base/internal/raw_logging.h"#include "absl/base/macros.h"#include "absl/random/internal/chi_square.h"#include "absl/random/internal/distribution_test_util.h"#include "absl/random/internal/pcg_engine.h"#include "absl/random/internal/sequence_urbg.h"#include "absl/random/random.h"#include "absl/strings/str_cat.h"#include "absl/strings/str_format.h"#include "absl/strings/str_replace.h"#include "absl/strings/strip.h"namespace {using absl::random_internal::kChiSquared;template <typename RealType>class ExponentialDistributionTypedTest : public ::testing::Test {};using RealTypes = ::testing::Types<float, double, long double>;TYPED_TEST_CASE(ExponentialDistributionTypedTest, RealTypes);TYPED_TEST(ExponentialDistributionTypedTest, SerializeTest) {  using param_type =      typename absl::exponential_distribution<TypeParam>::param_type;  const TypeParam kParams[] = {      // Cases around 1.      1,                                           //      std::nextafter(TypeParam(1), TypeParam(0)),  // 1 - epsilon      std::nextafter(TypeParam(1), TypeParam(2)),  // 1 + epsilon      // Typical cases.      TypeParam(1e-8), TypeParam(1e-4), TypeParam(1), TypeParam(2),      TypeParam(1e4), TypeParam(1e8), TypeParam(1e20), TypeParam(2.5),      // Boundary cases.      std::numeric_limits<TypeParam>::max(),      std::numeric_limits<TypeParam>::epsilon(),      std::nextafter(std::numeric_limits<TypeParam>::min(),                     TypeParam(1)),           // min + epsilon      std::numeric_limits<TypeParam>::min(),  // smallest normal      // There are some errors dealing with denorms on apple platforms.      std::numeric_limits<TypeParam>::denorm_min(),  // smallest denorm      std::numeric_limits<TypeParam>::min() / 2,     // denorm      std::nextafter(std::numeric_limits<TypeParam>::min(),                     TypeParam(0)),  // denorm_max  };  constexpr int kCount = 1000;  absl::InsecureBitGen gen;  for (const TypeParam lambda : kParams) {    // Some values may be invalid; skip those.    if (!std::isfinite(lambda)) continue;    ABSL_ASSERT(lambda > 0);    const param_type param(lambda);    absl::exponential_distribution<TypeParam> before(lambda);    EXPECT_EQ(before.lambda(), param.lambda());    {      absl::exponential_distribution<TypeParam> via_param(param);      EXPECT_EQ(via_param, before);      EXPECT_EQ(via_param.param(), before.param());    }    // Smoke test.    auto sample_min = before.max();    auto sample_max = before.min();    for (int i = 0; i < kCount; i++) {      auto sample = before(gen);      EXPECT_GE(sample, before.min()) << before;      EXPECT_LE(sample, before.max()) << before;      if (sample > sample_max) sample_max = sample;      if (sample < sample_min) sample_min = sample;    }    if (!std::is_same<TypeParam, long double>::value) {      ABSL_INTERNAL_LOG(INFO,                        absl::StrFormat("Range {%f}: %f, %f, lambda=%f", lambda,                                        sample_min, sample_max, lambda));    }    std::stringstream ss;    ss << before;    if (!std::isfinite(lambda)) {      // Streams do not deserialize inf/nan correctly.      continue;    }    // Validate stream serialization.    absl::exponential_distribution<TypeParam> after(34.56f);    EXPECT_NE(before.lambda(), after.lambda());    EXPECT_NE(before.param(), after.param());    EXPECT_NE(before, after);    ss >> after;#if defined(__powerpc64__) || defined(__PPC64__) || defined(__powerpc__) || \    defined(__ppc__) || defined(__PPC__)    if (std::is_same<TypeParam, long double>::value) {      // Roundtripping floating point values requires sufficient precision to      // reconstruct the exact value. It turns out that long double has some      // errors doing this on ppc, particularly for values      // near {1.0 +/- epsilon}.      if (lambda <= std::numeric_limits<double>::max() &&          lambda >= std::numeric_limits<double>::lowest()) {        EXPECT_EQ(static_cast<double>(before.lambda()),                  static_cast<double>(after.lambda()))            << ss.str();      }      continue;    }#endif    EXPECT_EQ(before.lambda(), after.lambda())  //        << ss.str() << " "                      //        << (ss.good() ? "good " : "")           //        << (ss.bad() ? "bad " : "")             //        << (ss.eof() ? "eof " : "")             //        << (ss.fail() ? "fail " : "");  }}// http://www.itl.nist.gov/div898/handbook/eda/section3/eda3667.htmclass ExponentialModel { public:  explicit ExponentialModel(double lambda)      : lambda_(lambda), beta_(1.0 / lambda) {}  double lambda() const { return lambda_; }  double mean() const { return beta_; }  double variance() const { return beta_ * beta_; }  double stddev() const { return std::sqrt(variance()); }  double skew() const { return 2; }  double kurtosis() const { return 6.0; }  double CDF(double x) { return 1.0 - std::exp(-lambda_ * x); }  // The inverse CDF, or PercentPoint function of the distribution  double InverseCDF(double p) {    ABSL_ASSERT(p >= 0.0);    ABSL_ASSERT(p < 1.0);    return -beta_ * std::log(1.0 - p);  } private:  const double lambda_;  const double beta_;};struct Param {  double lambda;  double p_fail;  int trials;};class ExponentialDistributionTests : public testing::TestWithParam<Param>,                                     public ExponentialModel { public:  ExponentialDistributionTests() : ExponentialModel(GetParam().lambda) {}  // SingleZTest provides a basic z-squared test of the mean vs. expected  // mean for data generated by the poisson distribution.  template <typename D>  bool SingleZTest(const double p, const size_t samples);  // SingleChiSquaredTest provides a basic chi-squared test of the normal  // distribution.  template <typename D>  double SingleChiSquaredTest();  // We use a fixed bit generator for distribution accuracy tests.  This allows  // these tests to be deterministic, while still testing the qualify of the  // implementation.  absl::random_internal::pcg64_2018_engine rng_{0x2B7E151628AED2A6};};template <typename D>bool ExponentialDistributionTests::SingleZTest(const double p,                                               const size_t samples) {  D dis(lambda());  std::vector<double> data;  data.reserve(samples);  for (size_t i = 0; i < samples; i++) {    const double x = dis(rng_);    data.push_back(x);  }  const auto m = absl::random_internal::ComputeDistributionMoments(data);  const double max_err = absl::random_internal::MaxErrorTolerance(p);  const double z = absl::random_internal::ZScore(mean(), m);  const bool pass = absl::random_internal::Near("z", z, 0.0, max_err);  if (!pass) {    ABSL_INTERNAL_LOG(        INFO, absl::StrFormat("p=%f max_err=%f\n"                              " lambda=%f\n"                              " mean=%f vs. %f\n"                              " stddev=%f vs. %f\n"                              " skewness=%f vs. %f\n"                              " kurtosis=%f vs. %f\n"                              " z=%f vs. 0",                              p, max_err, lambda(), m.mean, mean(),                              std::sqrt(m.variance), stddev(), m.skewness,                              skew(), m.kurtosis, kurtosis(), z));  }  return pass;}template <typename D>double ExponentialDistributionTests::SingleChiSquaredTest() {  const size_t kSamples = 10000;  const int kBuckets = 50;  // The InverseCDF is the percent point function of the distribution, and can  // be used to assign buckets roughly uniformly.  std::vector<double> cutoffs;  const double kInc = 1.0 / static_cast<double>(kBuckets);  for (double p = kInc; p < 1.0; p += kInc) {    cutoffs.push_back(InverseCDF(p));  }  if (cutoffs.back() != std::numeric_limits<double>::infinity()) {    cutoffs.push_back(std::numeric_limits<double>::infinity());  }  D dis(lambda());  std::vector<int32_t> counts(cutoffs.size(), 0);  for (int j = 0; j < kSamples; j++) {    const double x = dis(rng_);    auto it = std::upper_bound(cutoffs.begin(), cutoffs.end(), x);    counts[std::distance(cutoffs.begin(), it)]++;  }  // Null-hypothesis is that the distribution is exponentially distributed  // with the provided lambda (not estimated from the data).  const int dof = static_cast<int>(counts.size()) - 1;  // Our threshold for logging is 1-in-50.  const double threshold = absl::random_internal::ChiSquareValue(dof, 0.98);  const double expected =      static_cast<double>(kSamples) / static_cast<double>(counts.size());  double chi_square = absl::random_internal::ChiSquareWithExpected(      std::begin(counts), std::end(counts), expected);  double p = absl::random_internal::ChiSquarePValue(chi_square, dof);  if (chi_square > threshold) {    for (int i = 0; i < cutoffs.size(); i++) {      ABSL_INTERNAL_LOG(          INFO, absl::StrFormat("%d : (%f) = %d", i, cutoffs[i], counts[i]));    }    ABSL_INTERNAL_LOG(INFO,                      absl::StrCat("lambda ", lambda(), "\n",     //                                   " expected ", expected, "\n",  //                                   kChiSquared, " ", chi_square, " (", p, ")\n",                                   kChiSquared, " @ 0.98 = ", threshold));  }  return p;}TEST_P(ExponentialDistributionTests, ZTest) {  const size_t kSamples = 10000;  const auto& param = GetParam();  const int expected_failures =      std::max(1, static_cast<int>(std::ceil(param.trials * param.p_fail)));  const double p = absl::random_internal::RequiredSuccessProbability(      param.p_fail, param.trials);  int failures = 0;  for (int i = 0; i < param.trials; i++) {    failures += SingleZTest<absl::exponential_distribution<double>>(p, kSamples)                    ? 0                    : 1;  }  EXPECT_LE(failures, expected_failures);}TEST_P(ExponentialDistributionTests, ChiSquaredTest) {  const int kTrials = 20;  int failures = 0;  for (int i = 0; i < kTrials; i++) {    double p_value =        SingleChiSquaredTest<absl::exponential_distribution<double>>();    if (p_value < 0.005) {  // 1/200      failures++;    }  }  // There is a 0.10% chance of producing at least one failure, so raise the  // failure threshold high enough to allow for a flake rate < 10,000.  EXPECT_LE(failures, 4);}std::vector<Param> GenParams() {  return {      Param{1.0, 0.02, 100},      Param{2.5, 0.02, 100},      Param{10, 0.02, 100},      // large      Param{1e4, 0.02, 100},      Param{1e9, 0.02, 100},      // small      Param{0.1, 0.02, 100},      Param{1e-3, 0.02, 100},      Param{1e-5, 0.02, 100},  };}std::string ParamName(const ::testing::TestParamInfo<Param>& info) {  const auto& p = info.param;  std::string name = absl::StrCat("lambda_", absl::SixDigits(p.lambda));  return absl::StrReplaceAll(name, {{"+", "_"}, {"-", "_"}, {".", "_"}});}INSTANTIATE_TEST_CASE_P(All, ExponentialDistributionTests,                        ::testing::ValuesIn(GenParams()), ParamName);// NOTE: absl::exponential_distribution is not guaranteed to be stable.TEST(ExponentialDistributionTest, StabilityTest) {  // absl::exponential_distribution stability relies on std::log1p and  // absl::uniform_real_distribution.  absl::random_internal::sequence_urbg urbg(      {0x0003eb76f6f7f755ull, 0xFFCEA50FDB2F953Bull, 0xC332DDEFBE6C5AA5ull,       0x6558218568AB9702ull, 0x2AEF7DAD5B6E2F84ull, 0x1521B62829076170ull,       0xECDD4775619F1510ull, 0x13CCA830EB61BD96ull, 0x0334FE1EAA0363CFull,       0xB5735C904C70A239ull, 0xD59E9E0BCBAADE14ull, 0xEECC86BC60622CA7ull});  std::vector<int> output(14);  {    absl::exponential_distribution<double> dist;    std::generate(std::begin(output), std::end(output),                  [&] { return static_cast<int>(10000.0 * dist(urbg)); });    EXPECT_EQ(14, urbg.invocations());    EXPECT_THAT(output,                testing::ElementsAre(0, 71913, 14375, 5039, 1835, 861, 25936,                                     804, 126, 12337, 17984, 27002, 0, 71913));  }  urbg.reset();  {    absl::exponential_distribution<float> dist;    std::generate(std::begin(output), std::end(output),                  [&] { return static_cast<int>(10000.0f * dist(urbg)); });    EXPECT_EQ(14, urbg.invocations());    EXPECT_THAT(output,                testing::ElementsAre(0, 71913, 14375, 5039, 1835, 861, 25936,                                     804, 126, 12337, 17984, 27002, 0, 71913));  }}TEST(ExponentialDistributionTest, AlgorithmBounds) {  // Relies on absl::uniform_real_distribution, so some of these comments  // reference that.  absl::exponential_distribution<double> dist;  {    // This returns the smallest value >0 from absl::uniform_real_distribution.    absl::random_internal::sequence_urbg urbg({0x0000000000000001ull});    double a = dist(urbg);    EXPECT_EQ(a, 5.42101086242752217004e-20);  }  {    // This returns a value very near 0.5 from absl::uniform_real_distribution.    absl::random_internal::sequence_urbg urbg({0x7fffffffffffffefull});    double a = dist(urbg);    EXPECT_EQ(a, 0.693147180559945175204);  }  {    // This returns the largest value <1 from absl::uniform_real_distribution.    // WolframAlpha: ~39.1439465808987766283058547296341915292187253    absl::random_internal::sequence_urbg urbg({0xFFFFFFFFFFFFFFeFull});    double a = dist(urbg);    EXPECT_EQ(a, 36.7368005696771007251);  }  {    // This *ALSO* returns the largest value <1.    absl::random_internal::sequence_urbg urbg({0xFFFFFFFFFFFFFFFFull});    double a = dist(urbg);    EXPECT_EQ(a, 36.7368005696771007251);  }}}  // namespace
 |