benchfn.c 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. /*
  2. * Copyright (C) 2016-2020 Yann Collet, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. /* *************************************
  11. * Includes
  12. ***************************************/
  13. #include <stdlib.h> /* malloc, free */
  14. #include <string.h> /* memset */
  15. #undef NDEBUG /* assert must not be disabled */
  16. #include <assert.h> /* assert */
  17. #include "timefn.h" /* UTIL_time_t, UTIL_getTime */
  18. #include "benchfn.h"
  19. /* *************************************
  20. * Constants
  21. ***************************************/
/* Duration of one timing loop, in microseconds.
 * SEC_TO_MICRO is not defined in this file (presumably provided by timefn.h). */
#define TIMELOOP_MICROSEC SEC_TO_MICRO /* 1 second */
/* Duration of one timing loop, in nanoseconds (unsigned long long constant). */
#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
/* Size-suffix macros : each expands to a multiplication, so that `4 KB`
 * becomes `4 *(1 <<10)`. They must follow a numeric expression. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
  27. /* *************************************
  28. * Debug errors
  29. ***************************************/
/* DEBUGOUTPUT() :
 * When DEBUG is defined and >= 1, forwards its printf-style arguments to stderr.
 * Otherwise it expands to nothing, so its arguments are never evaluated. */
#if defined(DEBUG) && (DEBUG >= 1)
# include <stdio.h> /* fprintf */
# define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
# define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
#else
# define DEBUGOUTPUT(...)
#endif
/* RETURN_QUIET_ERROR() :
 * Returns `retValue` from the enclosing function.
 * In debug builds, the source location and the formatted message
 * (printf-style, passed after retValue) are printed to stderr first;
 * in non-debug builds nothing is displayed.
 * note : expands to a plain brace block, not a do { } while (0) statement. */
#define RETURN_QUIET_ERROR(retValue, ...) { \
DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
DEBUGOUTPUT("Error : "); \
DEBUGOUTPUT(__VA_ARGS__); \
DEBUGOUTPUT(" \n"); \
return retValue; \
}
  45. /* *************************************
  46. * Benchmarking an arbitrary function
  47. ***************************************/
  48. int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
  49. {
  50. return outcome.error_tag_never_ever_use_directly == 0;
  51. }
  52. /* warning : this function will stop program execution if outcome is invalid !
  53. * check outcome validity first, using BMK_isValid_runResult() */
  54. BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
  55. {
  56. assert(outcome.error_tag_never_ever_use_directly == 0);
  57. return outcome.internal_never_ever_use_directly;
  58. }
  59. size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
  60. {
  61. assert(outcome.error_tag_never_ever_use_directly != 0);
  62. return outcome.error_result_never_ever_use_directly;
  63. }
  64. static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
  65. {
  66. BMK_runOutcome_t b;
  67. memset(&b, 0, sizeof(b));
  68. b.error_tag_never_ever_use_directly = 1;
  69. b.error_result_never_ever_use_directly = errorResult;
  70. return b;
  71. }
  72. static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
  73. {
  74. BMK_runOutcome_t outcome;
  75. outcome.error_tag_never_ever_use_directly = 0;
  76. outcome.internal_never_ever_use_directly = runTime;
  77. return outcome;
  78. }
  79. /* initFn will be measured once, benchFn will be measured `nbLoops` times */
  80. /* initFn is optional, provide NULL if none */
  81. /* benchFn must return a size_t value that errorFn can interpret */
  82. /* takes # of blocks and list of size & stuff for each. */
  83. /* can report result of benchFn for each block into blockResult. */
  84. /* blockResult is optional, provide NULL if this information is not required */
  85. /* note : time per loop can be reported as zero if run time < timer resolution */
  86. BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
  87. unsigned nbLoops)
  88. {
  89. /* init */
  90. { size_t i;
  91. for (i = 0; i < p.blockCount; i++) {
  92. memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */
  93. } }
  94. /* benchmark */
  95. { UTIL_time_t const clockStart = UTIL_getTime();
  96. size_t dstSize = 0;
  97. unsigned loopNb, blockNb;
  98. nbLoops += !nbLoops; /* minimum nbLoops is 1 */
  99. if (p.initFn != NULL) p.initFn(p.initPayload);
  100. for (loopNb = 0; loopNb < nbLoops; loopNb++) {
  101. for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
  102. size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
  103. p.dstBuffers[blockNb], p.dstCapacities[blockNb],
  104. p.benchPayload);
  105. if (loopNb == 0) {
  106. if (p.blockResults != NULL) p.blockResults[blockNb] = res;
  107. if ((p.errorFn != NULL) && (p.errorFn(res))) {
  108. RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
  109. "Function benchmark failed on block %u (of size %u) with error %i",
  110. blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
  111. }
  112. dstSize += res;
  113. } }
  114. } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
  115. { PTime const totalTime = UTIL_clockSpanNano(clockStart);
  116. BMK_runTime_t rt;
  117. rt.nanoSecPerRun = (double)totalTime / nbLoops;
  118. rt.sumOfReturn = dstSize;
  119. return BMK_setValid_runTime(rt);
  120. } }
  121. }
  122. /* ==== Benchmarking any function, providing intermediate results ==== */
/* State carried across successive calls to BMK_benchTimedFn(). */
struct BMK_timedFnState_s {
/* total measured time so far; increased by BMK_benchTimedFn() after each successful run */
PTime timeSpent_ns;
/* total time budget, derived from total_ms by BMK_resetTimedFnState() */
PTime timeBudget_ns;
/* target duration of a single run, derived from run_ms by BMK_resetTimedFnState() */
PTime runBudget_ns;
/* initialized to a very large sentinel by BMK_resetTimedFnState();
 * read by BMK_benchTimedFn() as its starting "best" value */
BMK_runTime_t fastestRun;
/* nb of loops requested for the next BMK_benchFunction() call */
unsigned nbLoops;
/* timestamp taken by BMK_resetTimedFnState(); not read anywhere else in this file */
UTIL_time_t coolTime;
}; /* typedef'd to BMK_timedFnState_t outside of this file (presumably in benchfn.h) */
  131. BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
  132. {
  133. BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
  134. if (r == NULL) return NULL; /* malloc() error */
  135. BMK_resetTimedFnState(r, total_ms, run_ms);
  136. return r;
  137. }
  138. void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }
  139. BMK_timedFnState_t*
  140. BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
  141. {
  142. typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
  143. typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */
  144. size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
  145. BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
  146. if (buffer == NULL) return NULL;
  147. if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
  148. if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */
  149. BMK_resetTimedFnState(r, total_ms, run_ms);
  150. return r;
  151. }
  152. void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
  153. {
  154. if (!total_ms) total_ms = 1 ;
  155. if (!run_ms) run_ms = 1;
  156. if (run_ms > total_ms) run_ms = total_ms;
  157. timedFnState->timeSpent_ns = 0;
  158. timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
  159. timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;
  160. timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */
  161. timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
  162. timedFnState->nbLoops = 1;
  163. timedFnState->coolTime = UTIL_getTime();
  164. }
  165. /* Tells if nb of seconds set in timedFnState for all runs is spent.
  166. * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
  167. int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
  168. {
  169. return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
  170. }
  171. #undef MIN
  172. #define MIN(a,b) ( (a) < (b) ? (a) : (b) )
  173. #define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */
  174. BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
  175. BMK_benchParams_t p)
  176. {
  177. PTime const runBudget_ns = cont->runBudget_ns;
  178. PTime const runTimeMin_ns = runBudget_ns / 2;
  179. BMK_runTime_t bestRunTime = cont->fastestRun;
  180. for (;;) {
  181. BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
  182. if (!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
  183. return runResult;
  184. }
  185. { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
  186. double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
  187. cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
  188. /* estimate nbLoops for next run to last approximately 1 second */
  189. if (loopDuration_ns > (runBudget_ns / 50)) {
  190. double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
  191. cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
  192. } else {
  193. /* previous run was too short : blindly increase workload by x multiplier */
  194. const unsigned multiplier = 10;
  195. assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */
  196. cont->nbLoops *= multiplier;
  197. }
  198. if (loopDuration_ns < runTimeMin_ns) {
  199. /* When benchmark run time is too small : don't report results.
  200. * increased risks of rounding errors */
  201. continue;
  202. }
  203. if (newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
  204. bestRunTime = newRunTime;
  205. }
  206. }
  207. break;
  208. } /* while (!completed) */
  209. return BMK_setValid_runTime(bestRunTime);
  210. }