ParseRawPrimitivesBenchmark.cs 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536
  1. #region Copyright notice and license
  2. // Protocol Buffers - Google's data interchange format
  3. // Copyright 2019 Google Inc. All rights reserved.
  4. // https://github.com/protocolbuffers/protobuf
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. #endregion
  32. using BenchmarkDotNet.Attributes;
  33. using System;
  34. using System.Buffers.Binary;
  35. using System.Collections.Generic;
  36. using System.IO;
  37. using System.Buffers;
  38. namespace Google.Protobuf.Benchmarks
  39. {
  40. /// <summary>
  41. /// Benchmarks throughput when parsing raw primitives.
  42. /// </summary>
  43. [MemoryDiagnoser]
  44. public class ParseRawPrimitivesBenchmark
  45. {
  46. // key is the encodedSize of varint values
  47. Dictionary<int, byte[]> varintInputBuffers;
  48. byte[] doubleInputBuffer;
  49. byte[] floatInputBuffer;
  50. byte[] fixedIntInputBuffer;
  51. // key is the encodedSize of string values
  52. Dictionary<int, byte[]> stringInputBuffers;
  53. Dictionary<int, ReadOnlySequence<byte>> stringInputBuffersSegmented;
  54. Random random = new Random(417384220); // random but deterministic seed
  55. public IEnumerable<int> StringEncodedSizes => new[] { 1, 4, 10, 105, 10080 };
  56. public IEnumerable<int> StringSegmentedEncodedSizes => new[] { 105, 10080 };
  57. [GlobalSetup]
  58. public void GlobalSetup()
  59. {
  60. // add some extra values that we won't read just to make sure we are far enough from the end of the buffer
  61. // which allows the parser fastpath to always kick in.
  62. const int paddingValueCount = 100;
  63. varintInputBuffers = new Dictionary<int, byte[]>();
  64. for (int encodedSize = 1; encodedSize <= 10; encodedSize++)
  65. {
  66. byte[] buffer = CreateBufferWithRandomVarints(random, BytesToParse / encodedSize, encodedSize, paddingValueCount);
  67. varintInputBuffers.Add(encodedSize, buffer);
  68. }
  69. doubleInputBuffer = CreateBufferWithRandomDoubles(random, BytesToParse / sizeof(double), paddingValueCount);
  70. floatInputBuffer = CreateBufferWithRandomFloats(random, BytesToParse / sizeof(float), paddingValueCount);
  71. fixedIntInputBuffer = CreateBufferWithRandomData(random, BytesToParse / sizeof(long), sizeof(long), paddingValueCount);
  72. stringInputBuffers = new Dictionary<int, byte[]>();
  73. foreach (var encodedSize in StringEncodedSizes)
  74. {
  75. byte[] buffer = CreateBufferWithStrings(BytesToParse / encodedSize, encodedSize, encodedSize < 10 ? 10 : 1 );
  76. stringInputBuffers.Add(encodedSize, buffer);
  77. }
  78. stringInputBuffersSegmented = new Dictionary<int, ReadOnlySequence<byte>>();
  79. foreach (var encodedSize in StringSegmentedEncodedSizes)
  80. {
  81. byte[] buffer = CreateBufferWithStrings(BytesToParse / encodedSize, encodedSize, encodedSize < 10 ? 10 : 1);
  82. stringInputBuffersSegmented.Add(encodedSize, ReadOnlySequenceFactory.CreateWithContent(buffer, segmentSize: 128, addEmptySegmentDelimiters: false));
  83. }
  84. }
  85. // Total number of bytes that each benchmark will parse.
  86. // Measuring the time taken to parse buffer of given size makes it easier to compare parsing speed for different
  87. // types and makes it easy to calculate the througput (in MB/s)
  88. // 10800 bytes is chosen because it is divisible by all possible encoded sizes for all primitive types {1..10}
  89. [Params(10080)]
  90. public int BytesToParse { get; set; }
  91. [Benchmark]
  92. [Arguments(1)]
  93. [Arguments(2)]
  94. [Arguments(3)]
  95. [Arguments(4)]
  96. [Arguments(5)]
  97. public int ParseRawVarint32_CodedInputStream(int encodedSize)
  98. {
  99. CodedInputStream cis = new CodedInputStream(varintInputBuffers[encodedSize]);
  100. int sum = 0;
  101. for (int i = 0; i < BytesToParse / encodedSize; i++)
  102. {
  103. sum += cis.ReadInt32();
  104. }
  105. return sum;
  106. }
  107. [Benchmark]
  108. [Arguments(1)]
  109. [Arguments(2)]
  110. [Arguments(3)]
  111. [Arguments(4)]
  112. [Arguments(5)]
  113. public int ParseRawVarint32_ParseContext(int encodedSize)
  114. {
  115. InitializeParseContext(varintInputBuffers[encodedSize], out ParseContext ctx);
  116. int sum = 0;
  117. for (int i = 0; i < BytesToParse / encodedSize; i++)
  118. {
  119. sum += ctx.ReadInt32();
  120. }
  121. return sum;
  122. }
  123. [Benchmark]
  124. [Arguments(1)]
  125. [Arguments(2)]
  126. [Arguments(3)]
  127. [Arguments(4)]
  128. [Arguments(5)]
  129. [Arguments(6)]
  130. [Arguments(7)]
  131. [Arguments(8)]
  132. [Arguments(9)]
  133. [Arguments(10)]
  134. public long ParseRawVarint64_CodedInputStream(int encodedSize)
  135. {
  136. CodedInputStream cis = new CodedInputStream(varintInputBuffers[encodedSize]);
  137. long sum = 0;
  138. for (int i = 0; i < BytesToParse / encodedSize; i++)
  139. {
  140. sum += cis.ReadInt64();
  141. }
  142. return sum;
  143. }
  144. [Benchmark]
  145. [Arguments(1)]
  146. [Arguments(2)]
  147. [Arguments(3)]
  148. [Arguments(4)]
  149. [Arguments(5)]
  150. [Arguments(6)]
  151. [Arguments(7)]
  152. [Arguments(8)]
  153. [Arguments(9)]
  154. [Arguments(10)]
  155. public long ParseRawVarint64_ParseContext(int encodedSize)
  156. {
  157. InitializeParseContext(varintInputBuffers[encodedSize], out ParseContext ctx);
  158. long sum = 0;
  159. for (int i = 0; i < BytesToParse / encodedSize; i++)
  160. {
  161. sum += ctx.ReadInt64();
  162. }
  163. return sum;
  164. }
  165. [Benchmark]
  166. public uint ParseFixed32_CodedInputStream()
  167. {
  168. const int encodedSize = sizeof(uint);
  169. CodedInputStream cis = new CodedInputStream(fixedIntInputBuffer);
  170. uint sum = 0;
  171. for (uint i = 0; i < BytesToParse / encodedSize; i++)
  172. {
  173. sum += cis.ReadFixed32();
  174. }
  175. return sum;
  176. }
  177. [Benchmark]
  178. public uint ParseFixed32_ParseContext()
  179. {
  180. const int encodedSize = sizeof(uint);
  181. InitializeParseContext(fixedIntInputBuffer, out ParseContext ctx);
  182. uint sum = 0;
  183. for (uint i = 0; i < BytesToParse / encodedSize; i++)
  184. {
  185. sum += ctx.ReadFixed32();
  186. }
  187. return sum;
  188. }
  189. [Benchmark]
  190. public ulong ParseFixed64_CodedInputStream()
  191. {
  192. const int encodedSize = sizeof(ulong);
  193. CodedInputStream cis = new CodedInputStream(fixedIntInputBuffer);
  194. ulong sum = 0;
  195. for (int i = 0; i < BytesToParse / encodedSize; i++)
  196. {
  197. sum += cis.ReadFixed64();
  198. }
  199. return sum;
  200. }
  201. [Benchmark]
  202. public ulong ParseFixed64_ParseContext()
  203. {
  204. const int encodedSize = sizeof(ulong);
  205. InitializeParseContext(fixedIntInputBuffer, out ParseContext ctx);
  206. ulong sum = 0;
  207. for (int i = 0; i < BytesToParse / encodedSize; i++)
  208. {
  209. sum += ctx.ReadFixed64();
  210. }
  211. return sum;
  212. }
  213. [Benchmark]
  214. public float ParseRawFloat_CodedInputStream()
  215. {
  216. const int encodedSize = sizeof(float);
  217. CodedInputStream cis = new CodedInputStream(floatInputBuffer);
  218. float sum = 0;
  219. for (int i = 0; i < BytesToParse / encodedSize; i++)
  220. {
  221. sum += cis.ReadFloat();
  222. }
  223. return sum;
  224. }
  225. [Benchmark]
  226. public float ParseRawFloat_ParseContext()
  227. {
  228. const int encodedSize = sizeof(float);
  229. InitializeParseContext(floatInputBuffer, out ParseContext ctx);
  230. float sum = 0;
  231. for (int i = 0; i < BytesToParse / encodedSize; i++)
  232. {
  233. sum += ctx.ReadFloat();
  234. }
  235. return sum;
  236. }
  237. [Benchmark]
  238. public double ParseRawDouble_CodedInputStream()
  239. {
  240. const int encodedSize = sizeof(double);
  241. CodedInputStream cis = new CodedInputStream(doubleInputBuffer);
  242. double sum = 0;
  243. for (int i = 0; i < BytesToParse / encodedSize; i++)
  244. {
  245. sum += cis.ReadDouble();
  246. }
  247. return sum;
  248. }
  249. [Benchmark]
  250. public double ParseRawDouble_ParseContext()
  251. {
  252. const int encodedSize = sizeof(double);
  253. InitializeParseContext(doubleInputBuffer, out ParseContext ctx);
  254. double sum = 0;
  255. for (int i = 0; i < BytesToParse / encodedSize; i++)
  256. {
  257. sum += ctx.ReadDouble();
  258. }
  259. return sum;
  260. }
  261. [Benchmark]
  262. [ArgumentsSource(nameof(StringEncodedSizes))]
  263. public int ParseString_CodedInputStream(int encodedSize)
  264. {
  265. CodedInputStream cis = new CodedInputStream(stringInputBuffers[encodedSize]);
  266. int sum = 0;
  267. for (int i = 0; i < BytesToParse / encodedSize; i++)
  268. {
  269. sum += cis.ReadString().Length;
  270. }
  271. return sum;
  272. }
  273. [Benchmark]
  274. [ArgumentsSource(nameof(StringEncodedSizes))]
  275. public int ParseString_ParseContext(int encodedSize)
  276. {
  277. InitializeParseContext(stringInputBuffers[encodedSize], out ParseContext ctx);
  278. int sum = 0;
  279. for (int i = 0; i < BytesToParse / encodedSize; i++)
  280. {
  281. sum += ctx.ReadString().Length;
  282. }
  283. return sum;
  284. }
  285. [Benchmark]
  286. [ArgumentsSource(nameof(StringSegmentedEncodedSizes))]
  287. public int ParseString_ParseContext_MultipleSegments(int encodedSize)
  288. {
  289. InitializeParseContext(stringInputBuffersSegmented[encodedSize], out ParseContext ctx);
  290. int sum = 0;
  291. for (int i = 0; i < BytesToParse / encodedSize; i++)
  292. {
  293. sum += ctx.ReadString().Length;
  294. }
  295. return sum;
  296. }
  297. [Benchmark]
  298. [ArgumentsSource(nameof(StringEncodedSizes))]
  299. public int ParseBytes_CodedInputStream(int encodedSize)
  300. {
  301. CodedInputStream cis = new CodedInputStream(stringInputBuffers[encodedSize]);
  302. int sum = 0;
  303. for (int i = 0; i < BytesToParse / encodedSize; i++)
  304. {
  305. sum += cis.ReadBytes().Length;
  306. }
  307. return sum;
  308. }
  309. [Benchmark]
  310. [ArgumentsSource(nameof(StringEncodedSizes))]
  311. public int ParseBytes_ParseContext(int encodedSize)
  312. {
  313. InitializeParseContext(stringInputBuffers[encodedSize], out ParseContext ctx);
  314. int sum = 0;
  315. for (int i = 0; i < BytesToParse / encodedSize; i++)
  316. {
  317. sum += ctx.ReadBytes().Length;
  318. }
  319. return sum;
  320. }
  321. [Benchmark]
  322. [ArgumentsSource(nameof(StringSegmentedEncodedSizes))]
  323. public int ParseBytes_ParseContext_MultipleSegments(int encodedSize)
  324. {
  325. InitializeParseContext(stringInputBuffersSegmented[encodedSize], out ParseContext ctx);
  326. int sum = 0;
  327. for (int i = 0; i < BytesToParse / encodedSize; i++)
  328. {
  329. sum += ctx.ReadBytes().Length;
  330. }
  331. return sum;
  332. }
  333. private static void InitializeParseContext(byte[] buffer, out ParseContext ctx)
  334. {
  335. ParseContext.Initialize(new ReadOnlySequence<byte>(buffer), out ctx);
  336. }
  337. private static void InitializeParseContext(ReadOnlySequence<byte> buffer, out ParseContext ctx)
  338. {
  339. ParseContext.Initialize(buffer, out ctx);
  340. }
  341. private static byte[] CreateBufferWithRandomVarints(Random random, int valueCount, int encodedSize, int paddingValueCount)
  342. {
  343. MemoryStream ms = new MemoryStream();
  344. CodedOutputStream cos = new CodedOutputStream(ms);
  345. for (int i = 0; i < valueCount + paddingValueCount; i++)
  346. {
  347. cos.WriteUInt64(RandomUnsignedVarint(random, encodedSize, false));
  348. }
  349. cos.Flush();
  350. var buffer = ms.ToArray();
  351. if (buffer.Length != encodedSize * (valueCount + paddingValueCount))
  352. {
  353. throw new InvalidOperationException($"Unexpected output buffer length {buffer.Length}");
  354. }
  355. return buffer;
  356. }
  357. private static byte[] CreateBufferWithRandomFloats(Random random, int valueCount, int paddingValueCount)
  358. {
  359. MemoryStream ms = new MemoryStream();
  360. CodedOutputStream cos = new CodedOutputStream(ms);
  361. for (int i = 0; i < valueCount + paddingValueCount; i++)
  362. {
  363. cos.WriteFloat((float)random.NextDouble());
  364. }
  365. cos.Flush();
  366. var buffer = ms.ToArray();
  367. return buffer;
  368. }
  369. private static byte[] CreateBufferWithRandomDoubles(Random random, int valueCount, int paddingValueCount)
  370. {
  371. MemoryStream ms = new MemoryStream();
  372. CodedOutputStream cos = new CodedOutputStream(ms);
  373. for (int i = 0; i < valueCount + paddingValueCount; i++)
  374. {
  375. cos.WriteDouble(random.NextDouble());
  376. }
  377. cos.Flush();
  378. var buffer = ms.ToArray();
  379. return buffer;
  380. }
  381. private static byte[] CreateBufferWithRandomData(Random random, int valueCount, int encodedSize, int paddingValueCount)
  382. {
  383. int bufferSize = (valueCount + paddingValueCount) * encodedSize;
  384. byte[] buffer = new byte[bufferSize];
  385. random.NextBytes(buffer);
  386. return buffer;
  387. }
  388. /// <summary>
  389. /// Generate a random value that will take exactly "encodedSize" bytes when varint-encoded.
  390. /// </summary>
  391. public static ulong RandomUnsignedVarint(Random random, int encodedSize, bool fitsIn32Bits)
  392. {
  393. Span<byte> randomBytesBuffer = stackalloc byte[8];
  394. if (encodedSize < 1 || encodedSize > 10 || (fitsIn32Bits && encodedSize > 5))
  395. {
  396. throw new ArgumentException("Illegal encodedSize value requested", nameof(encodedSize));
  397. }
  398. const int bitsPerByte = 7;
  399. ulong result = 0;
  400. while (true)
  401. {
  402. random.NextBytes(randomBytesBuffer);
  403. ulong randomValue = BinaryPrimitives.ReadUInt64LittleEndian(randomBytesBuffer);
  404. // only use the number of random bits we need
  405. ulong bitmask = encodedSize < 10 ? ((1UL << (encodedSize * bitsPerByte)) - 1) : ulong.MaxValue;
  406. result = randomValue & bitmask;
  407. if (fitsIn32Bits)
  408. {
  409. // make sure the resulting value is representable by a uint.
  410. result &= uint.MaxValue;
  411. }
  412. if (encodedSize == 10)
  413. {
  414. // for 10-byte values the highest bit always needs to be set (7*9=63)
  415. result |= ulong.MaxValue;
  416. break;
  417. }
  418. // some random values won't require the full "encodedSize" bytes, check that at least
  419. // one of the top 7 bits is set. Retrying is fine since it only happens rarely
  420. if (encodedSize == 1 || (result & (0x7FUL << ((encodedSize - 1) * bitsPerByte))) != 0)
  421. {
  422. break;
  423. }
  424. }
  425. return result;
  426. }
  427. private static byte[] CreateBufferWithStrings(int valueCount, int encodedSize, int paddingValueCount)
  428. {
  429. var str = CreateStringWithEncodedSize(encodedSize);
  430. MemoryStream ms = new MemoryStream();
  431. CodedOutputStream cos = new CodedOutputStream(ms);
  432. for (int i = 0; i < valueCount + paddingValueCount; i++)
  433. {
  434. cos.WriteString(str);
  435. }
  436. cos.Flush();
  437. var buffer = ms.ToArray();
  438. if (buffer.Length != encodedSize * (valueCount + paddingValueCount))
  439. {
  440. throw new InvalidOperationException($"Unexpected output buffer length {buffer.Length}");
  441. }
  442. return buffer;
  443. }
  444. public static string CreateStringWithEncodedSize(int encodedSize)
  445. {
  446. var str = new string('a', encodedSize);
  447. while (CodedOutputStream.ComputeStringSize(str) > encodedSize)
  448. {
  449. str = str.Substring(1);
  450. }
  451. if (CodedOutputStream.ComputeStringSize(str) != encodedSize)
  452. {
  453. throw new InvalidOperationException($"Generated string with wrong encodedSize");
  454. }
  455. return str;
  456. }
  457. public static string CreateNonAsciiStringWithEncodedSize(int encodedSize)
  458. {
  459. if (encodedSize < 3)
  460. {
  461. throw new ArgumentException("Illegal encoded size for a string with non-ascii chars.");
  462. }
  463. var twoByteChar = '\u00DC'; // U-umlaut, UTF8 encoding has 2 bytes
  464. var str = new string(twoByteChar, encodedSize / 2);
  465. while (CodedOutputStream.ComputeStringSize(str) > encodedSize)
  466. {
  467. str = str.Substring(1);
  468. }
  469. // add padding of ascii characters to reach the desired encoded size.
  470. while (CodedOutputStream.ComputeStringSize(str) < encodedSize)
  471. {
  472. str += 'a';
  473. }
  474. // Note that for a few specific encodedSize values, it might be impossible to generate
  475. // the string with the desired encodedSize using the algorithm above. For testing purposes, checking that
  476. // the encoded size we got is actually correct is good enough.
  477. if (CodedOutputStream.ComputeStringSize(str) != encodedSize)
  478. {
  479. throw new InvalidOperationException($"Generated string with wrong encodedSize");
  480. }
  481. return str;
  482. }
  483. }
  484. }