123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536 |
- #region Copyright notice and license
- // Protocol Buffers - Google's data interchange format
- // Copyright 2019 Google Inc. All rights reserved.
- // https://github.com/protocolbuffers/protobuf
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are
- // met:
- //
- // * Redistributions of source code must retain the above copyright
- // notice, this list of conditions and the following disclaimer.
- // * Redistributions in binary form must reproduce the above
- // copyright notice, this list of conditions and the following disclaimer
- // in the documentation and/or other materials provided with the
- // distribution.
- // * Neither the name of Google Inc. nor the names of its
- // contributors may be used to endorse or promote products derived from
- // this software without specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- #endregion
- using BenchmarkDotNet.Attributes;
- using System;
- using System.Buffers.Binary;
- using System.Collections.Generic;
- using System.IO;
- using System.Buffers;
- namespace Google.Protobuf.Benchmarks
- {
- /// <summary>
- /// Benchmarks throughput when parsing raw primitives.
- /// </summary>
- [MemoryDiagnoser]
- public class ParseRawPrimitivesBenchmark
- {
// Varint input buffers; the dictionary key is the encoded size (in bytes) of the varints stored in the buffer.
private Dictionary<int, byte[]> varintInputBuffers;

private byte[] doubleInputBuffer;
private byte[] floatInputBuffer;
private byte[] fixedIntInputBuffer;

// String input buffers; the dictionary key is the encoded size (in bytes) of each string in the buffer.
private Dictionary<int, byte[]> stringInputBuffers;
private Dictionary<int, ReadOnlySequence<byte>> stringInputBuffersSegmented;

// Fixed seed: the generated data is random-looking but identical on every run.
private readonly Random random = new Random(417384220);

// Encoded sizes used as the argument source of the string/bytes benchmarks.
public IEnumerable<int> StringEncodedSizes => new[] { 1, 4, 10, 105, 10080 };

// Encoded sizes used as the argument source of the multi-segment string/bytes benchmarks.
public IEnumerable<int> StringSegmentedEncodedSizes => new[] { 105, 10080 };
/// <summary>
/// Builds every input buffer used by the benchmarks: varints for each encoded size 1..10,
/// doubles, floats, raw fixed-width data, and strings (contiguous and segmented).
/// </summary>
[GlobalSetup]
public void GlobalSetup()
{
    // Extra values that are never read are appended to each buffer so that parsing stays
    // far enough from the end of the buffer for the parser fastpath to always kick in.
    const int paddingValueCount = 100;

    // NOTE: the buffers below share one Random instance, so the creation order determines their contents.
    varintInputBuffers = new Dictionary<int, byte[]>();
    for (int size = 1; size <= 10; size++)
    {
        varintInputBuffers.Add(
            size,
            CreateBufferWithRandomVarints(random, BytesToParse / size, size, paddingValueCount));
    }

    doubleInputBuffer = CreateBufferWithRandomDoubles(random, BytesToParse / sizeof(double), paddingValueCount);
    floatInputBuffer = CreateBufferWithRandomFloats(random, BytesToParse / sizeof(float), paddingValueCount);
    fixedIntInputBuffer = CreateBufferWithRandomData(random, BytesToParse / sizeof(long), sizeof(long), paddingValueCount);

    stringInputBuffers = new Dictionary<int, byte[]>();
    foreach (int size in StringEncodedSizes)
    {
        // Strings shorter than 10 encoded bytes get 10 padding values, all others get 1.
        int stringPadding = size < 10 ? 10 : 1;
        stringInputBuffers.Add(size, CreateBufferWithStrings(BytesToParse / size, size, stringPadding));
    }

    stringInputBuffersSegmented = new Dictionary<int, ReadOnlySequence<byte>>();
    foreach (int size in StringSegmentedEncodedSizes)
    {
        int stringPadding = size < 10 ? 10 : 1;
        byte[] contiguous = CreateBufferWithStrings(BytesToParse / size, size, stringPadding);
        ReadOnlySequence<byte> segmented =
            ReadOnlySequenceFactory.CreateWithContent(contiguous, segmentSize: 128, addEmptySegmentDelimiters: false);
        stringInputBuffersSegmented.Add(size, segmented);
    }
}
- // Total number of bytes that each benchmark will parse.
- // Measuring the time taken to parse a buffer of a given size makes it easier to compare parsing speed for different
- // types and makes it easy to calculate the throughput (in MB/s).
- // 10080 bytes is chosen because it is divisible by all possible encoded sizes for all primitive types {1..10}.
/// <summary>
/// Number of bytes each benchmark parses per invocation. The only configured value, 10080,
/// is divisible by every integer from 1 to 10, so <c>BytesToParse / encodedSize</c> is exact
/// for each of those encoded sizes.
/// </summary>
[Params(10080)]
public int BytesToParse { get; set; }
/// <summary>
/// Reads <c>BytesToParse / encodedSize</c> varints with <c>CodedInputStream.ReadInt32</c>
/// and returns the sum of the values read.
/// </summary>
/// <param name="encodedSize">Encoded size in bytes of each varint; selects the input buffer.</param>
[Benchmark]
[Arguments(1)]
[Arguments(2)]
[Arguments(3)]
[Arguments(4)]
[Arguments(5)]
public int ParseRawVarint32_CodedInputStream(int encodedSize)
{
    var input = new CodedInputStream(varintInputBuffers[encodedSize]);
    int total = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        total += input.ReadInt32();
    }

    return total;
}
/// <summary>
/// Reads <c>BytesToParse / encodedSize</c> varints with <c>ParseContext.ReadInt32</c>
/// and returns the sum of the values read.
/// </summary>
/// <param name="encodedSize">Encoded size in bytes of each varint; selects the input buffer.</param>
[Benchmark]
[Arguments(1)]
[Arguments(2)]
[Arguments(3)]
[Arguments(4)]
[Arguments(5)]
public int ParseRawVarint32_ParseContext(int encodedSize)
{
    InitializeParseContext(varintInputBuffers[encodedSize], out ParseContext context);
    int total = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        total += context.ReadInt32();
    }

    return total;
}
/// <summary>
/// Reads <c>BytesToParse / encodedSize</c> varints with <c>CodedInputStream.ReadInt64</c>
/// and returns the sum of the values read.
/// </summary>
/// <param name="encodedSize">Encoded size in bytes of each varint; selects the input buffer.</param>
[Benchmark]
[Arguments(1)]
[Arguments(2)]
[Arguments(3)]
[Arguments(4)]
[Arguments(5)]
[Arguments(6)]
[Arguments(7)]
[Arguments(8)]
[Arguments(9)]
[Arguments(10)]
public long ParseRawVarint64_CodedInputStream(int encodedSize)
{
    var input = new CodedInputStream(varintInputBuffers[encodedSize]);
    long total = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        total += input.ReadInt64();
    }

    return total;
}
/// <summary>
/// Reads <c>BytesToParse / encodedSize</c> varints with <c>ParseContext.ReadInt64</c>
/// and returns the sum of the values read.
/// </summary>
/// <param name="encodedSize">Encoded size in bytes of each varint; selects the input buffer.</param>
[Benchmark]
[Arguments(1)]
[Arguments(2)]
[Arguments(3)]
[Arguments(4)]
[Arguments(5)]
[Arguments(6)]
[Arguments(7)]
[Arguments(8)]
[Arguments(9)]
[Arguments(10)]
public long ParseRawVarint64_ParseContext(int encodedSize)
{
    InitializeParseContext(varintInputBuffers[encodedSize], out ParseContext context);
    long total = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        total += context.ReadInt64();
    }

    return total;
}
/// <summary>
/// Reads <c>BytesToParse / sizeof(uint)</c> values from the fixed-width input buffer with
/// <c>CodedInputStream.ReadFixed32</c> and returns their sum.
/// </summary>
[Benchmark]
public uint ParseFixed32_CodedInputStream()
{
    const int encodedSize = sizeof(uint);
    var input = new CodedInputStream(fixedIntInputBuffer);
    uint total = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        total += input.ReadFixed32();
    }

    return total;
}
/// <summary>
/// Reads <c>BytesToParse / sizeof(uint)</c> values from the fixed-width input buffer with
/// <c>ParseContext.ReadFixed32</c> and returns their sum.
/// </summary>
[Benchmark]
public uint ParseFixed32_ParseContext()
{
    const int encodedSize = sizeof(uint);
    InitializeParseContext(fixedIntInputBuffer, out ParseContext context);
    uint total = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        total += context.ReadFixed32();
    }

    return total;
}
/// <summary>
/// Reads <c>BytesToParse / sizeof(ulong)</c> values from the fixed-width input buffer with
/// <c>CodedInputStream.ReadFixed64</c> and returns their sum.
/// </summary>
[Benchmark]
public ulong ParseFixed64_CodedInputStream()
{
    const int encodedSize = sizeof(ulong);
    var input = new CodedInputStream(fixedIntInputBuffer);
    ulong total = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        total += input.ReadFixed64();
    }

    return total;
}
/// <summary>
/// Reads <c>BytesToParse / sizeof(ulong)</c> values from the fixed-width input buffer with
/// <c>ParseContext.ReadFixed64</c> and returns their sum.
/// </summary>
[Benchmark]
public ulong ParseFixed64_ParseContext()
{
    const int encodedSize = sizeof(ulong);
    InitializeParseContext(fixedIntInputBuffer, out ParseContext context);
    ulong total = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        total += context.ReadFixed64();
    }

    return total;
}
/// <summary>
/// Reads <c>BytesToParse / sizeof(float)</c> floats from the float input buffer with
/// <c>CodedInputStream.ReadFloat</c> and returns their sum.
/// </summary>
[Benchmark]
public float ParseRawFloat_CodedInputStream()
{
    const int encodedSize = sizeof(float);
    var input = new CodedInputStream(floatInputBuffer);
    float total = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        total += input.ReadFloat();
    }

    return total;
}
/// <summary>
/// Reads <c>BytesToParse / sizeof(float)</c> floats from the float input buffer with
/// <c>ParseContext.ReadFloat</c> and returns their sum.
/// </summary>
[Benchmark]
public float ParseRawFloat_ParseContext()
{
    const int encodedSize = sizeof(float);
    InitializeParseContext(floatInputBuffer, out ParseContext context);
    float total = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        total += context.ReadFloat();
    }

    return total;
}
/// <summary>
/// Reads <c>BytesToParse / sizeof(double)</c> doubles from the double input buffer with
/// <c>CodedInputStream.ReadDouble</c> and returns their sum.
/// </summary>
[Benchmark]
public double ParseRawDouble_CodedInputStream()
{
    const int encodedSize = sizeof(double);
    var input = new CodedInputStream(doubleInputBuffer);
    double total = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        total += input.ReadDouble();
    }

    return total;
}
/// <summary>
/// Reads <c>BytesToParse / sizeof(double)</c> doubles from the double input buffer with
/// <c>ParseContext.ReadDouble</c> and returns their sum.
/// </summary>
[Benchmark]
public double ParseRawDouble_ParseContext()
{
    const int encodedSize = sizeof(double);
    InitializeParseContext(doubleInputBuffer, out ParseContext context);
    double total = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        total += context.ReadDouble();
    }

    return total;
}
/// <summary>
/// Reads <c>BytesToParse / encodedSize</c> strings with <c>CodedInputStream.ReadString</c>
/// and returns the sum of their lengths.
/// </summary>
/// <param name="encodedSize">Encoded size in bytes of each string; selects the input buffer.</param>
[Benchmark]
[ArgumentsSource(nameof(StringEncodedSizes))]
public int ParseString_CodedInputStream(int encodedSize)
{
    var input = new CodedInputStream(stringInputBuffers[encodedSize]);
    int totalLength = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        string value = input.ReadString();
        totalLength += value.Length;
    }

    return totalLength;
}
/// <summary>
/// Reads <c>BytesToParse / encodedSize</c> strings with <c>ParseContext.ReadString</c>
/// and returns the sum of their lengths.
/// </summary>
/// <param name="encodedSize">Encoded size in bytes of each string; selects the input buffer.</param>
[Benchmark]
[ArgumentsSource(nameof(StringEncodedSizes))]
public int ParseString_ParseContext(int encodedSize)
{
    InitializeParseContext(stringInputBuffers[encodedSize], out ParseContext context);
    int totalLength = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        string value = context.ReadString();
        totalLength += value.Length;
    }

    return totalLength;
}
/// <summary>
/// Reads <c>BytesToParse / encodedSize</c> strings with <c>ParseContext.ReadString</c> from the
/// segmented input sequence and returns the sum of their lengths.
/// </summary>
/// <param name="encodedSize">Encoded size in bytes of each string; selects the segmented input.</param>
[Benchmark]
[ArgumentsSource(nameof(StringSegmentedEncodedSizes))]
public int ParseString_ParseContext_MultipleSegments(int encodedSize)
{
    InitializeParseContext(stringInputBuffersSegmented[encodedSize], out ParseContext context);
    int totalLength = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        string value = context.ReadString();
        totalLength += value.Length;
    }

    return totalLength;
}
/// <summary>
/// Reads <c>BytesToParse / encodedSize</c> length-delimited values with
/// <c>CodedInputStream.ReadBytes</c> and returns the sum of their lengths.
/// </summary>
/// <param name="encodedSize">Encoded size in bytes of each value; selects the input buffer.</param>
[Benchmark]
[ArgumentsSource(nameof(StringEncodedSizes))]
public int ParseBytes_CodedInputStream(int encodedSize)
{
    var input = new CodedInputStream(stringInputBuffers[encodedSize]);
    int totalLength = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        totalLength += input.ReadBytes().Length;
    }

    return totalLength;
}
/// <summary>
/// Reads <c>BytesToParse / encodedSize</c> length-delimited values with
/// <c>ParseContext.ReadBytes</c> and returns the sum of their lengths.
/// </summary>
/// <param name="encodedSize">Encoded size in bytes of each value; selects the input buffer.</param>
[Benchmark]
[ArgumentsSource(nameof(StringEncodedSizes))]
public int ParseBytes_ParseContext(int encodedSize)
{
    InitializeParseContext(stringInputBuffers[encodedSize], out ParseContext context);
    int totalLength = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        totalLength += context.ReadBytes().Length;
    }

    return totalLength;
}
/// <summary>
/// Reads <c>BytesToParse / encodedSize</c> length-delimited values with
/// <c>ParseContext.ReadBytes</c> from the segmented input sequence and returns the sum of their lengths.
/// </summary>
/// <param name="encodedSize">Encoded size in bytes of each value; selects the segmented input.</param>
[Benchmark]
[ArgumentsSource(nameof(StringSegmentedEncodedSizes))]
public int ParseBytes_ParseContext_MultipleSegments(int encodedSize)
{
    InitializeParseContext(stringInputBuffersSegmented[encodedSize], out ParseContext context);
    int totalLength = 0;
    for (int index = 0; index < BytesToParse / encodedSize; index++)
    {
        totalLength += context.ReadBytes().Length;
    }

    return totalLength;
}
/// <summary>
/// Wraps <paramref name="buffer"/> in a <see cref="ReadOnlySequence{T}"/> and initializes a parse context over it.
/// </summary>
/// <param name="buffer">Array holding the encoded data to parse.</param>
/// <param name="ctx">Receives the initialized parse context.</param>
private static void InitializeParseContext(byte[] buffer, out ParseContext ctx)
{
    var sequence = new ReadOnlySequence<byte>(buffer);
    ParseContext.Initialize(sequence, out ctx);
}
/// <summary>
/// Initializes a parse context over the given sequence.
/// </summary>
/// <param name="buffer">Sequence holding the encoded data to parse.</param>
/// <param name="ctx">Receives the initialized parse context.</param>
private static void InitializeParseContext(ReadOnlySequence<byte> buffer, out ParseContext ctx)
    => ParseContext.Initialize(buffer, out ctx);
/// <summary>
/// Encodes <c>valueCount + paddingValueCount</c> random varints, each occupying exactly
/// <paramref name="encodedSize"/> bytes, and returns the encoded bytes.
/// </summary>
/// <param name="random">Source of the random values.</param>
/// <param name="valueCount">Number of values the benchmark will read.</param>
/// <param name="encodedSize">Encoded size in bytes of every value.</param>
/// <param name="paddingValueCount">Number of additional values appended after the measured ones.</param>
/// <exception cref="InvalidOperationException">The encoded output does not have the expected length.</exception>
private static byte[] CreateBufferWithRandomVarints(Random random, int valueCount, int encodedSize, int paddingValueCount)
{
    int totalValues = valueCount + paddingValueCount;
    var stream = new MemoryStream();
    var output = new CodedOutputStream(stream);
    for (int written = 0; written < totalValues; written++)
    {
        ulong value = RandomUnsignedVarint(random, encodedSize, false);
        output.WriteUInt64(value);
    }

    output.Flush();
    byte[] encoded = stream.ToArray();

    // Sanity check: every value must have taken exactly encodedSize bytes.
    if (encoded.Length == encodedSize * totalValues)
    {
        return encoded;
    }

    throw new InvalidOperationException($"Unexpected output buffer length {encoded.Length}");
}
/// <summary>
/// Encodes <c>valueCount + paddingValueCount</c> random floats (each <c>random.NextDouble()</c> cast to float)
/// and returns the encoded bytes.
/// </summary>
/// <param name="random">Source of the random values.</param>
/// <param name="valueCount">Number of values the benchmark will read.</param>
/// <param name="paddingValueCount">Number of additional values appended after the measured ones.</param>
private static byte[] CreateBufferWithRandomFloats(Random random, int valueCount, int paddingValueCount)
{
    var stream = new MemoryStream();
    var output = new CodedOutputStream(stream);
    for (int written = 0; written < valueCount + paddingValueCount; written++)
    {
        float value = (float)random.NextDouble();
        output.WriteFloat(value);
    }

    output.Flush();
    return stream.ToArray();
}
/// <summary>
/// Encodes <c>valueCount + paddingValueCount</c> random doubles (from <c>random.NextDouble()</c>)
/// and returns the encoded bytes.
/// </summary>
/// <param name="random">Source of the random values.</param>
/// <param name="valueCount">Number of values the benchmark will read.</param>
/// <param name="paddingValueCount">Number of additional values appended after the measured ones.</param>
private static byte[] CreateBufferWithRandomDoubles(Random random, int valueCount, int paddingValueCount)
{
    var stream = new MemoryStream();
    var output = new CodedOutputStream(stream);
    for (int written = 0; written < valueCount + paddingValueCount; written++)
    {
        double value = random.NextDouble();
        output.WriteDouble(value);
    }

    output.Flush();
    return stream.ToArray();
}
/// <summary>
/// Returns an array of <c>(valueCount + paddingValueCount) * encodedSize</c> bytes filled by
/// <c>random.NextBytes</c>.
/// </summary>
/// <param name="random">Source of the random bytes.</param>
/// <param name="valueCount">Number of values the benchmark will read.</param>
/// <param name="encodedSize">Size in bytes of each value.</param>
/// <param name="paddingValueCount">Number of additional values appended after the measured ones.</param>
private static byte[] CreateBufferWithRandomData(Random random, int valueCount, int encodedSize, int paddingValueCount)
{
    int totalValues = valueCount + paddingValueCount;
    var data = new byte[totalValues * encodedSize];
    random.NextBytes(data);
    return data;
}
/// <summary>
/// Generate a random value that will take exactly "encodedSize" bytes when varint-encoded.
/// </summary>
/// <param name="random">Source of the random bits.</param>
/// <param name="encodedSize">
/// Required varint length in bytes: 1 to 10, or 1 to 5 when <paramref name="fitsIn32Bits"/> is true.
/// </param>
/// <param name="fitsIn32Bits">When true, the result is additionally masked to fit in a <c>uint</c>.</param>
/// <returns>A random value whose varint encoding is exactly <paramref name="encodedSize"/> bytes long.</returns>
/// <exception cref="ArgumentException"><paramref name="encodedSize"/> is outside the allowed range.</exception>
public static ulong RandomUnsignedVarint(Random random, int encodedSize, bool fitsIn32Bits)
{
    if (encodedSize < 1 || encodedSize > 10 || (fitsIn32Bits && encodedSize > 5))
    {
        throw new ArgumentException("Illegal encodedSize value requested", nameof(encodedSize));
    }

    // Each varint byte carries 7 bits of payload.
    const int bitsPerByte = 7;

    // Only keep the number of random bits that can fit in "encodedSize" varint bytes.
    // (For encodedSize == 10 the shift would be 70 bits, so all 64 bits are kept instead.)
    ulong bitmask = encodedSize < 10 ? ((1UL << (encodedSize * bitsPerByte)) - 1) : ulong.MaxValue;

    Span<byte> randomBytesBuffer = stackalloc byte[sizeof(ulong)];
    while (true)
    {
        random.NextBytes(randomBytesBuffer);
        ulong result = BinaryPrimitives.ReadUInt64LittleEndian(randomBytesBuffer) & bitmask;
        if (fitsIn32Bits)
        {
            // make sure the resulting value is representable by a uint.
            result &= uint.MaxValue;
        }

        if (encodedSize == 10)
        {
            // For 10-byte values the highest bit always needs to be set (7*9=63 bits fit in 9 bytes).
            // Only bit 63 is forced; the remaining 63 bits stay random. (OR-ing with ulong.MaxValue
            // would turn every generated value into the same constant.)
            return result | (1UL << 63);
        }

        // Some random values won't require the full "encodedSize" bytes: check that at least
        // one of the top 7 payload bits is set. Retrying is fine since it only happens rarely.
        // A single byte can encode any 7-bit value, including 0, so no check is needed there.
        if (encodedSize == 1 || (result & (0x7FUL << ((encodedSize - 1) * bitsPerByte))) != 0)
        {
            return result;
        }
    }
}
/// <summary>
/// Encodes <c>valueCount + paddingValueCount</c> copies of a string whose encoded size is
/// <paramref name="encodedSize"/> bytes and returns the encoded bytes.
/// </summary>
/// <param name="valueCount">Number of values the benchmark will read.</param>
/// <param name="encodedSize">Encoded size in bytes of every string.</param>
/// <param name="paddingValueCount">Number of additional values appended after the measured ones.</param>
/// <exception cref="InvalidOperationException">The encoded output does not have the expected length.</exception>
private static byte[] CreateBufferWithStrings(int valueCount, int encodedSize, int paddingValueCount)
{
    string value = CreateStringWithEncodedSize(encodedSize);
    int totalValues = valueCount + paddingValueCount;

    var stream = new MemoryStream();
    var output = new CodedOutputStream(stream);
    for (int written = 0; written < totalValues; written++)
    {
        output.WriteString(value);
    }

    output.Flush();
    byte[] encoded = stream.ToArray();

    // Sanity check: every string must have taken exactly encodedSize bytes.
    if (encoded.Length == encodedSize * totalValues)
    {
        return encoded;
    }

    throw new InvalidOperationException($"Unexpected output buffer length {encoded.Length}");
}
/// <summary>
/// Creates a string of 'a' characters for which <c>CodedOutputStream.ComputeStringSize</c>
/// returns exactly <paramref name="encodedSize"/>.
/// </summary>
/// <param name="encodedSize">Required encoded size in bytes.</param>
/// <returns>The generated string.</returns>
/// <exception cref="InvalidOperationException">The generated string does not have the requested encoded size.</exception>
public static string CreateStringWithEncodedSize(int encodedSize)
{
    // Start with encodedSize characters and drop one at a time until the computed size no longer exceeds the target.
    string candidate = new string('a', encodedSize);
    while (CodedOutputStream.ComputeStringSize(candidate) > encodedSize)
    {
        candidate = candidate.Substring(1);
    }

    if (CodedOutputStream.ComputeStringSize(candidate) == encodedSize)
    {
        return candidate;
    }

    throw new InvalidOperationException($"Generated string with wrong encodedSize");
}
/// <summary>
/// Creates a string made of two-byte (in UTF8) characters, padded with 'a' where needed, for which
/// <c>CodedOutputStream.ComputeStringSize</c> returns exactly <paramref name="encodedSize"/>.
/// </summary>
/// <param name="encodedSize">Required encoded size in bytes; must be at least 3.</param>
/// <returns>The generated string.</returns>
/// <exception cref="ArgumentException"><paramref name="encodedSize"/> is smaller than 3.</exception>
/// <exception cref="InvalidOperationException">The generated string does not have the requested encoded size.</exception>
public static string CreateNonAsciiStringWithEncodedSize(int encodedSize)
{
    if (encodedSize < 3)
    {
        throw new ArgumentException("Illegal encoded size for a string with non-ascii chars.");
    }

    const char twoByteChar = '\u00DC'; // U-umlaut, UTF8 encoding has 2 bytes

    // Start from encodedSize / 2 two-byte characters and trim from the front while the encoding is too large.
    string candidate = new string(twoByteChar, encodedSize / 2);
    while (CodedOutputStream.ComputeStringSize(candidate) > encodedSize)
    {
        candidate = candidate.Substring(1);
    }

    // Pad with ascii characters until the desired encoded size is reached.
    while (CodedOutputStream.ComputeStringSize(candidate) < encodedSize)
    {
        candidate = candidate + 'a';
    }

    // Note that for a few specific encodedSize values, it might be impossible to generate
    // the string with the desired encodedSize using the algorithm above. For testing purposes,
    // checking that the encoded size we got is actually correct is good enough.
    if (CodedOutputStream.ComputeStringSize(candidate) == encodedSize)
    {
        return candidate;
    }

    throw new InvalidOperationException($"Generated string with wrong encodedSize");
}
- }
- }
|