Просмотр исходного кода

add benchmark for writing of NonAsciiStrings

Jan Tattermusch 5 лет назад
Родитель
Сommit
3dad187707

+ 29 - 0
csharp/src/Google.Protobuf.Benchmarks/ParseRawPrimitivesBenchmark.cs

@@ -463,5 +463,34 @@ namespace Google.Protobuf.Benchmarks
             }
             return str;
         }
+
+        /// <summary>
+        /// Creates a string containing non-ASCII characters for which
+        /// <c>CodedOutputStream.ComputeStringSize</c> returns exactly <paramref name="encodedSize"/>.
+        /// </summary>
+        /// <param name="encodedSize">The desired encoded size; values below 3 are rejected.</param>
+        /// <returns>
+        /// A run of U+00DC characters, followed by 'a' padding when that is needed to reach the size.
+        /// </returns>
+        /// <exception cref="ArgumentException">
+        /// <paramref name="encodedSize"/> is less than 3.
+        /// </exception>
+        /// <exception cref="InvalidOperationException">
+        /// The generated string does not have the requested encoded size.
+        /// </exception>
+        public static string CreateNonAsciiStringWithEncodedSize(int encodedSize)
+        {
+            // NOTE(review): presumably the encoded size also counts the length prefix, so a single
+            // two-byte char already needs 3 - confirm against ComputeStringSize.
+            if (encodedSize < 3)
+            {
+                throw new ArgumentException("Illegal encoded size for a string with non-ascii chars.", nameof(encodedSize));
+            }
+            var twoByteChar = '\u00DC';  // U-umlaut, UTF8 encoding has 2 bytes
+
+            // Start with encodedSize / 2 two-byte chars and drop chars from the front
+            // until the encoded size no longer exceeds the target.
+            var str = new string(twoByteChar, encodedSize / 2);
+            while (CodedOutputStream.ComputeStringSize(str) > encodedSize)
+            {
+                str = str.Substring(1);
+            }
+
+            // add padding of ascii characters to reach the desired encoded size.
+            while (CodedOutputStream.ComputeStringSize(str) < encodedSize)
+            {
+                str += 'a';
+            }
+
+            // Note that for a few specific encodedSize values, it might be impossible to generate
+            // the string with the desired encodedSize using the algorithm above. For testing purposes, checking that
+            // the encoded size we got is actually correct is good enough.
+            var actualSize = CodedOutputStream.ComputeStringSize(str);
+            if (actualSize != encodedSize)
+            {
+                // Report both sizes so an unreachable encodedSize is easy to diagnose.
+                throw new InvalidOperationException($"Generated string with wrong encodedSize (expected {encodedSize}, got {actualSize}).");
+            }
+            return str;
+        }
     }
 }

+ 53 - 0
csharp/src/Google.Protobuf.Benchmarks/WriteRawPrimitivesBenchmark.cs

@@ -56,6 +56,9 @@ namespace Google.Protobuf.Benchmarks
         // key is the encodedSize of string values (one entry per StringEncodedSizes value)
         Dictionary<int, string[]> stringValues;
 
+        // key is the encodedSize of the non-ASCII string values (one entry per NonAsciiStringEncodedSizes value)
+        Dictionary<int, string[]> nonAsciiStringValues;
+
         // key is the encodedSize passed to CreateByteStrings (one entry per StringEncodedSizes value)
         Dictionary<int, ByteString[]> byteStringValues;
 
@@ -66,6 +69,8 @@ namespace Google.Protobuf.Benchmarks
 
         // Encoded sizes used as benchmark arguments and as the keys of stringValues / byteStringValues.
         public IEnumerable<int> StringEncodedSizes => new[] { 1, 4, 10, 105, 10080 };
 
+        // Encoded sizes used as arguments of the non-ASCII string benchmarks and as the keys of
+        // nonAsciiStringValues. Unlike StringEncodedSizes there is no entry of 1, because
+        // ParseRawPrimitivesBenchmark.CreateNonAsciiStringWithEncodedSize rejects sizes below 3.
+        public IEnumerable<int> NonAsciiStringEncodedSizes => new[] { 4, 10, 105, 10080 };
+
         [GlobalSetup]
         public void GlobalSetup()
         {
@@ -86,12 +91,19 @@ namespace Google.Protobuf.Benchmarks
             floatValues = CreateRandomFloats(random, BytesToWrite / sizeof(float));
 
             stringValues = new Dictionary<int, string[]>();
+
             byteStringValues = new Dictionary<int, ByteString[]>();
             foreach(var encodedSize in StringEncodedSizes)
             {
                 stringValues.Add(encodedSize, CreateStrings(BytesToWrite / encodedSize, encodedSize));
                 byteStringValues.Add(encodedSize, CreateByteStrings(BytesToWrite / encodedSize, encodedSize));
             }
+
+            nonAsciiStringValues = new Dictionary<int, string[]>();
+            foreach(var encodedSize in NonAsciiStringEncodedSizes)
+            {
+                nonAsciiStringValues.Add(encodedSize, CreateNonAsciiStrings(BytesToWrite / encodedSize, encodedSize));
+            }
         }
 
         // Total number of bytes that each benchmark will write.
@@ -318,6 +330,35 @@ namespace Google.Protobuf.Benchmarks
             ctx.CheckNoSpaceLeft();
         }
 
+        // Writes every pre-generated non-ASCII string of the given encoded size through a
+        // CodedOutputStream over outputBuffer, then flushes and checks that no space is left.
+        [Benchmark]
+        [ArgumentsSource(nameof(NonAsciiStringEncodedSizes))]
+        public void WriteNonAsciiString_CodedOutputStream(int encodedSize)
+        {
+            CodedOutputStream output = new CodedOutputStream(outputBuffer);
+            string[] strings = nonAsciiStringValues[encodedSize];
+            for (int i = 0; i < strings.Length; i++)
+            {
+                output.WriteString(strings[i]);
+            }
+            output.Flush();
+            output.CheckNoSpaceLeft();
+        }
+
+        // Writes every pre-generated non-ASCII string of the given encoded size through a
+        // WriteContext initialized over outputBuffer, then flushes and checks that no space is left.
+        [Benchmark]
+        [ArgumentsSource(nameof(NonAsciiStringEncodedSizes))]
+        public void WriteNonAsciiString_WriteContext(int encodedSize)
+        {
+            string[] strings = nonAsciiStringValues[encodedSize];
+            Span<byte> destination = new Span<byte>(outputBuffer);
+            WriteContext.Initialize(ref destination, out WriteContext writer);
+            for (int i = 0; i < strings.Length; i++)
+            {
+                writer.WriteString(strings[i]);
+            }
+            writer.Flush();
+            writer.CheckNoSpaceLeft();
+        }
+
         [Benchmark]
         [ArgumentsSource(nameof(StringEncodedSizes))]
         public void WriteBytes_CodedOutputStream(int encodedSize)
@@ -399,6 +440,18 @@ namespace Google.Protobuf.Benchmarks
             return result;
         }
 
+        // Returns an array of valueCount entries that all reference one non-ASCII string
+        // generated for the requested encodedSize.
+        private static string[] CreateNonAsciiStrings(int valueCount, int encodedSize)
+        {
+            string template = ParseRawPrimitivesBenchmark.CreateNonAsciiStringWithEncodedSize(encodedSize);
+            string[] strings = new string[valueCount];
+
+            int index = 0;
+            while (index < valueCount)
+            {
+                strings[index] = template;
+                index++;
+            }
+            return strings;
+        }
+
         private static ByteString[] CreateByteStrings(int valueCount, int encodedSize)
         {
             var str = ParseRawPrimitivesBenchmark.CreateStringWithEncodedSize(encodedSize);