Selaa lähdekoodia

Merge pull request #8149 from JamesNK/jamesnk/writestring-small

Optimize writing small strings
Jan Tattermusch 4 vuotta sitten
vanhempi
commit
4140735f05

+ 24 - 0
csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs

@@ -35,6 +35,7 @@ using System.IO;
 using Google.Protobuf.TestProtos;
 using Google.Protobuf.Buffers;
 using NUnit.Framework;
+using System.Text;
 
 namespace Google.Protobuf
 {
@@ -516,5 +517,28 @@ namespace Google.Protobuf
             var stream = new CodedOutputStream(new byte[10]);
             stream.Dispose();
         }
+
+        [Test]
+        public void WriteStringsOfDifferentSizes() // round-trips strings of every length from 1 to 1024 chars through WriteString/ReadString
+        {
+            for (int i = 1; i <= 1024; i++) // i is the string length in chars; the empty string (length 0) is not exercised
+            {
+                var buffer = new byte[4096]; // fresh buffer per length, larger than the longest string written
+                var output = new CodedOutputStream(buffer);
+                var sb = new StringBuilder();
+                for (int j = 0; j < i; j++)
+                {
+                    sb.Append((j % 10).ToString()); // digits 0-9, repeating; only ASCII characters are produced
+                }
+                var s = sb.ToString();
+                output.WriteString(s);
+
+                output.Flush(); // presumably commits any pending bytes to the backing array before it is read back — TODO confirm
+
+                // Verify written content by decoding the same array and comparing with the original string
+                var input = new CodedInputStream(buffer);
+                Assert.AreEqual(s, input.ReadString());
+            }
+        }
     }
 }

+ 45 - 19
csharp/src/Google.Protobuf/WritingPrimitives.cs

@@ -163,10 +163,25 @@ namespace Google.Protobuf
         /// </summary>
         public static void WriteString(ref Span<byte> buffer, ref WriterInternalState state, string value)
         {
-            // Optimise the case where we have enough space to write
-            // the string directly to the buffer, which should be common.
+            const int MaxBytesPerChar = 3;
+            const int MaxSmallStringLength = 128 / MaxBytesPerChar;
+
+            // The string is small enough that the length will always be a 1 byte varint.
+            // Also there is enough space to write length + bytes to buffer.
+            // Write string directly to the buffer, and then write length.
+            // This saves calling GetByteCount on the string. We get the encoded byte count from GetBytes instead.
+            if (value.Length <= MaxSmallStringLength && buffer.Length - state.position - 1 >= value.Length * MaxBytesPerChar)
+            {
+                int indexOfLengthDelimiter = state.position++;
+                buffer[indexOfLengthDelimiter] = (byte)WriteStringToBuffer(buffer, ref state, value);
+                return;
+            }
+
             int length = Utf8Encoding.GetByteCount(value);
             WriteLength(ref buffer, ref state, length);
+
+            // Optimise the case where we have enough space to write
+            // the string directly to the buffer, which should be common.
             if (buffer.Length - state.position >= length)
             {
                 if (length == value.Length) // Must be all ASCII...
@@ -179,23 +194,7 @@ namespace Google.Protobuf
                 }
                 else
                 {
-#if NETSTANDARD1_1
-                    // slowpath when Encoding.GetBytes(Char*, Int32, Byte*, Int32) is not available
-                    byte[] bytes = Utf8Encoding.GetBytes(value);
-                    WriteRawBytes(ref buffer, ref state, bytes);
-#else
-                    ReadOnlySpan<char> source = value.AsSpan();
-                    int bytesUsed;
-                    unsafe
-                    {
-                        fixed (char* sourceChars = &MemoryMarshal.GetReference(source))
-                        fixed (byte* destinationBytes = &MemoryMarshal.GetReference(buffer.Slice(state.position)))
-                        {
-                            bytesUsed = Utf8Encoding.GetBytes(sourceChars, source.Length, destinationBytes, buffer.Length);
-                        }
-                    }
-                    state.position += bytesUsed;
-#endif
+                    WriteStringToBuffer(buffer, ref state, value);
                 }
             }
             else
@@ -209,6 +208,33 @@ namespace Google.Protobuf
             }
         }
 
+        private static int WriteStringToBuffer(Span<byte> buffer, ref WriterInternalState state, string value) // encodes value with Utf8Encoding at state.position; returns the number of bytes produced
+        {
+#if NETSTANDARD1_1
+            // slowpath when Encoding.GetBytes(Char*, Int32, Byte*, Int32) is not available: encode into a temporary byte[] and hand it to WriteRawBytes
+            byte[] bytes = Utf8Encoding.GetBytes(value);
+            WriteRawBytes(ref buffer, ref state, bytes); // NOTE(review): buffer is a by-value copy in this method; if WriteRawBytes reassigns it the caller will not see that — confirm callers guarantee capacity
+            return bytes.Length;
+#else
+            ReadOnlySpan<char> source = value.AsSpan();
+            int bytesUsed;
+            unsafe
+            {
+                fixed (char* sourceChars = &MemoryMarshal.GetReference(source))
+                fixed (byte* destinationBytes = &MemoryMarshal.GetReference(buffer)) // pins the start of buffer; the write offset is added to the pointer below
+                {
+                    bytesUsed = Utf8Encoding.GetBytes(
+                        sourceChars,
+                        source.Length,
+                        destinationBytes + state.position, // destination begins at the current write position
+                        buffer.Length - state.position); // capacity remaining after the current write position
+                }
+            }
+            state.position += bytesUsed; // advance past the bytes just encoded
+            return bytesUsed;
+#endif
+        }
+
         /// <summary>
         /// Write a byte string, without a tag, to the stream.
         /// The data is length-prefixed.