Prechádzať zdrojové kódy

Micro-optimisations around varints and strings.

Jon Skeet 17 rokov pred
rodič
commit
38da52d349

+ 52 - 7
csharp/ProtocolBuffers/CodedInputStream.cs

@@ -356,14 +356,15 @@ namespace Google.ProtocolBuffers {
     #endregion
 
     #region Underlying reading primitives
+
     /// <summary>
-    /// Read a raw Varint from the stream.  If larger than 32 bits, discard the upper bits.
+    /// Same logic as ReadRawVarint32, but reads each byte individually via ReadRawByte,
+    /// rechecking the buffer length for every byte.
     /// </summary>
-    /// <returns></returns>
-    public uint ReadRawVarint32() {
+    private uint SlowReadRawVarint32() {
       int tmp = ReadRawByte();
       if (tmp < 128) {
-        return (uint) tmp;
+        return (uint)tmp;
       }
       int result = tmp & 0x7f;
       if ((tmp = ReadRawByte()) < 128) {
@@ -382,14 +383,59 @@ namespace Google.ProtocolBuffers {
             if (tmp >= 128) {
               // Discard upper 32 bits.
               for (int i = 0; i < 5; i++) {
-                if (ReadRawByte() < 128) return (uint) result;
+                if (ReadRawByte() < 128) return (uint)result;
+              }
+              throw InvalidProtocolBufferException.MalformedVarint();
+            }
+          }
+        }
+      }
+      return (uint)result;
+    }
+
+    /// <summary>
+    /// Read a raw Varint from the stream.  If larger than 32 bits, discard the upper bits.
+    /// This method is optimised for the case where we've got lots of data in the buffer.
+    /// That means we can check the size just once, then just read directly from the buffer
+    /// without constant rechecking of the buffer length.
+    /// </summary>
+    public uint ReadRawVarint32() {
+      if (bufferPos + 5 > bufferSize) {
+        return SlowReadRawVarint32();
+      }
+
+      int tmp = buffer[bufferPos++];
+      if (tmp < 128) {
+        return (uint)tmp;
+      }
+      int result = tmp & 0x7f;
+      if ((tmp = buffer[bufferPos++]) < 128) {
+        result |= tmp << 7;
+      } else {
+        result |= (tmp & 0x7f) << 7;
+        if ((tmp = buffer[bufferPos++]) < 128) {
+          result |= tmp << 14;
+        } else {
+          result |= (tmp & 0x7f) << 14;
+          if ((tmp = buffer[bufferPos++]) < 128) {
+            result |= tmp << 21;
+          } else {
+            result |= (tmp & 0x7f) << 21;
+            result |= (tmp = buffer[bufferPos++]) << 28;
+            if (tmp >= 128) {
+              // Discard upper 32 bits.
+              // Note that this has to use ReadRawByte() as we only ensure we've
+              // got at least 5 bytes at the start of the method. This lets us
+              // use the fast path in more cases, and we rarely hit this section of code.
+              for (int i = 0; i < 5; i++) {
+                if (ReadRawByte() < 128) return (uint)result;
               }
               throw InvalidProtocolBufferException.MalformedVarint();
             }
           }
         }
       }
-      return (uint) result;
+      return (uint)result;
     }
 
     /// <summary>
@@ -571,7 +617,6 @@ namespace Google.ProtocolBuffers {
       bufferPos = 0;
       bufferSize = (input == null) ? 0 : input.Read(buffer, 0, buffer.Length);
       if (bufferSize == 0) {
-        bufferSize = 0;
         if (mustSucceed) {
           throw InvalidProtocolBufferException.TruncatedMessage();
         } else {

+ 34 - 10
csharp/ProtocolBuffers/CodedOutputStream.cs

@@ -172,15 +172,17 @@ namespace Google.ProtocolBuffers {
     /// </summary>
     public void WriteString(int fieldNumber, string value) {
       WriteTag(fieldNumber, WireFormat.WireType.LengthDelimited);
-      // TODO(jonskeet): Optimise this if possible
-      // Unfortunately there does not appear to be any way to tell Java to encode
-      // UTF-8 directly into our buffer, so we have to let it create its own byte
-      // array and then copy. In .NET we can do the same thing very easily,
-      // so we don't need to worry about only writing one buffer at a time.
-      // We can optimise later.
-      byte[] bytes = Encoding.UTF8.GetBytes(value);
-      WriteRawVarint32((uint)bytes.Length);
-      WriteRawBytes(bytes);
+      // Optimise the case where we have enough space to write
+      // the string directly to the buffer, which should be common.
+      int length = Encoding.UTF8.GetByteCount(value);
+      WriteRawVarint32((uint) length);
+      if (limit - position >= length) {
+        Encoding.UTF8.GetBytes(value, 0, value.Length, buffer, position);
+        position += length;
+      } else {
+        byte[] bytes = Encoding.UTF8.GetBytes(value);
+        WriteRawBytes(bytes);
+      }
     }
 
     /// <summary>
@@ -290,7 +292,7 @@ namespace Google.ProtocolBuffers {
       WriteRawVarint32(WireFormat.MakeTag(fieldNumber, type));
     }
 
-    public void WriteRawVarint32(uint value) {
+    private void SlowWriteRawVarint32(uint value) {
       while (true) {
         if ((value & ~0x7F) == 0) {
           WriteRawByte(value);
@@ -302,6 +304,28 @@ namespace Google.ProtocolBuffers {
       }
     }
 
+    /// <summary>
+    /// Writes a 32-bit value as a varint. The fast path is taken when at least
+    /// five bytes (the longest possible 32-bit varint) remain in the buffer, so no
+    /// per-byte space check is needed; otherwise WriteRawByte is called for each byte.
+    /// </summary>
+    public void WriteRawVarint32(uint value) {
+      if (position + 5 > limit) {
+        SlowWriteRawVarint32(value);
+        return;
+      }
+
+      while (true) {
+        if ((value & ~0x7F) == 0) {
+          buffer[position++] = (byte) value;
+          return;
+        } else {
+          buffer[position++] = (byte)((value & 0x7F) | 0x80);
+          value >>= 7;
+        }
+      }
+    }
+
     public void WriteRawVarint64(ulong value) {
       while (true) {
         if ((value & ~0x7FUL) == 0) {