ParsingPrimitives.cs 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815
  1. #region Copyright notice and license
  2. // Protocol Buffers - Google's data interchange format
  3. // Copyright 2008 Google Inc. All rights reserved.
  4. // https://developers.google.com/protocol-buffers/
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. #endregion
  32. using System;
  33. using System.Buffers;
  34. using System.Buffers.Binary;
  35. using System.Collections.Generic;
  36. using System.Diagnostics;
  37. using System.IO;
  38. using System.Runtime.CompilerServices;
  39. using System.Runtime.InteropServices;
  40. using System.Security;
  41. using System.Text;
  42. using Google.Protobuf.Collections;
  43. namespace Google.Protobuf
  44. {
  45. /// <summary>
  46. /// Primitives for parsing protobuf wire format.
  47. /// </summary>
  48. [SecuritySafeCritical]
  49. internal static class ParsingPrimitives
  50. {
  51. private const int StackallocThreshold = 256;
  /// <summary>
  /// Parses the length prefix of a length-delimited value.
  /// </summary>
  /// <remarks>
  /// The prefix is read as a raw 32-bit varint and reinterpreted as a signed
  /// <see cref="int"/>; this wrapper exists only to make call sites read more clearly.
  /// </remarks>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static int ParseLength(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      uint rawLength = ParseRawVarint32(ref buffer, ref state);
      return (int)rawLength;
  }
  /// <summary>
  /// Parses the next tag and records it as <c>state.lastTag</c>.
  /// Returns 0 (which is never a valid tag) when the end of the logical stream is reached.
  /// </summary>
  /// <exception cref="InvalidProtocolBufferException">
  /// A tag whose field number is 0 was read.
  /// </exception>
  public static uint ParseTag(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      // A tag that was previously peeked is handed out first. This "nextTag" caching is
      // purely an optimization for non-packed repeated / map fields.
      // TODO(jtattermusch): look into simplifying the ParseTag logic.
      if (state.hasNextTag)
      {
          state.hasNextTag = false;
          state.lastTag = state.nextTag;
          return state.lastTag;
      }

      bool twoBytesBuffered = state.bufferPos + 2 <= state.bufferSize;
      if (!twoBytesBuffered)
      {
          // Fewer than two bytes buffered: we may be at the very end of the input.
          if (SegmentedBufferHelper.IsAtEnd(ref buffer, ref state))
          {
              state.lastTag = 0;
              return 0;
          }

          state.lastTag = ParseRawVarint32(ref buffer, ref state);
      }
      else
      {
          // Common case: the tag fits in the one or two bytes that are already buffered.
          int first = buffer[state.bufferPos++];
          if (first < 128)
          {
              state.lastTag = (uint)first;
          }
          else
          {
              int second = buffer[state.bufferPos++];
              if (second < 128)
              {
                  state.lastTag = (uint)((first & 0x7f) | (second << 7));
              }
              else
              {
                  // Longer than two bytes: undo both reads and use the general varint parser.
                  state.bufferPos -= 2;
                  state.lastTag = ParseRawVarint32(ref buffer, ref state);
              }
          }
      }

      // A tag that was actually read but carries field number 0 is malformed.
      if (WireFormat.GetTagFieldNumber(state.lastTag) == 0)
      {
          throw InvalidProtocolBufferException.InvalidTag();
      }

      return state.lastTag;
  }
  /// <summary>
  /// Looks at the next tag without committing to it. When it equals <paramref name="tag"/>
  /// the tag is consumed and <c>true</c> is returned; otherwise the peeked tag stays
  /// pending and <c>false</c> is returned.
  /// </summary>
  public static bool MaybeConsumeTag(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state, uint tag)
  {
      bool matches = PeekTag(ref buffer, ref state) == tag;
      if (matches)
      {
          // Dropping the cached tag is what "consumes" it.
          state.hasNextTag = false;
      }

      return matches;
  }
  /// <summary>
  /// Returns the next field tag without consuming it, so that a following call to
  /// <see cref="ParseTag"/> yields the same value.
  /// </summary>
  public static uint PeekTag(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      if (!state.hasNextTag)
      {
          // ParseTag overwrites lastTag as a side effect; remember it so it can be restored.
          uint previousLastTag = state.lastTag;
          state.nextTag = ParseTag(ref buffer, ref state);
          state.hasNextTag = true;
          state.lastTag = previousLastTag;
      }

      return state.nextTag;
  }
  /// <summary>
  /// Parses a raw 64-bit varint.
  /// </summary>
  /// <exception cref="InvalidProtocolBufferException">
  /// The varint does not terminate within ten bytes.
  /// </exception>
  public static ulong ParseRawVarint64(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      // The direct-indexing path below needs the longest possible encoding (10 bytes) buffered.
      if (state.bufferPos + 10 > state.bufferSize)
      {
          return ParseRawVarint64SlowPath(ref buffer, ref state);
      }

      ulong value = buffer[state.bufferPos++];
      if (value < 128)
      {
          // Single-byte varint.
          return value;
      }

      value &= 0x7f;
      for (int shift = 7; shift < 64; shift += 7)
      {
          byte next = buffer[state.bufferPos++];
          value |= (ulong)(next & 0x7F) << shift;
          if (next < 0x80)
          {
              return value;
          }
      }

      // Continuation bit was still set on the tenth byte.
      throw InvalidProtocolBufferException.MalformedVarint();
  }
  /// <summary>
  /// Parses a raw 64-bit varint one byte at a time via <c>ReadRawByte</c>, for use when
  /// fewer than ten bytes are buffered.
  /// </summary>
  private static ulong ParseRawVarint64SlowPath(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      ulong value = 0;
      for (int shift = 0; shift < 64; shift += 7)
      {
          byte next = ReadRawByte(ref buffer, ref state);
          value |= (ulong)(next & 0x7F) << shift;
          if (next < 0x80)
          {
              return value;
          }
      }

      throw InvalidProtocolBufferException.MalformedVarint();
  }
  /// <summary>
  /// Parses a raw varint as a 32-bit value; any bits above the low 32 are discarded.
  /// When at least five bytes are buffered, the size is checked once and the bytes are then
  /// read straight out of the buffer without re-checking its length.
  /// </summary>
  /// <exception cref="InvalidProtocolBufferException">
  /// The varint does not terminate within ten bytes.
  /// </exception>
  public static uint ParseRawVarint32(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      if (state.bufferPos + 5 > state.bufferSize)
      {
          return ParseRawVarint32SlowPath(ref buffer, ref state);
      }

      int current = buffer[state.bufferPos++];
      if (current < 128)
      {
          return (uint)current;
      }

      int value = current & 0x7f;

      current = buffer[state.bufferPos++];
      if (current < 128)
      {
          return (uint)(value | (current << 7));
      }
      value |= (current & 0x7f) << 7;

      current = buffer[state.bufferPos++];
      if (current < 128)
      {
          return (uint)(value | (current << 14));
      }
      value |= (current & 0x7f) << 14;

      current = buffer[state.bufferPos++];
      if (current < 128)
      {
          return (uint)(value | (current << 21));
      }
      value |= (current & 0x7f) << 21;

      current = buffer[state.bufferPos++];
      value |= current << 28;
      if (current < 128)
      {
          return (uint)value;
      }

      // Discard the upper 32 bits. Only five bytes were guaranteed up front, so the
      // remaining bytes must go through ReadRawByte. This branch is rarely reached.
      for (int extra = 0; extra < 5; extra++)
      {
          if (ReadRawByte(ref buffer, ref state) < 128)
          {
              return (uint)value;
          }
      }

      throw InvalidProtocolBufferException.MalformedVarint();
  }
  /// <summary>
  /// Parses a raw varint as a 32-bit value one byte at a time via <c>ReadRawByte</c>,
  /// for use when fewer than five bytes are buffered. Bits above the low 32 are discarded.
  /// </summary>
  private static uint ParseRawVarint32SlowPath(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      int current = ReadRawByte(ref buffer, ref state);
      if (current < 128)
      {
          return (uint)current;
      }

      int value = current & 0x7f;

      current = ReadRawByte(ref buffer, ref state);
      if (current < 128)
      {
          return (uint)(value | (current << 7));
      }
      value |= (current & 0x7f) << 7;

      current = ReadRawByte(ref buffer, ref state);
      if (current < 128)
      {
          return (uint)(value | (current << 14));
      }
      value |= (current & 0x7f) << 14;

      current = ReadRawByte(ref buffer, ref state);
      if (current < 128)
      {
          return (uint)(value | (current << 21));
      }
      value |= (current & 0x7f) << 21;

      current = ReadRawByte(ref buffer, ref state);
      value |= current << 28;
      if (current < 128)
      {
          return (uint)value;
      }

      // Discard the upper 32 bits: skip up to five more continuation bytes.
      for (int extra = 0; extra < 5; extra++)
      {
          if (ReadRawByte(ref buffer, ref state) < 128)
          {
              return (uint)value;
          }
      }

      throw InvalidProtocolBufferException.MalformedVarint();
  }
  /// <summary>
  /// Parses a 32-bit little-endian integer.
  /// </summary>
  public static uint ParseRawLittleEndian32(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      // The fast path reads eight bytes, so eight (not four) bytes must be buffered.
      if (state.bufferPos + sizeof(ulong) > state.bufferSize)
      {
          return ParseRawLittleEndian32SlowPath(ref buffer, ref state);
      }

      // ReadUInt32LittleEndian is many times slower than ReadUInt64LittleEndian (at least on
      // some runtimes), so it is faster to read 64 bits and truncate the result.
      ulong wide = BinaryPrimitives.ReadUInt64LittleEndian(buffer.Slice(state.bufferPos, sizeof(ulong)));
      state.bufferPos += sizeof(uint);
      return (uint)wide;
  }
  /// <summary>
  /// Assembles a 32-bit little-endian integer from four bytes read one at a time.
  /// </summary>
  private static uint ParseRawLittleEndian32SlowPath(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      uint value = 0;
      for (int shift = 0; shift < 32; shift += 8)
      {
          // Least significant byte arrives first.
          uint next = ReadRawByte(ref buffer, ref state);
          value |= next << shift;
      }

      return value;
  }
  /// <summary>
  /// Parses a 64-bit little-endian integer.
  /// </summary>
  public static ulong ParseRawLittleEndian64(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      if (state.bufferPos + sizeof(ulong) > state.bufferSize)
      {
          // Not all eight bytes are buffered; fall back to byte-by-byte reads.
          return ParseRawLittleEndian64SlowPath(ref buffer, ref state);
      }

      ulong value = BinaryPrimitives.ReadUInt64LittleEndian(buffer.Slice(state.bufferPos, sizeof(ulong)));
      state.bufferPos += sizeof(ulong);
      return value;
  }
  /// <summary>
  /// Assembles a 64-bit little-endian integer from eight bytes read one at a time.
  /// </summary>
  private static ulong ParseRawLittleEndian64SlowPath(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      ulong value = 0;
      for (int shift = 0; shift < 64; shift += 8)
      {
          // Least significant byte arrives first.
          ulong next = ReadRawByte(ref buffer, ref state);
          value |= next << shift;
      }

      return value;
  }
  /// <summary>
  /// Parses a double value.
  /// </summary>
  public static double ParseDouble(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      const int length = sizeof(double);

      bool canReadDirectly = BitConverter.IsLittleEndian && state.bufferPos + length <= state.bufferSize;
      if (canReadDirectly)
      {
          // ReadUnaligned uses the processor's endianness, hence the little-endian requirement.
          double value = Unsafe.ReadUnaligned<double>(ref MemoryMarshal.GetReference(buffer.Slice(state.bufferPos, length)));
          state.bufferPos += length;
          return value;
      }

      // Big-endian host or not enough buffered bytes: go through the integer parser.
      ulong bits = ParseRawLittleEndian64(ref buffer, ref state);
      return BitConverter.Int64BitsToDouble((long)bits);
  }
  /// <summary>
  /// Parses a float value.
  /// </summary>
  public static float ParseFloat(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      const int length = sizeof(float);

      bool canReadDirectly = BitConverter.IsLittleEndian && state.bufferPos + length <= state.bufferSize;
      if (canReadDirectly)
      {
          // ReadUnaligned uses the processor's endianness, hence the little-endian requirement.
          float value = Unsafe.ReadUnaligned<float>(ref MemoryMarshal.GetReference(buffer.Slice(state.bufferPos, length)));
          state.bufferPos += length;
          return value;
      }

      return ParseFloatSlow(ref buffer, ref state);
  }
  /// <summary>
  /// Parses a float by collecting its four bytes one at a time into a stack buffer.
  /// Used on big-endian hosts or when fewer than four bytes are buffered.
  /// </summary>
  private static unsafe float ParseFloatSlow(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      const int length = sizeof(float);
      byte* scratch = stackalloc byte[length];
      Span<byte> bytes = new Span<byte>(scratch, length);

      int index = 0;
      while (index < length)
      {
          bytes[index] = ReadRawByte(ref buffer, ref state);
          index++;
      }

      // Wire content is little endian; flip it when the host is not.
      if (!BitConverter.IsLittleEndian)
      {
          bytes.Reverse();
      }

      return Unsafe.ReadUnaligned<float>(ref MemoryMarshal.GetReference(bytes));
  }
  /// <summary>
  /// Reads exactly <paramref name="size"/> bytes from the input into a new array.
  /// </summary>
  /// <exception cref="InvalidProtocolBufferException">
  /// <paramref name="size"/> is negative, or the end of the stream or the current limit was reached
  /// </exception>
  public static byte[] ReadRawBytes(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state, int size)
  {
      if (size < 0)
      {
          throw InvalidProtocolBufferException.NegativeSize();
      }

      int buffered = state.bufferSize - state.bufferPos;
      if (size > buffered)
      {
          // The request extends past what is currently buffered.
          return ReadRawBytesSlow(ref buffer, ref state, size);
      }

      // Everything requested is already in the buffer.
      byte[] result = new byte[size];
      buffer.Slice(state.bufferPos, size).CopyTo(result);
      state.bufferPos += size;
      return result;
  }
  /// <summary>
  /// Reads <paramref name="size"/> bytes when they are not all in the current buffer,
  /// refilling the buffer as needed.
  /// </summary>
  private static byte[] ReadRawBytesSlow(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state, int size)
  {
      ValidateCurrentLimit(ref buffer, ref state, size);

      bool smallReadOfUnknownLength =
          !state.segmentedBufferHelper.TotalLength.HasValue && size < buffer.Length;
      if (smallReadOfUnknownLength || IsDataAvailableInSource(ref state, size))
      {
          // More than is buffered, but not an excessive amount: the destination
          // array can safely be allocated up front.
          byte[] result = new byte[size];
          ReadRawBytesIntoSpan(ref buffer, ref state, size, result);
          return result;
      }

      // The size is very large and comes straight from the input, so a malicious message
      // could claim a bogus huge size to make us allocate a lot of memory. Avoid allocating
      // the whole array yet: read chunk by chunk so that the message must actually *be*
      // that large to cause problems. The allowed message size is limited elsewhere.
      var chunks = new List<byte[]>();

      // Start with whatever is left in the current buffer.
      int buffered = state.bufferSize - state.bufferPos;
      byte[] head = new byte[buffered];
      buffer.Slice(state.bufferPos, buffered).CopyTo(head);
      chunks.Add(head);
      state.bufferPos = state.bufferSize;

      // Then pull in refills until the request is satisfied.
      for (int remaining = size - buffered; remaining > 0;)
      {
          state.segmentedBufferHelper.RefillBuffer(ref buffer, ref state, true);
          byte[] piece = new byte[Math.Min(remaining, state.bufferSize)];
          buffer.Slice(0, piece.Length).CopyTo(piece);
          state.bufferPos += piece.Length;
          remaining -= piece.Length;
          chunks.Add(piece);
      }

      // Everything arrived; stitch the chunks together.
      byte[] combined = new byte[size];
      int offset = 0;
      foreach (byte[] piece in chunks)
      {
          Buffer.BlockCopy(piece, 0, combined, offset, piece.Length);
          offset += piece.Length;
      }

      return combined;
  }
  /// <summary>
  /// Advances past <paramref name="size"/> bytes without returning them.
  /// </summary>
  /// <exception cref="InvalidProtocolBufferException"><paramref name="size"/> is negative,
  /// or the end of the stream or the current limit was reached</exception>
  public static void SkipRawBytes(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state, int size)
  {
      if (size < 0)
      {
          throw InvalidProtocolBufferException.NegativeSize();
      }

      ValidateCurrentLimit(ref buffer, ref state, size);

      int buffered = state.bufferSize - state.bufferPos;
      if (size <= buffered)
      {
          // Everything to skip is already buffered.
          state.bufferPos += size;
          return;
      }

      // Skipping beyond the buffer: drop what is buffered, then keep refilling.
      int skipped = buffered;
      state.bufferPos = state.bufferSize;

      // TODO: If our segmented buffer is backed by a Stream that is seekable, we could skip the bytes more efficiently
      // by simply updating stream's Position property. This used to be supported in the past, but the support was dropped
      // because it would make the segmentedBufferHelper more complex. Support can be reintroduced if needed.
      state.segmentedBufferHelper.RefillBuffer(ref buffer, ref state, true);
      while (size - skipped > state.bufferSize)
      {
          skipped += state.bufferSize;
          state.bufferPos = state.bufferSize;
          state.segmentedBufferHelper.RefillBuffer(ref buffer, ref state, true);
      }

      // The remainder lies inside the freshly filled buffer.
      state.bufferPos = size - skipped;
  }
  /// <summary>
  /// Reads a length-prefixed string field value from the input.
  /// </summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static string ReadString(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      int byteCount = ParseLength(ref buffer, ref state);
      string text = ReadRawString(ref buffer, ref state, byteCount);
      return text;
  }
  /// <summary>
  /// Reads a length-prefixed bytes field value from the input.
  /// </summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  public static ByteString ReadBytes(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      int byteCount = ParseLength(ref buffer, ref state);
      byte[] payload = ReadRawBytes(ref buffer, ref state, byteCount);
      return ByteString.AttachBytes(payload);
  }
  /// <summary>
  /// Decodes the next <paramref name="length"/> bytes of input as a UTF-8 string.
  /// </summary>
  /// <exception cref="InvalidProtocolBufferException">
  /// <paramref name="length"/> is negative, or the end of the stream or the current limit was reached
  /// </exception>
  [SecuritySafeCritical]
  public static string ReadRawString(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state, int length)
  {
      if (length < 0)
      {
          throw InvalidProtocolBufferException.NegativeSize();
      }

      // An empty string requires no input at all.
      if (length == 0)
      {
          return string.Empty;
      }

#if GOOGLE_PROTOBUF_SUPPORT_FAST_STRING
      if (length <= state.bufferSize - state.bufferPos)
      {
          // Fast path: the whole string sits in the current span, decode it in place.
          string decoded;
          ReadOnlySpan<byte> utf8 = buffer.Slice(state.bufferPos, length);
          unsafe
          {
              fixed (byte* utf8Ptr = &MemoryMarshal.GetReference(utf8))
              {
                  decoded = WritingPrimitives.Utf8Encoding.GetString(utf8Ptr, length);
              }
          }

          state.bufferPos += length;
          return decoded;
      }
#endif

      return ReadStringSlow(ref buffer, ref state, length);
  }
  /// <summary>
  /// Reads a string whose bytes are not all in the current buffer, e.g. when it is spread
  /// across multiple spans in a <see cref="ReadOnlySequence{T}"/>.
  /// </summary>
  private static string ReadStringSlow(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state, int length)
  {
      ValidateCurrentLimit(ref buffer, ref state, length);

#if GOOGLE_PROTOBUF_SUPPORT_FAST_STRING
      if (IsDataAvailable(ref state, length))
      {
          // Gather the bytes into scratch storage (stack for small strings, ArrayPool
          // otherwise), decode once everything is collected, and hand a rented array back.
          byte[] rented = null;
          Span<byte> scratch = length <= StackallocThreshold ?
              stackalloc byte[length] :
              (rented = ArrayPool<byte>.Shared.Rent(length));

          try
          {
              unsafe
              {
                  fixed (byte* scratchPtr = &MemoryMarshal.GetReference(scratch))
                  {
                      // The compiler rejects passing a possibly stackalloc'd Span<byte> to a method that
                      // also takes "ref ReadOnlySpan<byte> buffer", because the callee could store it in
                      // the ref argument. That never happens here, so wrap the pointer in a new span.
                      var target = new Span<byte>(scratchPtr, scratch.Length);
                      ReadRawBytesIntoSpan(ref buffer, ref state, length, target);
                      return WritingPrimitives.Utf8Encoding.GetString(scratchPtr, length);
                  }
              }
          }
          finally
          {
              if (rented != null)
              {
                  ArrayPool<byte>.Shared.Return(rented);
              }
          }
      }
#endif

      // Fallback: materialize a byte array first, then decode it. Reached when reading from a
      // Stream (its total length is unknown) or when the sequence does not hold enough data;
      // in the latter case ReadRawBytes throws.
      byte[] raw = ReadRawBytes(ref buffer, ref state, length);
      return WritingPrimitives.Utf8Encoding.GetString(raw, 0, length);
  }
  /// <summary>
  /// Validates that reading <paramref name="size"/> more bytes would not go past the current limit.
  /// If it would, the bytes remaining up to the limit are skipped and an error is thrown.
  /// </summary>
  /// <param name="buffer">The current input buffer.</param>
  /// <param name="state">The parser state holding the position and limit bookkeeping.</param>
  /// <param name="size">The number of bytes the caller is about to read or skip.</param>
  /// <exception cref="InvalidProtocolBufferException">
  /// The requested range extends past the current limit.
  /// </exception>
  private static void ValidateCurrentLimit(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state, int size)
  {
      // The size comes from the wire and may be close to int.MaxValue. Summing in 32-bit
      // arithmetic could wrap to a negative number and wrongly pass the check, so the end
      // position is computed in 64 bits.
      long requestedEnd = (long)state.totalBytesRetired + state.bufferPos + size;
      if (requestedEnd > state.currentLimit)
      {
          // Read to the end of the stream (up to the current limit) anyway.
          SkipRawBytes(ref buffer, ref state, state.currentLimit - state.totalBytesRetired - state.bufferPos);
          // Then fail.
          throw InvalidProtocolBufferException.TruncatedMessage();
      }
  }
  /// <summary>
  /// Reads a single byte, refilling the buffer first when it has been fully consumed.
  /// </summary>
  [SecuritySafeCritical]
  private static byte ReadRawByte(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
  {
      bool bufferExhausted = state.bufferPos == state.bufferSize;
      if (bufferExhausted)
      {
          state.segmentedBufferHelper.RefillBuffer(ref buffer, ref state, true);
      }

      byte value = buffer[state.bufferPos++];
      return value;
  }
  /// <summary>
  /// Reads a varint directly from a stream, one byte at a time, so that no bytes after the
  /// end of the varint are consumed. Wrapping the stream in a CodedInputStream instead
  /// would probably read past the end of the varint, since CodedInputStream buffers its input.
  /// </summary>
  /// <param name="input">The stream to read the varint from.</param>
  /// <returns>The low 32 bits of the decoded varint.</returns>
  /// <exception cref="InvalidProtocolBufferException">
  /// The stream ended before the varint was complete, or the varint did not terminate.
  /// </exception>
  public static uint ReadRawVarint32(Stream input)
  {
      // Fetches the next byte, failing if the stream is exhausted.
      int NextByte()
      {
          int read = input.ReadByte();
          if (read == -1)
          {
              throw InvalidProtocolBufferException.TruncatedMessage();
          }

          return read;
      }

      int value = 0;
      int shift = 0;

      // Bytes that contribute to the low 32 bits.
      while (shift < 32)
      {
          int current = NextByte();
          value |= (current & 0x7f) << shift;
          if ((current & 0x80) == 0)
          {
              return (uint)value;
          }

          shift += 7;
      }

      // Keep reading up to 64 bits; the payload of these bytes is ignored.
      while (shift < 64)
      {
          int current = NextByte();
          if ((current & 0x80) == 0)
          {
              return (uint)value;
          }

          shift += 7;
      }

      throw InvalidProtocolBufferException.MalformedVarint();
  }
  /// <summary>
  /// Decodes a ZigZag-encoded 32-bit value back into a signed integer.
  /// </summary>
  /// <remarks>
  /// ZigZag maps signed integers onto unsigned values that varint-encode compactly.
  /// (Without it, negative values would have to be sign-extended to 32 bits and would
  /// therefore always take 5 bytes on the wire.)
  /// </remarks>
  public static int DecodeZigZag32(uint n)
  {
      int magnitude = (int)(n >> 1);
      // All ones when the lowest bit is set, zero otherwise.
      int signMask = -(int)(n & 1);
      return magnitude ^ signMask;
  }
  /// <summary>
  /// Decodes a ZigZag-encoded 64-bit value back into a signed integer.
  /// </summary>
  /// <remarks>
  /// ZigZag maps signed integers onto unsigned values that varint-encode compactly.
  /// (Without it, negative values would have to be sign-extended to 64 bits and would
  /// therefore always take 10 bytes on the wire.)
  /// </remarks>
  public static long DecodeZigZag64(ulong n)
  {
      long magnitude = (long)(n >> 1);
      // All ones when the lowest bit is set, zero otherwise.
      long signMask = -(long)(n & 1);
      return magnitude ^ signMask;
  }
  /// <summary>
  /// Reports whether <paramref name="size"/> bytes are known to be available for parsing,
  /// either in the current buffer or in the underlying source. When parsing from a Stream
  /// this can return false, because the amount of data left in the stream is unknown until
  /// it has been read.
  /// </summary>
  public static bool IsDataAvailable(ref ParserInternalState state, int size)
  {
      bool fitsInBuffer = size <= state.bufferSize - state.bufferPos;
      return fitsInBuffer || IsDataAvailableInSource(ref state, size);
  }
  /// <summary>
  /// Reports whether <paramref name="size"/> bytes are known to remain in the underlying
  /// data source. When parsing from a Stream this returns false, because the total length
  /// is unknown.
  /// </summary>
  private static bool IsDataAvailableInSource(ref ParserInternalState state, int size)
  {
      // With an unknown total length the remaining amount is null, and a comparison
      // against null is false - so this is never true for streams.
      var remainingInSource = state.segmentedBufferHelper.TotalLength - state.totalBytesRetired - state.bufferPos;
      return size <= remainingInSource;
  }
  /// <summary>
  /// Copies <paramref name="length"/> raw bytes from the input into <paramref name="byteSpan"/>,
  /// refilling the buffer whenever it runs dry. Callers are expected to have checked data
  /// availability and the current limit beforehand.
  /// </summary>
  private static void ReadRawBytesIntoSpan(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state, int length, Span<byte> byteSpan)
  {
      int copied = 0;
      while (copied < length)
      {
          if (state.bufferPos == state.bufferSize)
          {
              state.segmentedBufferHelper.RefillBuffer(ref buffer, ref state, true);
          }

          // Take as much as the buffer offers, but no more than is still needed.
          int take = Math.Min(length - copied, state.bufferSize - state.bufferPos);
          buffer.Slice(state.bufferPos, take).CopyTo(byteSpan.Slice(copied));
          copied += take;
          state.bufferPos += take;
      }
  }
  746. }
  747. }