# input_stream.py
# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc.
# http://code.google.com/p/protobuf/
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""InputStream is the primitive interface for reading bits from the wire.

All protocol buffer deserialization can be expressed in terms of
the InputStream primitives provided here.
"""
  20. __author__ = 'robinson@google.com (Will Robinson)'
  21. import struct
  22. from google.protobuf import message
  23. from google.protobuf.internal import wire_format
# Note that much of this code is ported from //net/proto/ProtocolBuffer, and
# that the interface is strongly inspired by CodedInputStream from the C++
# proto2 implementation.
  27. class InputStream(object):
  28. """Contains all logic for reading bits, and dealing with stream position.
  29. If an InputStream method ever raises an exception, the stream is left
  30. in an indeterminate state and is not safe for further use.
  31. """
  32. def __init__(self, s):
  33. # What we really want is something like array('B', s), where elements we
  34. # read from the array are already given to us as one-byte integers. BUT
  35. # using array() instead of buffer() would force full string copies to result
  36. # from each GetSubBuffer() call.
  37. #
  38. # So, if the N serialized bytes of a single protocol buffer object are
  39. # split evenly between 2 child messages, and so on recursively, using
  40. # array('B', s) instead of buffer() would incur an additional N*logN bytes
  41. # copied during deserialization.
  42. #
  43. # The higher constant overhead of having to ord() for every byte we read
  44. # from the buffer in _ReadVarintHelper() could definitely lead to worse
  45. # performance in many real-world scenarios, even if the asymptotic
  46. # complexity is better. However, our real answer is that the mythical
  47. # Python/C extension module output mode for the protocol compiler will
  48. # be blazing-fast and will eliminate most use of this class anyway.
  49. self._buffer = buffer(s)
  50. self._pos = 0
  51. def EndOfStream(self):
  52. """Returns true iff we're at the end of the stream.
  53. If this returns true, then a call to any other InputStream method
  54. will raise an exception.
  55. """
  56. return self._pos >= len(self._buffer)
  57. def Position(self):
  58. """Returns the current position in the stream, or equivalently, the
  59. number of bytes read so far.
  60. """
  61. return self._pos
  62. def GetSubBuffer(self, size=None):
  63. """Returns a sequence-like object that represents a portion of our
  64. underlying sequence.
  65. Position 0 in the returned object corresponds to self.Position()
  66. in this stream.
  67. If size is specified, then the returned object ends after the
  68. next "size" bytes in this stream. If size is not specified,
  69. then the returned object ends at the end of this stream.
  70. We guarantee that the returned object R supports the Python buffer
  71. interface (and thus that the call buffer(R) will work).
  72. Note that the returned buffer is read-only.
  73. The intended use for this method is for nested-message and nested-group
  74. deserialization, where we want to make a recursive MergeFromString()
  75. call on the portion of the original sequence that contains the serialized
  76. nested message. (And we'd like to do so without making unnecessary string
  77. copies).
  78. REQUIRES: size is nonnegative.
  79. """
  80. # Note that buffer() doesn't perform any actual string copy.
  81. if size is None:
  82. return buffer(self._buffer, self._pos)
  83. else:
  84. if size < 0:
  85. raise message.DecodeError('Negative size %d' % size)
  86. return buffer(self._buffer, self._pos, size)
  87. def SkipBytes(self, num_bytes):
  88. """Skip num_bytes bytes ahead, or go to the end of the stream, whichever
  89. comes first.
  90. REQUIRES: num_bytes is nonnegative.
  91. """
  92. if num_bytes < 0:
  93. raise message.DecodeError('Negative num_bytes %d' % num_bytes)
  94. self._pos += num_bytes
  95. self._pos = min(self._pos, len(self._buffer))
  96. def ReadString(self, size):
  97. """Reads up to 'size' bytes from the stream, stopping early
  98. only if we reach the end of the stream. Returns the bytes read
  99. as a string.
  100. """
  101. if size < 0:
  102. raise message.DecodeError('Negative size %d' % size)
  103. s = (self._buffer[self._pos : self._pos + size])
  104. self._pos += len(s) # Only advance by the number of bytes actually read.
  105. return s
  106. def ReadLittleEndian32(self):
  107. """Interprets the next 4 bytes of the stream as a little-endian
  108. encoded, unsiged 32-bit integer, and returns that integer.
  109. """
  110. try:
  111. i = struct.unpack(wire_format.FORMAT_UINT32_LITTLE_ENDIAN,
  112. self._buffer[self._pos : self._pos + 4])
  113. self._pos += 4
  114. return i[0] # unpack() result is a 1-element tuple.
  115. except struct.error, e:
  116. raise message.DecodeError(e)
  117. def ReadLittleEndian64(self):
  118. """Interprets the next 8 bytes of the stream as a little-endian
  119. encoded, unsiged 64-bit integer, and returns that integer.
  120. """
  121. try:
  122. i = struct.unpack(wire_format.FORMAT_UINT64_LITTLE_ENDIAN,
  123. self._buffer[self._pos : self._pos + 8])
  124. self._pos += 8
  125. return i[0] # unpack() result is a 1-element tuple.
  126. except struct.error, e:
  127. raise message.DecodeError(e)
  128. def ReadVarint32(self):
  129. """Reads a varint from the stream, interprets this varint
  130. as a signed, 32-bit integer, and returns the integer.
  131. """
  132. i = self.ReadVarint64()
  133. if not wire_format.INT32_MIN <= i <= wire_format.INT32_MAX:
  134. raise message.DecodeError('Value out of range for int32: %d' % i)
  135. return int(i)
  136. def ReadVarUInt32(self):
  137. """Reads a varint from the stream, interprets this varint
  138. as an unsigned, 32-bit integer, and returns the integer.
  139. """
  140. i = self.ReadVarUInt64()
  141. if i > wire_format.UINT32_MAX:
  142. raise message.DecodeError('Value out of range for uint32: %d' % i)
  143. return i
  144. def ReadVarint64(self):
  145. """Reads a varint from the stream, interprets this varint
  146. as a signed, 64-bit integer, and returns the integer.
  147. """
  148. i = self.ReadVarUInt64()
  149. if i > wire_format.INT64_MAX:
  150. i -= (1 << 64)
  151. return i
  152. def ReadVarUInt64(self):
  153. """Reads a varint from the stream, interprets this varint
  154. as an unsigned, 64-bit integer, and returns the integer.
  155. """
  156. i = self._ReadVarintHelper()
  157. if not 0 <= i <= wire_format.UINT64_MAX:
  158. raise message.DecodeError('Value out of range for uint64: %d' % i)
  159. return i
  160. def _ReadVarintHelper(self):
  161. """Helper for the various varint-reading methods above.
  162. Reads an unsigned, varint-encoded integer from the stream and
  163. returns this integer.
  164. Does no bounds checking except to ensure that we read at most as many bytes
  165. as could possibly be present in a varint-encoded 64-bit number.
  166. """
  167. result = 0
  168. shift = 0
  169. while 1:
  170. if shift >= 64:
  171. raise message.DecodeError('Too many bytes when decoding varint.')
  172. try:
  173. b = ord(self._buffer[self._pos])
  174. except IndexError:
  175. raise message.DecodeError('Truncated varint.')
  176. self._pos += 1
  177. result |= ((b & 0x7f) << shift)
  178. shift += 7
  179. if not (b & 0x80):
  180. return result