GPBCodedInputStream.m 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. #import "GPBCodedInputStream_PackagePrivate.h"
  31. #import "GPBDictionary_PackagePrivate.h"
  32. #import "GPBMessage_PackagePrivate.h"
  33. #import "GPBUnknownFieldSet_PackagePrivate.h"
  34. #import "GPBUtilities_PackagePrivate.h"
  35. #import "GPBWireFormat.h"
  // Maximum nesting depth allowed while parsing. readGroup:, readUnknownGroup:,
  // readMessage: and readMapEntry: compare state_.recursionDepth against this
  // value and raise NSParseErrorException once it has been reached.
  36. static const NSUInteger kDefaultRecursionLimit = 64;
  // Verifies that |size| more bytes can be consumed from |state| without running
  // past either the end of the buffer or the currently active limit.
  //
  // Raises NSParseErrorException when the read would not fit. When only the
  // current limit would be exceeded, the read position is first advanced to that
  // limit. Does not advance the position on success.
  static inline void CheckSize(GPBCodedInputStreamState *state, size_t size) {
    size_t newSize = state->bufferPos + size;
    if (newSize < state->bufferPos) {
      // bufferPos + size wrapped around. This happens when a negative length
      // read off the wire was converted to size_t; without this check the
      // wrapped value could slip under both limits below.
      [NSException raise:NSParseErrorException format:@""];
    }
    if (newSize > state->bufferSize) {
      [NSException raise:NSParseErrorException format:@""];
    }
    if (newSize > state->currentLimit) {
      // Fast forward to end of currentLimit;
      state->bufferPos = state->currentLimit;
      [NSException raise:NSParseErrorException format:@""];
    }
  }
  // Consumes one byte from |state| and returns it as a signed value, so callers
  // can test the varint continuation bit with a simple sign check.
  // CheckSize() raises if no byte is available.
  static inline int8_t ReadRawByte(GPBCodedInputStreamState *state) {
    CheckSize(state, sizeof(int8_t));
    const int8_t *signedBytes = (const int8_t *)state->bytes;
    int8_t byte = signedBytes[state->bufferPos];
    state->bufferPos++;
    return byte;
  }
  // Consumes four bytes from |state| and returns them decoded as a
  // little-endian 32-bit value. CheckSize() raises if four bytes are not
  // available.
  static inline int32_t ReadRawLittleEndian32(GPBCodedInputStreamState *state) {
    const size_t width = sizeof(int32_t);
    CheckSize(state, width);
    int32_t decoded = OSReadLittleInt32(state->bytes, state->bufferPos);
    state->bufferPos += width;
    return decoded;
  }
  // Consumes eight bytes from |state| and returns them decoded as a
  // little-endian 64-bit value. CheckSize() raises if eight bytes are not
  // available.
  static inline int64_t ReadRawLittleEndian64(GPBCodedInputStreamState *state) {
    const size_t width = sizeof(int64_t);
    CheckSize(state, width);
    int64_t decoded = OSReadLittleInt64(state->bytes, state->bufferPos);
    state->bufferPos += width;
    return decoded;
  }
  // Reads a varint from |state| and returns its low 32 bits.
  //
  // Up to five bytes contribute to the result. If the fifth byte still has its
  // continuation bit set, up to five more bytes are consumed and discarded (the
  // upper bits of a 64-bit varint). Raises NSParseErrorException if none of
  // those terminates the varint; ReadRawByte() raises if the input runs out.
  //
  // The value is accumulated in an unsigned integer: shifting a signed (and
  // possibly negative) byte left by 28 bits is undefined behaviour in C.
  static inline int32_t ReadRawVarint32(GPBCodedInputStreamState *state) {
    uint32_t result = 0;
    // Five groups of 7 bits cover 35 bits; bits above 31 fall off the end of
    // the unsigned shift.
    for (int shift = 0; shift < 35; shift += 7) {
      int8_t byte = ReadRawByte(state);
      result |= (uint32_t)(byte & 0x7f) << shift;
      if (byte >= 0) {
        // Continuation bit clear: this was the last byte.
        return (int32_t)result;
      }
    }
    // Discard upper 32 bits.
    for (int i = 0; i < 5; i++) {
      if (ReadRawByte(state) >= 0) {
        return (int32_t)result;
      }
    }
    [NSException raise:NSParseErrorException
                format:@"Unable to read varint32"];
    return (int32_t)result;
  }
  // Reads a varint of up to ten bytes from |state| and returns it as a 64-bit
  // value. Raises NSParseErrorException if the tenth byte still has its
  // continuation bit set; ReadRawByte() raises if the input runs out.
  //
  // The value is accumulated in an unsigned integer: on the tenth byte the
  // shift count is 63, which overflows a signed 64-bit operand (undefined
  // behaviour in C).
  static inline int64_t ReadRawVarint64(GPBCodedInputStreamState *state) {
    uint64_t result = 0;
    for (int32_t shift = 0; shift < 64; shift += 7) {
      int8_t b = ReadRawByte(state);
      result |= (uint64_t)(b & 0x7F) << shift;
      if ((b & 0x80) == 0) {
        return (int64_t)result;
      }
    }
    [NSException raise:NSParseErrorException format:@"Unable to read varint64"];
    return 0;
  }
  // Advances the read position of |state| by |size| bytes without looking at
  // them. CheckSize() raises if that many bytes are not available.
  static inline void SkipRawData(GPBCodedInputStreamState *state, size_t size) {
    CheckSize(state, size);
    state->bufferPos = state->bufferPos + size;
  }
  // Reads a fixed-width little-endian 64-bit value and converts it to a double
  // with GPBConvertInt64ToDouble().
  double GPBCodedInputStreamReadDouble(GPBCodedInputStreamState *state) {
    return GPBConvertInt64ToDouble(ReadRawLittleEndian64(state));
  }
  // Reads a fixed-width little-endian 32-bit value and converts it to a float
  // with GPBConvertInt32ToFloat().
  float GPBCodedInputStreamReadFloat(GPBCodedInputStreamState *state) {
    return GPBConvertInt32ToFloat(ReadRawLittleEndian32(state));
  }
  // Reads a varint and returns it reinterpreted as an unsigned 64-bit value.
  uint64_t GPBCodedInputStreamReadUInt64(GPBCodedInputStreamState *state) {
    return (uint64_t)ReadRawVarint64(state);
  }
  // Reads a varint and returns its low 32 bits reinterpreted as unsigned.
  uint32_t GPBCodedInputStreamReadUInt32(GPBCodedInputStreamState *state) {
    return (uint32_t)ReadRawVarint32(state);
  }
  // Reads a varint and returns it as a signed 64-bit value.
  int64_t GPBCodedInputStreamReadInt64(GPBCodedInputStreamState *state) {
    return ReadRawVarint64(state);
  }
  // Reads a varint and returns its low 32 bits as a signed value.
  int32_t GPBCodedInputStreamReadInt32(GPBCodedInputStreamState *state) {
    return ReadRawVarint32(state);
  }
  // Reads a fixed-width little-endian 64-bit value, reinterpreted as unsigned.
  uint64_t GPBCodedInputStreamReadFixed64(GPBCodedInputStreamState *state) {
    return (uint64_t)ReadRawLittleEndian64(state);
  }
  // Reads a fixed-width little-endian 32-bit value, reinterpreted as unsigned.
  uint32_t GPBCodedInputStreamReadFixed32(GPBCodedInputStreamState *state) {
    return (uint32_t)ReadRawLittleEndian32(state);
  }
  // Reads an enum value, which is stored on the wire as a 32-bit varint. No
  // validation of the value is done here.
  int32_t GPBCodedInputStreamReadEnum(GPBCodedInputStreamState *state) {
    return ReadRawVarint32(state);
  }
  // Reads a fixed-width little-endian 32-bit value as a signed integer.
  int32_t GPBCodedInputStreamReadSFixed32(GPBCodedInputStreamState *state) {
    return ReadRawLittleEndian32(state);
  }
  // Reads a fixed-width little-endian 64-bit value as a signed integer.
  int64_t GPBCodedInputStreamReadSFixed64(GPBCodedInputStreamState *state) {
    return ReadRawLittleEndian64(state);
  }
  // Reads a 32-bit varint and passes it through GPBDecodeZigZag32() to obtain
  // the signed value.
  int32_t GPBCodedInputStreamReadSInt32(GPBCodedInputStreamState *state) {
    int32_t encoded = ReadRawVarint32(state);
    return GPBDecodeZigZag32(encoded);
  }
  // Reads a 64-bit varint and passes it through GPBDecodeZigZag64() to obtain
  // the signed value.
  int64_t GPBCodedInputStreamReadSInt64(GPBCodedInputStreamState *state) {
    int64_t encoded = ReadRawVarint64(state);
    return GPBDecodeZigZag64(encoded);
  }
  // Reads a varint and reports whether its low 32 bits are non-zero.
  BOOL GPBCodedInputStreamReadBool(GPBCodedInputStreamState *state) {
    int32_t raw = ReadRawVarint32(state);
    return raw != 0;
  }
  // Reads the next field tag and records it in state->lastTag.
  //
  // Returns 0 (and stores 0 as the last tag) when the stream is at the end of
  // the buffer or of the current limit. A tag that actually decodes to zero is
  // invalid and raises NSParseErrorException.
  int32_t GPBCodedInputStreamReadTag(GPBCodedInputStreamState *state) {
    if (!GPBCodedInputStreamIsAtEnd(state)) {
      state->lastTag = ReadRawVarint32(state);
      if (state->lastTag == 0) {
        // If we actually read zero, that's not a valid tag.
        [NSException raise:NSParseErrorException
                    format:@"Invalid last tag %d", state->lastTag];
      }
      return state->lastTag;
    }

    // Nothing left to read: report "no tag".
    state->lastTag = 0;
    return 0;
  }
  // Reads a length-prefixed UTF-8 string and returns it with a +1 retain count
  // (the caller owns the result).
  //
  // A zero length yields the constant empty string. A negative length is
  // malformed input and raises NSParseErrorException; previously it was
  // converted to a huge unsigned size and handed to CheckSize() and the string
  // constructor. CheckSize() raises if the payload does not fit in the buffer
  // or the current limit.
  NSString *GPBCodedInputStreamReadRetainedString(
      GPBCodedInputStreamState *state) {
    int32_t size = ReadRawVarint32(state);
    if (size < 0) {
      [NSException raise:NSParseErrorException
                  format:@"Invalid string size %d", size];
      return nil;
    }
    if (size == 0) {
      return @"";
    }
    CheckSize(state, (size_t)size);
    NSString *result =
        GPBCreateGPBStringWithUTF8(&state->bytes[state->bufferPos],
                                   (NSUInteger)size);
    state->bufferPos += (size_t)size;
    return result;
  }
  // Reads a length-prefixed byte blob and returns a copy of it with a +1 retain
  // count (the caller owns the result). Returns nil when the length prefix is
  // negative; CheckSize() raises if the payload does not fit.
  NSData *GPBCodedInputStreamReadRetainedData(GPBCodedInputStreamState *state) {
    int32_t size = ReadRawVarint32(state);
    if (size < 0) {
      return nil;
    }
    CheckSize(state, size);
    const uint8_t *payload = state->bytes + state->bufferPos;
    NSData *copied = [[NSData alloc] initWithBytes:payload length:size];
    state->bufferPos += size;
    return copied;
  }
  // Reads a length-prefixed byte blob and returns an NSData (+1 retain count)
  // that points directly into the stream's buffer instead of copying it, so the
  // result is only usable while that buffer stays alive. Returns nil when the
  // length prefix is negative; CheckSize() raises if the payload does not fit.
  NSData *GPBCodedInputStreamReadRetainedDataNoCopy(
      GPBCodedInputStreamState *state) {
    int32_t size = ReadRawVarint32(state);
    if (size < 0) {
      return nil;
    }
    CheckSize(state, size);
    // Cast is safe because freeWhenDone is NO.
    void *payload = (void *)(state->bytes + state->bufferPos);
    NSData *wrapped = [[NSData alloc] initWithBytesNoCopy:payload
                                                   length:size
                                             freeWhenDone:NO];
    state->bufferPos += size;
    return wrapped;
  }
  // Installs a new read limit |byteLimit| bytes past the current position and
  // returns the previous limit so it can be restored with
  // GPBCodedInputStreamPopLimit().
  //
  // Raises NSInvalidArgumentException if the new limit would lie beyond the
  // limit already in force, or if position + byteLimit overflows size_t (which
  // happens when a negative length was converted to size_t by the caller).
  // Without the overflow check the wrapped sum could be installed as a limit
  // that lies before the current position.
  size_t GPBCodedInputStreamPushLimit(GPBCodedInputStreamState *state,
                                      size_t byteLimit) {
    size_t oldLimit = state->currentLimit;
    size_t newLimit = state->bufferPos + byteLimit;
    if (newLimit < state->bufferPos) {
      // The addition wrapped around.
      [NSException raise:NSInvalidArgumentException
                  format:@"byteLimit overflows: %tu", byteLimit];
    }
    if (newLimit > oldLimit) {
      [NSException raise:NSInvalidArgumentException
                  format:@"byteLimit > oldLimit: %tu > %tu", newLimit, oldLimit];
    }
    state->currentLimit = newLimit;
    return oldLimit;
  }
  // Restores a limit previously returned by GPBCodedInputStreamPushLimit().
  // The value is stored as-is; no validation is performed.
  void GPBCodedInputStreamPopLimit(GPBCodedInputStreamState *state,
                                   size_t oldLimit) {
    state->currentLimit = oldLimit;
  }
  // Returns how many bytes remain before the current limit is reached. A limit
  // of SIZE_T_MAX is treated as "no limit" and is returned unchanged.
  size_t GPBCodedInputStreamBytesUntilLimit(GPBCodedInputStreamState *state) {
    size_t limit = state->currentLimit;
    return (limit == SIZE_T_MAX) ? limit : (limit - state->bufferPos);
  }
  // Returns YES when the read position sits exactly at the end of the buffer or
  // exactly at the current limit.
  BOOL GPBCodedInputStreamIsAtEnd(GPBCodedInputStreamState *state) {
    if (state->bufferPos == state->bufferSize) {
      return YES;
    }
    return state->bufferPos == state->currentLimit;
  }
  // Raises NSParseErrorException unless the most recently read tag
  // (state->lastTag) equals |value|.
  void GPBCodedInputStreamCheckLastTagWas(GPBCodedInputStreamState *state,
                                          int32_t value) {
    if (state->lastTag == value) {
      return;
    }
    [NSException raise:NSParseErrorException
                format:@"Last tag: %d should be %d", state->lastTag, value];
  }
  // Objective-C wrapper around the C parsing helpers above. It retains the
  // NSData being parsed (buffer_) and keeps the cursor, limit and recursion
  // depth in state_. Parse failures are reported by raising
  // NSParseErrorException.
  @implementation GPBCodedInputStream

  // Raises NSParseErrorException once |state| is already nested
  // kDefaultRecursionLimit levels deep.
  static inline void RaiseIfRecursionLimitReached(
      GPBCodedInputStreamState *state) {
    if (state->recursionDepth >= kDefaultRecursionLimit) {
      [NSException raise:NSParseErrorException
                  format:@"recursionDepth(%tu) >= %tu", state->recursionDepth,
                         kDefaultRecursionLimit];
    }
  }

  // Reads a varint length prefix and returns it as a size_t. A negative value
  // is malformed input and raises NSParseErrorException: converting it to
  // size_t would produce a huge length that makes later position arithmetic
  // wrap around.
  static inline size_t ReadNonNegativeLength(GPBCodedInputStreamState *state) {
    int32_t length = ReadRawVarint32(state);
    if (length < 0) {
      [NSException raise:NSParseErrorException
                  format:@"Invalid length %d", length];
    }
    return (size_t)length;
  }

  // Returns an autoreleased stream that reads from |data|.
  + (instancetype)streamWithData:(NSData *)data {
    return [[[self alloc] initWithData:data] autorelease];
  }

  // Initializes the stream to read |data| from its first byte with no limit in
  // force. |data| is retained (not copied) for the lifetime of the stream.
  - (instancetype)initWithData:(NSData *)data {
    if ((self = [super init])) {
  #ifdef DEBUG
      NSCAssert([self class] == [GPBCodedInputStream class],
                @"Subclassing of GPBCodedInputStream is not allowed.");
  #endif
      buffer_ = [data retain];
      state_.bytes = (const uint8_t *)[data bytes];
      state_.bufferSize = [data length];
      state_.currentLimit = NSUIntegerMax;
    }
    return self;
  }

  - (void)dealloc {
    [buffer_ release];
    [super dealloc];
  }

  // Reads the next tag; returns 0 at the end of the buffer or current limit.
  - (int32_t)readTag {
    return GPBCodedInputStreamReadTag(&state_);
  }

  // Raises NSParseErrorException unless the last tag read equals |value|.
  - (void)checkLastTagWas:(int32_t)value {
    GPBCodedInputStreamCheckLastTagWas(&state_, value);
  }

  // Skips over the field whose tag |tag| has just been read.
  //
  // Returns YES when a field was skipped and NO when |tag| is an end-group tag
  // (nothing is consumed in that case). Raises NSParseErrorException for an
  // unrecognized wire type or malformed field data.
  - (BOOL)skipField:(int32_t)tag {
    switch (GPBWireFormatGetTagWireType(tag)) {
      case GPBWireFormatVarint:
        GPBCodedInputStreamReadInt32(&state_);
        return YES;
      case GPBWireFormatFixed64:
        SkipRawData(&state_, sizeof(int64_t));
        return YES;
      case GPBWireFormatLengthDelimited:
        // A negative length must be rejected here: as a size_t it could wrap
        // the cursor backwards and make skipMessage loop over the same bytes.
        SkipRawData(&state_, ReadNonNegativeLength(&state_));
        return YES;
      case GPBWireFormatStartGroup:
        [self skipMessage];
        GPBCodedInputStreamCheckLastTagWas(
            &state_, GPBWireFormatMakeTag(GPBWireFormatGetTagFieldNumber(tag),
                                          GPBWireFormatEndGroup));
        return YES;
      case GPBWireFormatEndGroup:
        return NO;
      case GPBWireFormatFixed32:
        SkipRawData(&state_, sizeof(int32_t));
        return YES;
    }
    [NSException raise:NSParseErrorException format:@"Invalid tag %d", tag];
    return NO;
  }

  // Skips fields until the end of input/limit (tag 0) or an end-group tag.
  - (void)skipMessage {
    while (YES) {
      int32_t tag = GPBCodedInputStreamReadTag(&state_);
      if (tag == 0 || ![self skipField:tag]) {
        return;
      }
    }
  }

  - (double)readDouble {
    return GPBCodedInputStreamReadDouble(&state_);
  }

  - (float)readFloat {
    return GPBCodedInputStreamReadFloat(&state_);
  }

  - (uint64_t)readUInt64 {
    return GPBCodedInputStreamReadUInt64(&state_);
  }

  - (int64_t)readInt64 {
    return GPBCodedInputStreamReadInt64(&state_);
  }

  - (int32_t)readInt32 {
    return GPBCodedInputStreamReadInt32(&state_);
  }

  - (uint64_t)readFixed64 {
    return GPBCodedInputStreamReadFixed64(&state_);
  }

  - (uint32_t)readFixed32 {
    return GPBCodedInputStreamReadFixed32(&state_);
  }

  - (BOOL)readBool {
    return GPBCodedInputStreamReadBool(&state_);
  }

  // Returns an autoreleased string read from the stream.
  - (NSString *)readString {
    return [GPBCodedInputStreamReadRetainedString(&state_) autorelease];
  }

  // Merges a group field into |message|. The group must be terminated by the
  // end-group tag for |fieldNumber|, otherwise NSParseErrorException is raised.
  // Also raises when the recursion limit has been reached.
  - (void)readGroup:(int32_t)fieldNumber
                message:(GPBMessage *)message
      extensionRegistry:(GPBExtensionRegistry *)extensionRegistry {
    RaiseIfRecursionLimitReached(&state_);
    ++state_.recursionDepth;
    [message mergeFromCodedInputStream:self extensionRegistry:extensionRegistry];
    GPBCodedInputStreamCheckLastTagWas(
        &state_, GPBWireFormatMakeTag(fieldNumber, GPBWireFormatEndGroup));
    --state_.recursionDepth;
  }

  // Same as readGroup:message:extensionRegistry: but merges the group into an
  // unknown field set.
  - (void)readUnknownGroup:(int32_t)fieldNumber
                   message:(GPBUnknownFieldSet *)message {
    RaiseIfRecursionLimitReached(&state_);
    ++state_.recursionDepth;
    [message mergeFromCodedInputStream:self];
    GPBCodedInputStreamCheckLastTagWas(
        &state_, GPBWireFormatMakeTag(fieldNumber, GPBWireFormatEndGroup));
    --state_.recursionDepth;
  }

  // Merges a length-delimited embedded message into |message|. Parsing is
  // confined to the declared length by pushing a limit, and the embedded
  // message must end exactly at that limit (last tag 0). Raises
  // NSParseErrorException for a negative length or when the recursion limit has
  // been reached.
  - (void)readMessage:(GPBMessage *)message
      extensionRegistry:(GPBExtensionRegistry *)extensionRegistry {
    size_t length = ReadNonNegativeLength(&state_);
    RaiseIfRecursionLimitReached(&state_);
    size_t oldLimit = GPBCodedInputStreamPushLimit(&state_, length);
    ++state_.recursionDepth;
    [message mergeFromCodedInputStream:self extensionRegistry:extensionRegistry];
    GPBCodedInputStreamCheckLastTagWas(&state_, 0);
    --state_.recursionDepth;
    GPBCodedInputStreamPopLimit(&state_, oldLimit);
  }

  // Reads one length-delimited map entry into |mapDictionary| via
  // GPBDictionaryReadEntry(), with the same limit and recursion handling as
  // readMessage:extensionRegistry:.
  - (void)readMapEntry:(id)mapDictionary
      extensionRegistry:(GPBExtensionRegistry *)extensionRegistry
                  field:(GPBFieldDescriptor *)field
          parentMessage:(GPBMessage *)parentMessage {
    size_t length = ReadNonNegativeLength(&state_);
    RaiseIfRecursionLimitReached(&state_);
    size_t oldLimit = GPBCodedInputStreamPushLimit(&state_, length);
    ++state_.recursionDepth;
    GPBDictionaryReadEntry(mapDictionary, self, extensionRegistry, field,
                           parentMessage);
    GPBCodedInputStreamCheckLastTagWas(&state_, 0);
    --state_.recursionDepth;
    GPBCodedInputStreamPopLimit(&state_, oldLimit);
  }

  // Returns an autoreleased copy of a length-prefixed byte blob (nil when the
  // length prefix is negative).
  - (NSData *)readData {
    return [GPBCodedInputStreamReadRetainedData(&state_) autorelease];
  }

  - (uint32_t)readUInt32 {
    return GPBCodedInputStreamReadUInt32(&state_);
  }

  - (int32_t)readEnum {
    return GPBCodedInputStreamReadEnum(&state_);
  }

  - (int32_t)readSFixed32 {
    return GPBCodedInputStreamReadSFixed32(&state_);
  }

  - (int64_t)readSFixed64 {
    return GPBCodedInputStreamReadSFixed64(&state_);
  }

  - (int32_t)readSInt32 {
    return GPBCodedInputStreamReadSInt32(&state_);
  }

  - (int64_t)readSInt64 {
    return GPBCodedInputStreamReadSInt64(&state_);
  }

  @end
  // String object that keeps a private copy of the UTF-8 bytes it was created
  // from and only builds the backing CFString when a method actually needs it.
  // Strings that are pure 7-bit ASCII can answer several queries straight from
  // the byte buffer.
  @implementation GPBString {
   @package
    CFStringRef string_;
    unsigned char *utf8_;
    NSUInteger utf8Len_;
    // This lock is used to gate access to utf8_. Once GPBStringInitStringValue()
    // has been called, string_ will be filled in, and utf8_ will be NULL.
    OSSpinLock lock_;
    BOOL hasBOM_;
    BOOL is7BitAscii_;
  }

  // Returns true if the passed in bytes are 7 bit ascii.
  // This routine needs to be fast.
  static inline bool AreBytesIn7BitASCII(const uint8_t *bytes, NSUInteger len) {
  // In the loops below, it's more efficient to collect rather than do
  // conditional at every step.
  #if __LP64__
    // Align bytes. This is especially important in case of 3 byte BOM.
    while (len > 0 && ((size_t)bytes & 0x07)) {
      if (*bytes++ & 0x80) return false;
      len--;
    }
    while (len >= 32) {
      uint64_t val = *(const uint64_t *)bytes;
      uint64_t hiBits = (val & 0x8080808080808080ULL);
      bytes += 8;
      val = *(const uint64_t *)bytes;
      hiBits |= (val & 0x8080808080808080ULL);
      bytes += 8;
      val = *(const uint64_t *)bytes;
      hiBits |= (val & 0x8080808080808080ULL);
      bytes += 8;
      val = *(const uint64_t *)bytes;
      if (hiBits | (val & 0x8080808080808080ULL)) return false;
      bytes += 8;
      len -= 32;
    }
    while (len >= 16) {
      uint64_t val = *(const uint64_t *)bytes;
      uint64_t hiBits = (val & 0x8080808080808080ULL);
      bytes += 8;
      val = *(const uint64_t *)bytes;
      if (hiBits | (val & 0x8080808080808080ULL)) return false;
      bytes += 8;
      len -= 16;
    }
    while (len >= 8) {
      uint64_t val = *(const uint64_t *)bytes;
      if (val & 0x8080808080808080ULL) return false;
      bytes += 8;
      len -= 8;
    }
  #else   // __LP64__
    // Align bytes. This is especially important in case of 3 byte BOM.
    while (len > 0 && ((size_t)bytes & 0x03)) {
      if (*bytes++ & 0x80) return false;
      len--;
    }
    while (len >= 16) {
      uint32_t val = *(const uint32_t *)bytes;
      uint32_t hiBits = (val & 0x80808080U);
      bytes += 4;
      val = *(const uint32_t *)bytes;
      hiBits |= (val & 0x80808080U);
      bytes += 4;
      val = *(const uint32_t *)bytes;
      hiBits |= (val & 0x80808080U);
      bytes += 4;
      val = *(const uint32_t *)bytes;
      if (hiBits | (val & 0x80808080U)) return false;
      bytes += 4;
      len -= 16;
    }
    while (len >= 8) {
      uint32_t val = *(const uint32_t *)bytes;
      uint32_t hiBits = (val & 0x80808080U);
      bytes += 4;
      val = *(const uint32_t *)bytes;
      if (hiBits | (val & 0x80808080U)) return false;
      bytes += 4;
      len -= 8;
    }
  #endif  // __LP64__
    while (len >= 4) {
      uint32_t val = *(const uint32_t *)bytes;
      if (val & 0x80808080U) return false;
      bytes += 4;
      len -= 4;
    }
    while (len--) {
      if (*bytes++ & 0x80) return false;
    }
    return true;
  }

  // Builds string_ from the UTF-8 buffer if that has not happened yet. The
  // caller must already hold string->lock_.
  //
  // On success ownership of the buffer passes to the CFString. If the bytes are
  // not valid UTF-8 the object becomes the empty string and the buffer is
  // freed. Either way utf8_ is NULL afterwards.
  //
  // Defined ahead of GPBStringInitStringValue() so that it is declared before
  // its first use.
  static void GPBStringInitStringValueAlreadyLocked(GPBString *string) {
    if (string->string_ == NULL && string->utf8_ != NULL) {
      // Using kCFAllocatorMalloc for contentsDeallocator, as buffer in
      // string->utf8_ is being handed off.
      string->string_ = CFStringCreateWithBytesNoCopy(
          NULL, string->utf8_, string->utf8Len_, kCFStringEncodingUTF8, false,
          kCFAllocatorMalloc);
      if (!string->string_) {
  #ifdef DEBUG
        // https://developers.google.com/protocol-buffers/docs/proto#scalar
        NSLog(@"UTF8 failure, is some field type 'string' when it should be "
              @"'bytes'?");
  #endif
        string->string_ = CFSTR("");
        string->utf8Len_ = 0;
        // The string is now empty, so it no longer has a BOM. Leaving the flag
        // set would make -lengthOfBytesUsingEncoding: compute 0 - 3.
        string->hasBOM_ = NO;
        // On failure, we have to clean up the buffer.
        free(string->utf8_);
      }
      string->utf8_ = NULL;
    }
  }

  // Takes the lock and makes sure string_ has been created.
  static inline void GPBStringInitStringValue(GPBString *string) {
    OSSpinLockLock(&string->lock_);
    GPBStringInitStringValueAlreadyLocked(string);
    OSSpinLockUnlock(&string->lock_);
  }

  // Creates a GPBString from |length| bytes of UTF-8 at |bytes|. The result has
  // a +1 retain count (the caller owns it), or is nil if allocation failed.
  GPBString *GPBCreateGPBStringWithUTF8(const void *bytes, NSUInteger length) {
    GPBString *result = [[GPBString alloc] initWithBytes:bytes length:length];
    return result;
  }

  // Copies |length| bytes from |bytes| and records whether they are pure 7-bit
  // ASCII and whether they start with a UTF-8 byte order mark. Returns nil if
  // the private buffer cannot be allocated.
  - (instancetype)initWithBytes:(const void *)bytes length:(NSUInteger)length {
    self = [super init];
    if (self) {
      // malloc(0) is allowed to return NULL; always request at least one byte
      // so that a NULL result unambiguously means the allocation failed and
      // the object always has a buffer to build its string from.
      utf8_ = malloc(length > 0 ? length : 1);
      if (utf8_ == NULL) {
        [self release];
        return nil;
      }
      if (length > 0) {
        memcpy(utf8_, bytes, length);
      }
      utf8Len_ = length;
      lock_ = OS_SPINLOCK_INIT;
      is7BitAscii_ = AreBytesIn7BitASCII(bytes, length);
      if (length >= 3 && memcmp(utf8_, "\xef\xbb\xbf", 3) == 0) {
        // We can't just remove the BOM from the string here, because in the case
        // where we have > 1 BOM at the beginning of the string, we will remove one,
        // and the internal NSString we create will remove the next one, and we will
        // end up with a GPBString != NSString issue.
        // We also just can't remove all the BOMs because then we would end up with
        // potential cases where a GPBString and an NSString made with the same
        // UTF8 buffer would in fact be different.
        // We record the fact we have a BOM, and use it as necessary to simulate
        // what NSString would return for various calls.
        hasBOM_ = YES;
  #if DEBUG
        // Sending BOMs across the line is just wasting bits.
        NSLog(@"Bad data? String should not have BOM!");
  #endif  // DEBUG
      }
    }
    return self;
  }

  - (void)dealloc {
    if (string_ != NULL) {
      CFRelease(string_);
    }
    if (utf8_ != NULL) {
      free(utf8_);
    }
    [super dealloc];
  }

  // Required NSString overrides.

  // For 7-bit ASCII the byte count equals the character count, so the backing
  // string does not need to be created.
  - (NSUInteger)length {
    if (is7BitAscii_) {
      return utf8Len_;
    } else {
      GPBStringInitStringValue(self);
      return CFStringGetLength(string_);
    }
  }

  // Answers from the byte buffer while it is still available and the content is
  // 7-bit ASCII; otherwise falls back to the backing string. |anIndex| is not
  // range-checked here.
  - (unichar)characterAtIndex:(NSUInteger)anIndex {
    OSSpinLockLock(&lock_);
    if (is7BitAscii_ && utf8_) {
      unichar result = utf8_[anIndex];
      OSSpinLockUnlock(&lock_);
      return result;
    } else {
      GPBStringInitStringValueAlreadyLocked(self);
      OSSpinLockUnlock(&lock_);
      return CFStringGetCharacterAtIndex(string_, anIndex);
    }
  }

  // Override a couple of methods that typically want high performance.

  - (id)copyWithZone:(NSZone *)zone {
    GPBStringInitStringValue(self);
    return [(NSString *)string_ copyWithZone:zone];
  }

  - (id)mutableCopyWithZone:(NSZone *)zone {
    GPBStringInitStringValue(self);
    return [(NSString *)string_ mutableCopyWithZone:zone];
  }

  - (NSUInteger)hash {
    // Must convert to string here to make sure that the hash is always
    // consistent no matter what state the GPBString is in.
    GPBStringInitStringValue(self);
    return CFHash(string_);
  }

  // Equal to any NSString whose contents compare equal to the backing string.
  - (BOOL)isEqual:(id)object {
    if (self == object) {
      return YES;
    }
    if ([object isKindOfClass:[NSString class]]) {
      GPBStringInitStringValue(self);
      return CFStringCompare(string_, (CFStringRef)object, 0) ==
             kCFCompareEqualTo;
    }
    return NO;
  }

  // Copies the characters in |aRange| into |buffer|, widening bytes directly
  // when the 7-bit ASCII buffer is still available. |aRange| is not
  // range-checked here.
  - (void)getCharacters:(unichar *)buffer range:(NSRange)aRange {
    OSSpinLockLock(&lock_);
    if (is7BitAscii_ && utf8_) {
      unsigned char *bytes = &(utf8_[aRange.location]);
      for (NSUInteger i = 0; i < aRange.length; ++i) {
        buffer[i] = bytes[i];
      }
      OSSpinLockUnlock(&lock_);
    } else {
      GPBStringInitStringValueAlreadyLocked(self);
      OSSpinLockUnlock(&lock_);
      CFStringGetCharacters(string_, CFRangeMake(aRange.location, aRange.length),
                            buffer);
    }
  }

  // For UTF-8 (and for ASCII when the content is 7-bit ASCII) the answer is the
  // stored byte count minus a leading BOM, so no conversion is needed.
  - (NSUInteger)lengthOfBytesUsingEncoding:(NSStringEncoding)encoding {
    if ((encoding == NSUTF8StringEncoding) ||
        (encoding == NSASCIIStringEncoding && is7BitAscii_)) {
      return utf8Len_ - (hasBOM_ ? 3 : 0);
    } else {
      GPBStringInitStringValue(self);
      return [(NSString *)string_ lengthOfBytesUsingEncoding:encoding];
    }
  }

  - (BOOL)getBytes:(void *)buffer
           maxLength:(NSUInteger)maxLength
          usedLength:(NSUInteger *)usedLength
            encoding:(NSStringEncoding)encoding
             options:(NSStringEncodingConversionOptions)options
               range:(NSRange)range
      remainingRange:(NSRangePointer)remainingRange {
    // [NSString getBytes:maxLength:usedLength:encoding:options:range:remainingRange]
    // does not return reliable results if the maxLength argument is 0
    // (Radar 16385183). Therefore we have special cased it as a slow case so
    // that it behaves however Apple intends it to behave. It should be a rare
    // case.
    //
    // [NSString getBytes:maxLength:usedLength:encoding:options:range:remainingRange]
    // does not return reliable results if the range is outside of the strings
    // length (Radar 16396177). Therefore we have special cased it as a slow
    // case so that it behaves however Apple intends it to behave. It should
    // be a rare case.
    //
    // We can optimize the UTF8StringEncoding and NSASCIIStringEncoding with no
    // options cases.
    if ((options == 0) &&
        (encoding == NSUTF8StringEncoding || encoding == NSASCIIStringEncoding) &&
        (maxLength != 0) &&
        (NSMaxRange(range) <= utf8Len_)) {
      // Might be able to optimize it.
      OSSpinLockLock(&lock_);
      if (is7BitAscii_ && utf8_) {
        NSUInteger length = range.length;
        length = (length < maxLength) ? length : maxLength;
        memcpy(buffer, utf8_ + range.location, length);
        if (usedLength) {
          *usedLength = length;
        }
        if (remainingRange) {
          remainingRange->location = range.location + length;
          remainingRange->length = range.length - length;
        }
        OSSpinLockUnlock(&lock_);
        if (length > 0) {
          return YES;
        } else {
          return NO;
        }
      } else {
        GPBStringInitStringValueAlreadyLocked(self);
        OSSpinLockUnlock(&lock_);
      }
    } else {
      GPBStringInitStringValue(self);
    }
    return [(NSString *)string_ getBytes:buffer
                               maxLength:maxLength
                              usedLength:usedLength
                                encoding:encoding
                                 options:options
                                   range:range
                          remainingRange:remainingRange];
  }

  @end