فهرست منبع

Merge pull request #1934 from thomasvl/objc_strings_with_null

Never use strlen on utf8 runs so null characters work.
Thomas Van Lenten 9 سال پیش
والد
کامیت
30bbbe99e2
3فایلهای تغییر یافته به همراه100 افزوده شده و 33 حذف شده
  1. 5 32
      objectivec/GPBCodedOutputStream.m
  2. 9 1
      objectivec/GPBUtilities.m
  3. 86 0
      objectivec/Tests/GPBCodedOuputStreamTests.m

+ 5 - 32
objectivec/GPBCodedOutputStream.m

@@ -144,22 +144,6 @@ static void GPBWriteRawLittleEndian64(GPBOutputBufferState *state,
   GPBWriteRawByte(state, (int32_t)(value >> 56) & 0xFF);
 }
 
-#if defined(DEBUG) && DEBUG && !defined(NS_BLOCK_ASSERTIONS)
-+ (void)load {
-  // This test exists to verify that CFStrings with embedded NULLs will work
-  // for us. If this Assert fails, all code below that depends on
-  // CFStringGetCStringPtr will NOT work properly on strings that contain
-  // embedded NULLs, and we do get that in some protobufs.
-  // Note that this will not be compiled in release.
-  // We didn't feel that just keeping it in a unit test was sufficient because
-  // the Protobuf unit tests are only run when somebody is actually working
-  // on protobufs.
-  CFStringRef zeroTest = CFSTR("Test\0String");
-  const char *cString = CFStringGetCStringPtr(zeroTest, kCFStringEncodingUTF8);
-  NSAssert(cString == NULL, @"Serious Error");
-}
-#endif  // DEBUG && !defined(NS_BLOCK_ASSERTIONS)
-
 - (void)dealloc {
   [self flush];
   [state_.output close];
@@ -282,19 +266,15 @@ static void GPBWriteRawLittleEndian64(GPBOutputBufferState *state,
 }
 
 - (void)writeStringNoTag:(const NSString *)value {
-  // If you are concerned about embedded NULLs see the test in
-  // +load above.
-  const char *quickString =
-      CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
-  size_t length = (quickString != NULL)
-                      ? strlen(quickString)
-                      : [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
+  size_t length = [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
   GPBWriteRawVarint32(&state_, (int32_t)length);
-
   if (length == 0) {
     return;
   }
 
+  const char *quickString =
+      CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
+
   // Fast path: Most strings are short, if the buffer already has space,
   // add to it directly.
   NSUInteger bufferBytesLeft = state_.size - state_.position;
@@ -1038,14 +1018,7 @@ size_t GPBComputeBoolSizeNoTag(BOOL value) {
 }
 
 size_t GPBComputeStringSizeNoTag(NSString *value) {
-  // If you are concerned about embedded NULLs see the test in
-  // +load above.
-  const char *quickString =
-      CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
-  NSUInteger length =
-      (quickString != NULL)
-          ? strlen(quickString)
-          : [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
+  NSUInteger length = [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
   return GPBComputeRawVarint32SizeForInteger(length) + length;
 }
 

+ 9 - 1
objectivec/GPBUtilities.m

@@ -1052,7 +1052,15 @@ static void AppendStringEscaped(NSString *toPrint, NSMutableString *destStr) {
       case '\'': [destStr appendString:@"\\\'"]; break;
       case '\\': [destStr appendString:@"\\\\"]; break;
       default:
-        [destStr appendFormat:@"%C", aChar];
+        // This differs slightly from the C++ code in that the C++ doesn't
+        // generate UTF8; it looks at the string in UTF8, but escapes every
+        // byte > 0x7E.
+        if (aChar < 0x20) {
+          [destStr appendFormat:@"\\%d%d%d",
+                                (aChar / 64), ((aChar % 64) / 8), (aChar % 8)];
+        } else {
+          [destStr appendFormat:@"%C", aChar];
+        }
         break;
     }
   }

+ 86 - 0
objectivec/Tests/GPBCodedOuputStreamTests.m

@@ -193,6 +193,32 @@
   }
 }
 
+- (void)assertWriteStringNoTag:(NSData*)data
+                         value:(NSString *)value
+                       context:(NSString *)contextMessage {
+  NSOutputStream* rawOutput = [NSOutputStream outputStreamToMemory];
+  GPBCodedOutputStream* output =
+      [GPBCodedOutputStream streamWithOutputStream:rawOutput];
+  [output writeStringNoTag:value];
+  [output flush];
+
+  NSData* actual =
+      [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey];
+  XCTAssertEqualObjects(data, actual, @"%@", contextMessage);
+
+  // Try different block sizes.
+  for (int blockSize = 1; blockSize <= 16; blockSize *= 2) {
+    rawOutput = [NSOutputStream outputStreamToMemory];
+    output = [GPBCodedOutputStream streamWithOutputStream:rawOutput
+                                               bufferSize:blockSize];
+    [output writeStringNoTag:value];
+    [output flush];
+
+    actual = [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey];
+    XCTAssertEqualObjects(data, actual, @"%@", contextMessage);
+  }
+}
+
 - (void)testWriteVarint1 {
   [self assertWriteVarint:bytes(0x00) value:0];
 }
@@ -337,4 +363,64 @@
   XCTAssertEqualObjects(rawBytes, goldenData);
 }
 
+- (void)testCFStringGetCStringPtrAndStringsWithNullChars {
+  // This test exists to verify that CFStrings with embedded NULLs still expose
+  // their raw buffer if they are backed by UTF8 storage. If this fails, the
+  // quick/direct access paths in GPBCodedOutputStream that depend on
+  // CFStringGetCStringPtr need to be re-evalutated (maybe just removed).
+  // And yes, we do get NULLs in strings from some servers.
+
+  char zeroTest[] = "\0Test\0String";
+  // Note: there is a \0 at the end of this since it is a c-string.
+  NSString *asNSString = [[NSString alloc] initWithBytes:zeroTest
+                                                  length:sizeof(zeroTest)
+                                                encoding:NSUTF8StringEncoding];
+  const char *cString =
+      CFStringGetCStringPtr((CFStringRef)asNSString, kCFStringEncodingUTF8);
+  XCTAssertTrue(cString != NULL);
+  // Again, if the above assert fails, then it means NSString no longer exposes
+  // the raw utf8 storage of a string created from utf8 input, so the code using
+  // CFStringGetCStringPtr in GPBCodedOutputStream will still work (it will take
+  // a different code path); but the optimizations for when
+  // CFStringGetCStringPtr does work could possibly go away.
+
+  XCTAssertEqual(sizeof(zeroTest),
+                 [asNSString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]);
+  XCTAssertTrue(0 == memcmp(cString, zeroTest, sizeof(zeroTest)));
+  [asNSString release];
+}
+
+- (void)testWriteStringsWithZeroChar {
+  // Unicode allows `\0` as a character, and NSString is a class cluster, so
+  // there are a few different classes that could end up beind a given string.
+  // Historically, we've seen differences based on constant strings in code and
+  // strings built via the NSString apis. So this round trips them to ensure
+  // they are acting as expected.
+
+  NSArray<NSString *> *strs = @[
+    @"\0at start",
+    @"in\0middle",
+    @"at end\0",
+  ];
+  int i = 0;
+  for (NSString *str in strs) {
+    NSData *asUTF8 = [str dataUsingEncoding:NSUTF8StringEncoding];
+    NSMutableData *expected = [NSMutableData data];
+    uint8_t lengthByte = (uint8_t)asUTF8.length;
+    [expected appendBytes:&lengthByte length:1];
+    [expected appendData:asUTF8];
+
+    NSString *context = [NSString stringWithFormat:@"Loop %d - Literal", i];
+    [self assertWriteStringNoTag:expected value:str context:context];
+
+    // Force a new string to be built which gets a different class from the
+    // NSString class cluster than the literal did.
+    NSString *str2 = [NSString stringWithFormat:@"%@", str];
+    context = [NSString stringWithFormat:@"Loop %d - Built", i];
+    [self assertWriteStringNoTag:expected value:str2 context:context];
+
+    ++i;
+  }
+}
+
 @end