Sfoglia il codice sorgente

Merge pull request #1391 from thomasvl/string_tweaks

ObjC String followups: one test case only addition, behavior change for invalid UTF-8
Thomas Van Lenten 9 anni fa
parent
commit
28c5c25397

+ 4 - 3
objectivec/GPBCodedInputStream.m

@@ -219,15 +219,16 @@ NSString *GPBCodedInputStreamReadRetainedString(
     result = [[NSString alloc] initWithBytes:&state->bytes[state->bufferPos]
                                       length:size
                                     encoding:NSUTF8StringEncoding];
+    state->bufferPos += size;
     if (!result) {
-      result = @"";
 #ifdef DEBUG
       // https://developers.google.com/protocol-buffers/docs/proto#scalar
-      NSLog(@"UTF8 failure, is some field type 'string' when it should be "
+      NSLog(@"UTF-8 failure, is some field type 'string' when it should be "
             @"'bytes'?");
 #endif
+      [NSException raise:NSParseErrorException
+                  format:@"Invalid UTF-8 for a 'string'"];
     }
-    state->bufferPos += size;
   }
   return result;
 }

+ 43 - 6
objectivec/Tests/GPBCodedInputStreamTests.m

@@ -283,16 +283,53 @@
   [output writeRawData:[NSData dataWithBytes:bytes length:sizeof(bytes)]];
   [output flush];
 
-  NSData* data =
+  NSData *data =
       [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey];
   GPBCodedInputStream* input = [GPBCodedInputStream streamWithData:data];
+  NSError *error = nil;
   TestAllTypes* message = [TestAllTypes parseFromCodedInputStream:input
                                                 extensionRegistry:nil
-                                                            error:NULL];
-  XCTAssertNotNil(message);
-  // Make sure we can read string properties twice without crashing.
-  XCTAssertEqual([message.defaultString length], (NSUInteger)0);
-  XCTAssertEqualObjects(@"", message.defaultString);
+                                                            error:&error];
+  XCTAssertNotNil(error);
+  XCTAssertNil(message);
+}
+
+- (void)testBOMWithinStrings {
+  // We've seen servers that end up with BOMs within strings (not always at the
+  // start, and sometimes in multiple places); make sure they always parse
+  // correctly. (Again, this is in part in case a custom string class is ever
+  // used again.)
+  const char* strs[] = {
+    "\xEF\xBB\xBF String with BOM",
+    "String with \xEF\xBB\xBF in middle",
+    "String with end bom \xEF\xBB\xBF",
+    "\xEF\xBB\xBF\xe2\x99\xa1",  // BOM White Heart
+    "\xEF\xBB\xBF\xEF\xBB\xBF String with Two BOM",
+  };
+  for (size_t i = 0; i < GPBARRAYSIZE(strs); ++i) {
+    NSOutputStream* rawOutput = [NSOutputStream outputStreamToMemory];
+    GPBCodedOutputStream* output =
+        [GPBCodedOutputStream streamWithOutputStream:rawOutput];
+
+    int32_t tag = GPBWireFormatMakeTag(TestAllTypes_FieldNumber_DefaultString,
+                                       GPBWireFormatLengthDelimited);
+    [output writeRawVarint32:tag];
+    size_t length = strlen(strs[i]);
+    [output writeRawVarint32:(int32_t)length];
+    [output writeRawData:[NSData dataWithBytes:strs[i] length:length]];
+    [output flush];
+
+    NSData* data =
+        [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey];
+    GPBCodedInputStream* input = [GPBCodedInputStream streamWithData:data];
+    TestAllTypes* message = [TestAllTypes parseFromCodedInputStream:input
+                                                  extensionRegistry:nil
+                                                              error:NULL];
+    XCTAssertNotNil(message, @"Loop %zu", i);
+    // Ensure the string is there. NSString can consume the BOM in some
+    // cases, so don't actually check the string for exact equality.
+    XCTAssertTrue(message.defaultString.length > 0, @"Loop %zu", i);
+  }
 }
 
 @end