瀏覽代碼

Merge pull request #601 from anandolee/master

ignore UTF-8 BOM
Jie Luo 10 年之前
父節點
當前提交
7648852550
共有 2 個文件被更改,包括 35 次插入0 次删除
  1. 26 0
      src/google/protobuf/compiler/parser_unittest.cc
  2. 9 0
      src/google/protobuf/io/tokenizer.cc

+ 26 - 0
src/google/protobuf/compiler/parser_unittest.cc

@@ -229,6 +229,32 @@ TEST_F(ParserTest, WarnIfSyntaxIdentifierOmmitted) {
 
 typedef ParserTest ParseMessageTest;
 
+TEST_F(ParseMessageTest, IgnoreBOM) {
+  char input[] = "   message TestMessage {\n"
+      "  required int32 foo = 1;\n"
+      "}\n";
+  // Set UTF-8 BOM.
+  input[0] = (char)0xEF;
+  input[1] = (char)0xBB;
+  input[2] = (char)0xBF;
+  ExpectParsesTo(input,
+    "message_type {"
+    "  name: \"TestMessage\""
+    "  field { name:\"foo\" label:LABEL_REQUIRED type:TYPE_INT32 number:1 }"
+    "}");
+}
+
+TEST_F(ParseMessageTest, BOMError) {
+  char input[] = "   message TestMessage {\n"
+      "  required int32 foo = 1;\n"
+      "}\n";
+  input[0] = (char)0xEF;
+  ExpectHasErrors(input,
+                  "0:1: Proto file starts with 0xEF but not UTF-8 BOM. "
+                  "Only UTF-8 is accepted for proto file.\n"
+                  "0:0: Expected top-level statement (e.g. \"message\").\n");
+}
+
 TEST_F(ParseMessageTest, SimpleMessage) {
   ExpectParsesTo(
     "message TestMessage {\n"

+ 9 - 0
src/google/protobuf/io/tokenizer.cc

@@ -762,6 +762,15 @@ bool Tokenizer::NextWithComments(string* prev_trailing_comments,
                              next_leading_comments);
 
   if (current_.type == TYPE_START) {
+    // Ignore unicode byte order mark(BOM) if it appears at the file
+    // beginning. Only UTF-8 BOM (0xEF 0xBB 0xBF) is accepted.
+    if (TryConsume((char)0xEF)) {
+      if (!TryConsume((char)0xBB) || !TryConsume((char)0xBF)) {
+        AddError("Proto file starts with 0xEF but not UTF-8 BOM. "
+                 "Only UTF-8 is accepted for proto file.");
+        return false;
+      }
+    }
     collector.DetachFromPrev();
   } else {
     // A comment appearing on the same line must be attached to the previous