Browse Source

Python's detection of unpaired surrogates during JSON parsing is unpredictable across versions, so always check manually.

Josh Haberman 9 years ago
parent
commit
4833b4c003
2 changed files with 6 additions and 24 deletions
  1. 0 16
      conformance/failure_list_python.txt
  2. 6 8
      python/google/protobuf/json_format.py

+ 0 - 16
conformance/failure_list_python.txt

@@ -43,21 +43,5 @@ JsonInput.Uint32FieldMaxFloatValue.JsonOutput
 JsonInput.Uint32FieldMaxFloatValue.ProtobufOutput
 JsonInput.ValueAcceptNull.JsonOutput
 JsonInput.ValueAcceptNull.ProtobufOutput
-ProtobufInput.PrematureEofInDelimitedDataForKnownNonRepeatedValue.MESSAGE
-ProtobufInput.PrematureEofInDelimitedDataForKnownRepeatedValue.MESSAGE
-ProtobufInput.PrematureEofInPackedField.BOOL
-ProtobufInput.PrematureEofInPackedField.DOUBLE
-ProtobufInput.PrematureEofInPackedField.ENUM
-ProtobufInput.PrematureEofInPackedField.FIXED32
-ProtobufInput.PrematureEofInPackedField.FIXED64
-ProtobufInput.PrematureEofInPackedField.FLOAT
-ProtobufInput.PrematureEofInPackedField.INT32
-ProtobufInput.PrematureEofInPackedField.INT64
-ProtobufInput.PrematureEofInPackedField.SFIXED32
-ProtobufInput.PrematureEofInPackedField.SFIXED64
-ProtobufInput.PrematureEofInPackedField.SINT32
-ProtobufInput.PrematureEofInPackedField.SINT64
-ProtobufInput.PrematureEofInPackedField.UINT32
-ProtobufInput.PrematureEofInPackedField.UINT64
 TimestampProtoInputTooLarge.JsonOutput
 TimestampProtoInputTooSmall.JsonOutput

+ 6 - 8
python/google/protobuf/json_format.py

@@ -69,10 +69,9 @@ _INFINITY = 'Infinity'
 _NEG_INFINITY = '-Infinity'
 _NAN = 'NaN'
 
-if sys.version_info < (3, 0):
-  _UNPAIRED_SURROGATE_PATTERN = re.compile(six.u(
-      r'[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]'
-  ))
+_UNPAIRED_SURROGATE_PATTERN = re.compile(six.u(
+    r'[\ud800-\udbff](?![\udc00-\udfff])|(?<![\ud800-\udbff])[\udc00-\udfff]'
+))
 
 class Error(Exception):
   """Top-level module error for json_format."""
@@ -559,13 +558,12 @@ def _ConvertScalarFieldValue(value, field, require_str=False):
   elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
     if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
       return base64.b64decode(value)
-    elif sys.version_info < (3, 0):
-      # Python 2.x does not detect unpaired surrogates when JSON parsing.
+    else:
+      # The JSON parser's detection of unpaired surrogates appears to be
+      # unreliable across Python versions, so we always check manually.
       if _UNPAIRED_SURROGATE_PATTERN.search(value):
         raise ParseError('Unpaired surrogate')
       return value
-    else:
-      return value
   elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
     # Convert an enum value.
     enum_value = field.enum_type.values_by_name.get(value, None)