Просмотр исходного кода

JS: Replaced fromCodePoint/codePointAt with fromCharCode/charCodeAt because of those functions' limited availability; fixed a typo in tests.

Wojciech Mandrysz 9 лет назад
Родитель
Коммит
7332ffb1f0
3 изменённых файла: 24 добавлено и 18 удалено
  1. 9 12
      js/binary/decoder.js
  2. 3 3
      js/binary/decoder_test.js
  3. 12 3
      js/binary/encoder.js

+ 9 - 12
js/binary/decoder.js

@@ -905,12 +905,12 @@ jspb.BinaryDecoder.prototype.readString = function(length) {
   var bytes = this.bytes_;
   var cursor = this.cursor_;
   var end = cursor + length;
-  var codepoints = [];
+  var codeUnits = [];
 
   while (cursor < end) {
     var c = bytes[cursor++];
     if (c < 128) { // Regular 7-bit ASCII.
-      codepoints.push(c);
+      codeUnits.push(c);
     } else if (c < 192) {
       // UTF-8 continuation mark. We are out of sync. This
       // might happen if we attempted to read a character
@@ -918,11 +918,11 @@ jspb.BinaryDecoder.prototype.readString = function(length) {
       continue;
     } else if (c < 224) { // UTF-8 with two bytes.
       var c2 = bytes[cursor++];
-      codepoints.push(((c & 31) << 6) | (c2 & 63));
+      codeUnits.push(((c & 31) << 6) | (c2 & 63));
     } else if (c < 240) { // UTF-8 with three bytes.
       var c2 = bytes[cursor++];
       var c3 = bytes[cursor++];
-      codepoints.push(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
+      codeUnits.push(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
     } else if (c < 248) { // UTF-8 with 4 bytes.
       var c2 = bytes[cursor++];
       var c3 = bytes[cursor++];
@@ -932,20 +932,17 @@ jspb.BinaryDecoder.prototype.readString = function(length) {
       var codepoint = ((c & 7) << 18) | ((c2 & 63) << 12) | ((c3 & 63) << 6) | (c4 & 63);
       // Surrogates formula from wikipedia.
       // 1. Subtract 0x10000 from codepoint
-      codepoint -= 65536;
+      codepoint -= 0x10000;
       // 2. Split this into the high 10-bit value and the low 10-bit value
-      var low = codepoint & 1023;
-      var high = (codepoint >> 10) & 1023;
       // 3. Add 0xD800 to the high value to form the high surrogate
-      high += 55296;
       // 4. Add 0xDC00 to the low value to form the low surrogate:
-      low += 56320;
-      codepoints.push(high);
-      codepoints.push(low);
+      var low = (codepoint & 1023) + 0xDC00;
+      var high = ((codepoint >> 10) & 1023) + 0xD800;
+      codeUnits.push(high, low)
     }
   }
 
-  var result = String.fromCodePoint.apply(null, codepoints);
+  var result = String.fromCharCode.apply(null, codeUnits);
   this.cursor_ = cursor;
   return result;
 };

+ 3 - 3
js/binary/decoder_test.js

@@ -218,19 +218,19 @@ describe('binaryDecoderTest', function() {
 
     var ascii = "ASCII should work in 3, 2, 1..."
     var utf8_two_bytes = "©";
-    var utf8_tree_bytes = "❄";
+    var utf8_three_bytes = "❄";
     var utf8_four_bytes = "😁";
     
     encoder.writeString(ascii);
     encoder.writeString(utf8_two_bytes);
-    encoder.writeString(utf8_tree_bytes);
+    encoder.writeString(utf8_three_bytes);
     encoder.writeString(utf8_four_bytes);
     
     var decoder = jspb.BinaryDecoder.alloc(encoder.end());
     
     assertEquals(ascii, decoder.readString(ascii.length));
     assertEquals(utf8_two_bytes, decoder.readString(utf8_two_bytes.length));
-    assertEquals(utf8_tree_bytes, decoder.readString(utf8_tree_bytes.length));
+    assertEquals(utf8_three_bytes, decoder.readString(utf8_three_bytes.length));
     assertEquals(utf8_four_bytes, decoder.readString(utf8_four_bytes.length));
    });
 

+ 12 - 3
js/binary/encoder.js

@@ -409,10 +409,19 @@ jspb.BinaryEncoder.prototype.writeFixedHash64 = function(hash) {
  */
 jspb.BinaryEncoder.prototype.writeString = function(value) {
   var oldLength = this.buffer_.length;
-
-  // UTF16 to UTF8 conversion loop swiped from goog.crypt.stringToUtf8ByteArray.
+ 
   for (var i = 0; i < value.length; i++) {
-    var c = value.codePointAt(i);
+    
+    var c = value.charCodeAt(i);
+    // Look for surrogates
+    if (c >= 0xD800 && c <= 0xDBFF && i + 1 < value.length) {
+      var second = value.charCodeAt(i + 1);
+      if (second >= 0xDC00 && second <= 0xDFFF) { // low surrogate
+        // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
+        c = (c - 0xD800) * 0x400 + second - 0xDC00 + 0x10000;
+      }
+    }
+
     if (c < 128) {
       this.buffer_.push(c);
     } else if (c < 2048) {