Browse Source

Speed up JSON parsing.

It turns out calling StringOutputStream::Next()/BackUp() repeatedly is
very costly in open-source protobuf because it keeps calling resize() on
the string back and forth. The current JSON conversion API suffers from
this problem, which leads to ridiculously long parsing times:
https://github.com/google/protobuf/issues/2305#issuecomment-257785492

This change fixes the problem by caching the buffer returned by Next() and
avoiding calls to BackUp() as much as possible.
Feng Xiao 9 years ago
parent
commit
bd158fc238
2 changed files with 20 additions and 14 deletions
  1. 16 13
      src/google/protobuf/util/json_util.cc
  2. 4 1
      src/google/protobuf/util/json_util.h

+ 16 - 13
src/google/protobuf/util/json_util.cc

@@ -49,22 +49,25 @@ namespace protobuf {
 namespace util {
 namespace util {
 
 
 namespace internal {
 namespace internal {
+ZeroCopyStreamByteSink::~ZeroCopyStreamByteSink() {
+  stream_->BackUp(buffer_size_);
+}
+
 void ZeroCopyStreamByteSink::Append(const char* bytes, size_t len) {
 void ZeroCopyStreamByteSink::Append(const char* bytes, size_t len) {
-  while (len > 0) {
-    void* buffer;
-    int length;
-    if (!stream_->Next(&buffer, &length)) {
-      // There isn't a way for ByteSink to report errors.
+  while (true) {
+    if (len <= buffer_size_) {
+      memcpy(buffer_, bytes, len);
+      buffer_ = static_cast<char*>(buffer_) + len;
+      buffer_size_ -= len;
       return;
       return;
     }
     }
-    if (len < length) {
-      memcpy(buffer, bytes, len);
-      stream_->BackUp(length - len);
-      break;
-    } else {
-      memcpy(buffer, bytes, length);
-      bytes += length;
-      len -= length;
+    memcpy(buffer_, bytes, buffer_size_);
+    bytes += buffer_size_;
+    len -= buffer_size_;
+    if (!stream_->Next(&buffer_, &buffer_size_)) {
+      // There isn't a way for ByteSink to report errors.
+      buffer_size_ = 0;
+      return;
     }
     }
   }
   }
 }
 }

+ 4 - 1
src/google/protobuf/util/json_util.h

@@ -172,12 +172,15 @@ namespace internal {
 class LIBPROTOBUF_EXPORT ZeroCopyStreamByteSink : public strings::ByteSink {
 class LIBPROTOBUF_EXPORT ZeroCopyStreamByteSink : public strings::ByteSink {
  public:
  public:
   explicit ZeroCopyStreamByteSink(io::ZeroCopyOutputStream* stream)
   explicit ZeroCopyStreamByteSink(io::ZeroCopyOutputStream* stream)
-      : stream_(stream) {}
+      : stream_(stream), buffer_size_(0) {}
+  ~ZeroCopyStreamByteSink();
 
 
   virtual void Append(const char* bytes, size_t len);
   virtual void Append(const char* bytes, size_t len);
 
 
  private:
  private:
   io::ZeroCopyOutputStream* stream_;
   io::ZeroCopyOutputStream* stream_;
+  void* buffer_;
+  int buffer_size_;
 
 
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyStreamByteSink);
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyStreamByteSink);
 };
 };