瀏覽代碼

Merge pull request #4699 from BSBandme/add_proto2_to_proto3_plugin

Add proto2 to proto3 util
Yilun Chong 7 年之前
父節點
當前提交
a4d16ed886

+ 38 - 7
benchmarks/Makefile.am

@@ -349,11 +349,11 @@ gogo_proto_middleman: protoc-gen-gogoproto
 	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I$(srcdir) -I$(top_srcdir) --plugin=protoc-gen-gogoproto --gogoproto_out=$$oldpwd/tmp/gogo_proto $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) $(benchmarks_protoc_inputs_proto2) )
 	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I$(srcdir) -I$(top_srcdir) --plugin=protoc-gen-gogoproto --gogoproto_out=$$oldpwd/tmp/gogo_proto $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) $(benchmarks_protoc_inputs_proto2) )
 	touch gogo_proto_middleman
 	touch gogo_proto_middleman
 
 
-new_data = $$(for data in $(all_data); do echo "tmp$${data\#$(srcdir)}"; done | xargs)
+gogo_data = $$(for data in $(all_data); do echo "tmp/gogo_data$${data\#$(srcdir)}"; done | xargs)
 
 
 generate_gogo_data: protoc_middleman protoc_middleman2 gogo-data-scrubber
 generate_gogo_data: protoc_middleman protoc_middleman2 gogo-data-scrubber
-	mkdir -p `dirname $(new_data)`
-	./gogo-data-scrubber $(all_data) $(new_data)
+	mkdir -p `dirname $(gogo_data)`
+	./gogo-data-scrubber $(all_data) $(gogo_data)
 	touch generate_gogo_data
 	touch generate_gogo_data
 	
 	
 make_tmp_dir_gogo:
 make_tmp_dir_gogo:
@@ -408,8 +408,6 @@ gogoslick_protoc_middleman: make_tmp_dir_gogo $(top_srcdir)/src/protoc$(EXEEXT)
 	oldpwd=`pwd` && ( cd $(srcdir)/tmp/gogo_proto && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$$oldpwd/$(top_srcdir)/src --gogoslick_out=$$oldpwd/tmp/gogoslick $(benchmarks_protoc_inputs_proto2_message4) )
 	oldpwd=`pwd` && ( cd $(srcdir)/tmp/gogo_proto && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$$oldpwd/$(top_srcdir)/src --gogoslick_out=$$oldpwd/tmp/gogoslick $(benchmarks_protoc_inputs_proto2_message4) )
 	touch gogoslick_protoc_middleman
 	touch gogoslick_protoc_middleman
 
 
-gogo_data = $$(find . -type f -name "dataset.*.pb" -path "./tmp/*")
-
 generate-gogo-benchmark-code:
 generate-gogo-benchmark-code:
 	@echo '#! /bin/bash' > generate-gogo-benchmark-code
 	@echo '#! /bin/bash' > generate-gogo-benchmark-code
 	@echo 'cp $(srcdir)/go/go_benchmark_test.go tmp/$$1/benchmark_code/$$1_benchmark1_test.go' >> generate-gogo-benchmark-code
 	@echo 'cp $(srcdir)/go/go_benchmark_test.go tmp/$$1/benchmark_code/$$1_benchmark1_test.go' >> generate-gogo-benchmark-code
@@ -453,7 +451,7 @@ gogoslick: gogoslick_protoc_middleman  generate_gogo_data gogo-benchmark generat
  
  
 ############ UTIL RULES BEGIN ############
 ############ UTIL RULES BEGIN ############
 
 
-bin_PROGRAMS += protoc-gen-gogoproto gogo-data-scrubber
+bin_PROGRAMS += protoc-gen-gogoproto gogo-data-scrubber protoc-gen-proto2_to_proto3 proto3-data-stripper
 
 
 protoc_gen_gogoproto_LDADD = $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/src/libprotoc.la
 protoc_gen_gogoproto_LDADD = $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/src/libprotoc.la
 protoc_gen_gogoproto_SOURCES = util/protoc-gen-gogoproto.cc
 protoc_gen_gogoproto_SOURCES = util/protoc-gen-gogoproto.cc
@@ -468,9 +466,40 @@ nodist_gogo_data_scrubber_SOURCES =                                        \
 	$(benchmarks_protoc_outputs_proto2)                                      \
 	$(benchmarks_protoc_outputs_proto2)                                      \
 	$(benchmarks_protoc_outputs_proto2_header)                               \
 	$(benchmarks_protoc_outputs_proto2_header)                               \
 	$(benchmarks_protoc_outputs_header)
 	$(benchmarks_protoc_outputs_header)
+
+protoc_gen_proto2_to_proto3_LDADD = $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/src/libprotoc.la
+protoc_gen_proto2_to_proto3_SOURCES = util/protoc-gen-proto2_to_proto3.cc
+protoc_gen_proto2_to_proto3_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)/cpp -I$(srcdir)/util
+
+proto3_data_stripper_LDADD = $(top_srcdir)/src/libprotobuf.la
+proto3_data_stripper_SOURCES = util/proto3_data_stripper.cc
+proto3_data_stripper_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)/cpp -I$(srcdir)/util
+util/proto3_data_stripper-proto3_data_stripper.$(OBJEXT): $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2) $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header)
+nodist_proto3_data_stripper_SOURCES =                                      \
+	$(benchmarks_protoc_outputs)                                             \
+	$(benchmarks_protoc_outputs_proto2)                                      \
+	$(benchmarks_protoc_outputs_proto2_header)                               \
+	$(benchmarks_protoc_outputs_header)
+
 	
 	
 ############ UTIL RULES END ############
 ############ UTIL RULES END ############
 
 
+############ PROTO3 PREPARATION BEGIN #############
+
+# Stamp rule: runs protoc with the proto2_to_proto3 plugin over the benchmark
+# schemas and writes the converted .proto files under tmp/proto3_proto.
+# The plugin is given as NAME=PATH with an absolute path because the recipe
+# changes into $(srcdir) before invoking protoc, while the plugin binary lives
+# in the directory make was started from; a bare relative name would only
+# resolve when both directories are the same.
+proto3_proto_middleman: protoc-gen-proto2_to_proto3
+	mkdir -p "tmp/proto3_proto"
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I$(srcdir) -I$(top_srcdir) --plugin=protoc-gen-proto2_to_proto3=$$oldpwd/protoc-gen-proto2_to_proto3 --proto2_to_proto3_out=$$oldpwd/tmp/proto3_proto $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) $(benchmarks_protoc_inputs_proto2) )
+	touch $@
+
+# Shell snippet (expanded inside recipes, hence the doubled dollars) that maps
+# every entry of $(all_data) to tmp/proto3_data/<entry without the $(srcdir)
+# prefix> and joins the results onto one line.
+proto3_data = $$(for data in $(all_data); do echo "tmp/proto3_data$${data\#$(srcdir)}"; done | xargs)
+
+# Stamp rule: feeds every dataset through proto3-data-stripper, passing all
+# input paths first and the matching output paths second.
+generate_proto3_data: protoc_middleman protoc_middleman2 proto3-data-stripper
+	mkdir -p `dirname $(proto3_data)`
+	./proto3-data-stripper $(all_data) $(proto3_data)
+	touch $@
+
+############ PROTO3 PREPARATION END #############
+
 MAINTAINERCLEANFILES =                                                     \
 MAINTAINERCLEANFILES =                                                     \
 	Makefile.in
 	Makefile.in
 
 
@@ -513,7 +542,9 @@ CLEANFILES =                                                               \
 	gogoslick_protoc_middleman                                               \
 	gogoslick_protoc_middleman                                               \
 	gogoslick                                                                \
 	gogoslick                                                                \
 	gogo-benchmark                                                           \
 	gogo-benchmark                                                           \
-	gogo/cpp_no_group/cpp_benchmark.* 
+	gogo/cpp_no_group/cpp_benchmark.*                                        \
+	proto3_proto_middleman                                                   \
+	generate_proto3_data
 	
 	
 
 
 clean-local:
 clean-local:

+ 64 - 0
benchmarks/util/data_proto2_to_proto3_util.h

@@ -0,0 +1,64 @@
+#ifndef PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_
+#define PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_
+
+#include "google/protobuf/message.h"
+#include "google/protobuf/descriptor.h"
+
+using google::protobuf::FieldDescriptor;
+using google::protobuf::Message;
+using google::protobuf::Reflection;
+
+namespace google {
+namespace protobuf {
+namespace util {
+
+// Reflection-based payload scrubber. Walks a message tree and clears every
+// set field that the concrete subclass rejects via ShouldBeClear(), then drops
+// the unknown fields of each visited message.
+class DataStripper {
+ public:
+  // The class is a polymorphic base (it declares a pure virtual method), so it
+  // needs a virtual destructor to make deletion through a DataStripper*
+  // well-defined.
+  virtual ~DataStripper() {}
+
+  // Strips `message` in place.
+  //   * Set fields for which ShouldBeClear() returns true are cleared.
+  //   * Remaining message-typed fields (singular or repeated) are stripped
+  //     recursively.
+  //   * Unknown fields of `message` are discarded.
+  void StripMessage(Message *message) {
+    std::vector<const FieldDescriptor*> set_fields;
+    const Reflection* reflection = message->GetReflection();
+    // Only fields that are currently set are listed, so unset sub-messages are
+    // never created by the MutableMessage() call below.
+    reflection->ListFields(*message, &set_fields);
+
+    for (size_t i = 0; i < set_fields.size(); i++) {
+      const FieldDescriptor* field = set_fields[i];
+      if (ShouldBeClear(field)) {
+        reflection->ClearField(message, field);
+        continue;
+      }
+      if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
+        if (field->is_repeated()) {
+          for (int j = 0; j < reflection->FieldSize(*message, field); j++) {
+            StripMessage(reflection->MutableRepeatedMessage(message, field, j));
+          }
+        } else {
+          StripMessage(reflection->MutableMessage(message, field));
+        }
+      }
+    }
+
+    reflection->MutableUnknownFields(message)->Clear();
+  }
+ private:
+  // Returns true when `field` must be removed from the payload.
+  virtual bool ShouldBeClear(const FieldDescriptor *field) = 0;
+};
+
+// Stripper variant that removes group-typed fields and keeps everything else.
+class GogoDataStripper : public DataStripper {
+ private:
+  // A field is dropped exactly when its wire type is TYPE_GROUP.
+  virtual bool ShouldBeClear(const FieldDescriptor *field) {
+    const bool is_group = FieldDescriptor::TYPE_GROUP == field->type();
+    return is_group;
+  }
+};
+
+// Stripper variant that removes both group-typed fields and extension fields.
+class Proto3DataStripper : public DataStripper {
+ private:
+  // A field is dropped when it is an extension or when its type is TYPE_GROUP.
+  virtual bool ShouldBeClear(const FieldDescriptor *field) {
+    if (field->is_extension()) {
+      return true;
+    }
+    return FieldDescriptor::TYPE_GROUP == field->type();
+  }
+};
+
+}  // namespace util
+}  // namespace protobuf
+}  // namespace google
+
+#endif  // PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_

+ 4 - 35
benchmarks/util/gogo_data_scrubber.cc

@@ -4,43 +4,11 @@
 #include "datasets/google_message2/benchmark_message2.pb.h"
 #include "datasets/google_message2/benchmark_message2.pb.h"
 #include "datasets/google_message3/benchmark_message3.pb.h"
 #include "datasets/google_message3/benchmark_message3.pb.h"
 #include "datasets/google_message4/benchmark_message4.pb.h"
 #include "datasets/google_message4/benchmark_message4.pb.h"
-
-#include "google/protobuf/message.h"
-#include "google/protobuf/descriptor.h"
+#include "data_proto2_to_proto3_util.h"
 
 
 #include <fstream>
 #include <fstream>
 
 
-using google::protobuf::FieldDescriptor;
-using google::protobuf::Message;
-using google::protobuf::Reflection;
-
-
-class DataGroupStripper {
- public:
-  static void StripMessage(Message *message) {
-    std::vector<const FieldDescriptor*> set_fields;
-    const Reflection* reflection = message->GetReflection();
-    reflection->ListFields(*message, &set_fields);
-
-    for (size_t i = 0; i < set_fields.size(); i++) {
-      const FieldDescriptor* field = set_fields[i];
-      if (field->type() == FieldDescriptor::TYPE_GROUP) {
-        reflection->ClearField(message, field);
-      }
-      if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
-        if (field->is_repeated()) {
-          for (int j = 0; j < reflection->FieldSize(*message, field); j++) {
-            StripMessage(reflection->MutableRepeatedMessage(message, field, j));
-          }
-        } else {
-          StripMessage(reflection->MutableMessage(message, field));
-        }
-      }
-    }
-
-    reflection->MutableUnknownFields(message)->Clear();
-  }
-};
+using google::protobuf::util::GogoDataStripper;
 
 
 std::string ReadFile(const std::string& name) {
 std::string ReadFile(const std::string& name) {
   std::ifstream file(name.c_str());
   std::ifstream file(name.c_str());
@@ -91,7 +59,8 @@ int main(int argc, char *argv[]) {
 
 
     for (int i = 0; i < dataset.payload_size(); i++) {
     for (int i = 0; i < dataset.payload_size(); i++) {
       message->ParseFromString(dataset.payload(i));
       message->ParseFromString(dataset.payload(i));
-      DataGroupStripper::StripMessage(message);
+      GogoDataStripper stripper;
+      stripper.StripMessage(message);
       dataset.set_payload(i, message->SerializeAsString());
       dataset.set_payload(i, message->SerializeAsString());
     }
     }
 
 

+ 74 - 0
benchmarks/util/proto3_data_stripper.cc

@@ -0,0 +1,74 @@
+#include "benchmarks.pb.h"
+#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
+#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
+#include "datasets/google_message2/benchmark_message2.pb.h"
+#include "datasets/google_message3/benchmark_message3.pb.h"
+#include "datasets/google_message4/benchmark_message4.pb.h"
+#include "data_proto2_to_proto3_util.h"
+
+#include <fstream>
+
+using google::protobuf::util::Proto3DataStripper;
+
+// Returns the complete contents of the file `name`.
+// Aborts via GOOGLE_CHECK when the file cannot be opened.
+std::string ReadFile(const std::string& name) {
+  // The datasets are serialized protobuf messages, i.e. arbitrary bytes, so
+  // the stream is opened in binary mode to keep platforms with text-mode
+  // newline translation from altering the payload.
+  std::ifstream file(name.c_str(), std::ios::in | std::ios::binary);
+  GOOGLE_CHECK(file.is_open()) << "Couldn't find file '"
+      << name
+      << "', please make sure you are running this command from the benchmarks"
+      << " directory.\n";
+  return std::string((std::istreambuf_iterator<char>(file)),
+                     std::istreambuf_iterator<char>());
+}
+
+// Usage: proto3-data-stripper <input_1> ... <input_n> <output_1> ... <output_n>
+//
+// For every (input_k, output_k) pair: reads a serialized BenchmarkDataset,
+// runs Proto3DataStripper over each payload, and writes the modified dataset
+// to output_k. Returns 1 on a malformed command line or an unknown message
+// type; aborts via GOOGLE_CHECK on unreadable, unparsable or unwritable data.
+int main(int argc, char *argv[]) {
+  // argv[0] plus an even number of paths means argc must be odd and > 1.
+  if (argc % 2 == 0 || argc == 1) {
+    std::cerr << "Usage: [input_files] [output_file_names] where " <<
+        "input_files are one to one mapping to output_file_names." <<
+        std::endl;
+    return 1;
+  }
+
+  const int file_count = argc / 2;
+  // The stripper keeps no state, so one instance serves every payload.
+  Proto3DataStripper stripper;
+
+  for (int i = file_count; i > 0; i--) {
+    const std::string input_file = argv[i];
+    const std::string output_file = argv[i + file_count];
+
+    std::cerr << "Generating " << input_file
+        << " to " << output_file << std::endl;
+    benchmarks::BenchmarkDataset dataset;
+    Message* message = nullptr;
+    std::string dataset_payload = ReadFile(input_file);
+    GOOGLE_CHECK(dataset.ParseFromString(dataset_payload))
+      << "Can't parse data file " << input_file;
+
+    if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") {
+      message = new benchmarks::proto3::GoogleMessage1;
+    } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") {
+      message = new benchmarks::proto2::GoogleMessage1;
+    } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") {
+      message = new benchmarks::proto2::GoogleMessage2;
+    } else if (dataset.message_name() ==
+        "benchmarks.google_message3.GoogleMessage3") {
+      message = new benchmarks::google_message3::GoogleMessage3;
+    } else if (dataset.message_name() ==
+        "benchmarks.google_message4.GoogleMessage4") {
+      message = new benchmarks::google_message4::GoogleMessage4;
+    } else {
+      std::cerr << "Unknown message type: " << dataset.message_name()
+          << std::endl;
+      return 1;
+    }
+
+    // Distinct index name: the outer loop already uses `i`.
+    for (int j = 0; j < dataset.payload_size(); j++) {
+      // A payload that does not parse would otherwise be re-serialized as
+      // garbage, so fail loudly just like the dataset-level parse above.
+      GOOGLE_CHECK(message->ParseFromString(dataset.payload(j)))
+        << "Can't parse payload " << j << " of data file " << input_file;
+      stripper.StripMessage(message);
+      dataset.set_payload(j, message->SerializeAsString());
+    }
+
+    // The scratch message is only needed while rewriting this dataset.
+    delete message;
+
+    // Binary mode: the serialized dataset is arbitrary bytes.
+    std::ofstream ofs(output_file, std::ios::out | std::ios::binary);
+    GOOGLE_CHECK(ofs.is_open()) << "Couldn't open output file '"
+        << output_file << "'";
+    ofs << dataset.SerializeAsString();
+    ofs.close();
+    GOOGLE_CHECK(!ofs.fail()) << "Failed to write output file '"
+        << output_file << "'";
+  }
+
+
+  return 0;
+}

+ 2 - 2
benchmarks/util/protoc-gen-gogoproto.cc

@@ -12,7 +12,7 @@ using google::protobuf::FileDescriptor;
 using google::protobuf::DescriptorPool;
 using google::protobuf::DescriptorPool;
 using google::protobuf::io::Printer;
 using google::protobuf::io::Printer;
 using google::protobuf::util::SchemaGroupStripper;
 using google::protobuf::util::SchemaGroupStripper;
-using google::protobuf::util::SchemaAddZeroEnumValue;
+using google::protobuf::util::EnumScrubber;
 
 
 namespace google {
 namespace google {
 namespace protobuf {
 namespace protobuf {
@@ -74,7 +74,7 @@ class GoGoProtoGenerator : public CodeGenerator {
     file->CopyTo(&new_file);
     file->CopyTo(&new_file);
     SchemaGroupStripper::StripFile(file, &new_file);
     SchemaGroupStripper::StripFile(file, &new_file);
 
 
-    SchemaAddZeroEnumValue enum_scrubber;
+    EnumScrubber enum_scrubber;
     enum_scrubber.ScrubFile(&new_file);
     enum_scrubber.ScrubFile(&new_file);
 
 
     string filename = file->name();
     string filename = file->name();

+ 115 - 0
benchmarks/util/protoc-gen-proto2_to_proto3.cc

@@ -0,0 +1,115 @@
+#include "google/protobuf/compiler/code_generator.h"
+#include "google/protobuf/io/zero_copy_stream.h"
+#include "google/protobuf/io/printer.h"
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/descriptor.pb.h"
+#include "schema_proto2_to_proto3_util.h"
+
+#include "google/protobuf/compiler/plugin.h"
+
+using google::protobuf::FileDescriptorProto;
+using google::protobuf::FileDescriptor;
+using google::protobuf::DescriptorPool;
+using google::protobuf::io::Printer;
+using google::protobuf::util::SchemaGroupStripper;
+using google::protobuf::util::EnumScrubber;
+using google::protobuf::util::ExtensionStripper;
+using google::protobuf::util::FieldScrubber;
+
+namespace google {
+namespace protobuf {
+namespace compiler {
+
+namespace {
+
+// Returns `filename` without its trailing ".protodevel" or ".proto"
+// extension. Names that end in neither are returned unchanged; in particular
+// a ".proto" that occurs in the middle of the name is left alone.
+string StripProto(string filename) {
+  static const char* const kSuffixes[] = {".protodevel", ".proto"};
+  for (size_t i = 0; i < sizeof(kSuffixes) / sizeof(kSuffixes[0]); i++) {
+    const string suffix = kSuffixes[i];
+    if (filename.size() >= suffix.size() &&
+        filename.compare(filename.size() - suffix.size(), suffix.size(),
+                         suffix) == 0) {
+      filename.erase(filename.size() - suffix.size());
+      break;
+    }
+  }
+  return filename;
+}
+
+// Returns the process-wide DescriptorPool into which the rewritten files are
+// built. The pool is created on first use and is never deleted.
+DescriptorPool* GetPool() {
+  static DescriptorPool* const shared_pool = new DescriptorPool;
+  return shared_pool;
+}
+
+}  // namespace
+
+// protoc plugin generator that rewrites each input schema as proto3: groups
+// and extensions are stripped, enums and fields are scrubbed, the syntax is
+// set to "proto3", and the result is emitted as a .proto file.
+class Proto2ToProto3Generator final : public CodeGenerator {
+ public:
+  // Generates every file in `files` whose dependencies have already been
+  // built into the private pool. Each pass of the outer loop emits at most one
+  // file (the first one that has become generatable), so files.size() passes
+  // are enough to walk a dependency chain in order.
+  // Returns false, with `*error` set by Generate(), as soon as one file fails.
+  bool GenerateAll(const std::vector<const FileDescriptor*>& files,
+                   const string& parameter,
+                   GeneratorContext* context,
+                   string* error) const override {
+    for (size_t pass = 0; pass < files.size(); pass++) {
+      for (const FileDescriptor* file : files) {
+        if (!CanGenerate(file)) {
+          continue;
+        }
+        if (!Generate(file, parameter, context, error)) {
+          return false;
+        }
+        break;
+      }
+    }
+
+    return true;
+  }
+
+  // Converts `file` and writes it to "<name without extension>.proto" in the
+  // output directory. `parameter` is accepted for interface compatibility but
+  // not used. Returns false and fills `*error` when the rewritten descriptor
+  // is rejected by the pool.
+  bool Generate(const FileDescriptor* file,
+                const string& parameter,
+                GeneratorContext* context,
+                string* error) const override {
+    FileDescriptorProto new_file;
+    file->CopyTo(&new_file);
+    SchemaGroupStripper::StripFile(file, &new_file);
+
+    EnumScrubber enum_scrubber;
+    enum_scrubber.ScrubFile(&new_file);
+    ExtensionStripper::StripFile(&new_file);
+    FieldScrubber::ScrubFile(&new_file);
+    new_file.set_syntax("proto3");
+
+    // BuildFile() yields nullptr when the rewritten descriptor is invalid;
+    // report that instead of dereferencing the null result.
+    const FileDescriptor* rebuilt = GetPool()->BuildFile(new_file);
+    if (rebuilt == nullptr) {
+      if (error != nullptr) {
+        *error = "Failed to build proto3 descriptor for " + file->name();
+      }
+      return false;
+    }
+    const string content = rebuilt->DebugString();
+
+    const string basename = StripProto(file->name());
+    std::unique_ptr<google::protobuf::io::ZeroCopyOutputStream> output(
+        context->Open(basename + ".proto"));
+    Printer printer(output.get(), '$');
+    printer.WriteRaw(content.c_str(), content.size());
+
+    return true;
+  }
+ private:
+  // True when `file` has not been built into the pool yet and every one of its
+  // dependencies (regular, public and weak) already has been.
+  bool CanGenerate(const FileDescriptor* file) const {
+    if (GetPool()->FindFileByName(file->name()) != nullptr) {
+      return false;
+    }
+    for (int j = 0; j < file->dependency_count(); j++) {
+      if (GetPool()->FindFileByName(file->dependency(j)->name()) == nullptr) {
+        return false;
+      }
+    }
+    for (int j = 0; j < file->public_dependency_count(); j++) {
+      if (GetPool()->FindFileByName(
+          file->public_dependency(j)->name()) == nullptr) {
+        return false;
+      }
+    }
+    for (int j = 0; j < file->weak_dependency_count(); j++) {
+      if (GetPool()->FindFileByName(
+          file->weak_dependency(j)->name()) == nullptr) {
+        return false;
+      }
+    }
+    return true;
+  }
+};
+
+}  // namespace compiler
+}  // namespace protobuf
+}  // namespace google
+
+// Plugin entry point: hands the command line and a Proto2ToProto3Generator to
+// protobuf's PluginMain and returns its exit status.
+int main(int argc, char* argv[]) {
+  using google::protobuf::compiler::PluginMain;
+  using google::protobuf::compiler::Proto2ToProto3Generator;
+
+  Proto2ToProto3Generator proto2_to_proto3;
+  return PluginMain(argc, argv, &proto2_to_proto3);
+}

+ 59 - 2
benchmarks/util/schema_proto2_to_proto3_util.h

@@ -74,10 +74,10 @@ class SchemaGroupStripper {
 
 
 };
 };
 
 
-class SchemaAddZeroEnumValue {
+class EnumScrubber {
 
 
  public:
  public:
-  SchemaAddZeroEnumValue()
+  EnumScrubber()
       : total_added_(0) {
       : total_added_(0) {
   }
   }
 
 
@@ -130,6 +130,63 @@ class SchemaAddZeroEnumValue {
   int total_added_;
   int total_added_;
 };
 };
 
 
+// Removes every extension declaration and extension range from a file
+// descriptor, including those nested inside messages at any depth.
+class ExtensionStripper {
+ public:
+  // Strips all messages of `file`, then drops the file-level extensions.
+  static void StripFile(FileDescriptorProto *file) {
+    const int message_count = file->message_type_size();
+    for (int index = 0; index < message_count; ++index) {
+      StripMessage(file->mutable_message_type(index));
+    }
+    file->clear_extension();
+  }
+ private:
+  // Drops the extensions and extension ranges of `message_type` and recurses
+  // into its nested message types.
+  static void StripMessage(DescriptorProto *message_type) {
+    message_type->clear_extension();
+    message_type->clear_extension_range();
+    const int nested_count = message_type->nested_type_size();
+    for (int index = 0; index < nested_count; ++index) {
+      StripMessage(message_type->mutable_nested_type(index));
+    }
+  }
+};
+
+
+// Scrubs field declarations: removes explicit default values and clears the
+// label of required fields. Applies to regular fields and to extension
+// declarations, at file level and inside (nested) messages.
+class FieldScrubber {
+ public:
+  // Scrubs every message of `file` and every file-level extension.
+  static void ScrubFile(FileDescriptorProto *file) {
+    for (int i = 0; i < file->message_type_size(); i++) {
+      ScrubMessage(file->mutable_message_type(i));
+    }
+    for (int i = 0; i < file->extension_size(); i++) {
+      ScrubField(file->mutable_extension(i));
+    }
+  }
+ private:
+  // Only the `required` label is cleared.
+  static bool ShouldClearLabel(const FieldDescriptorProto *field) {
+    return field->label() == FieldDescriptorProto::LABEL_REQUIRED;
+  }
+
+  // Drops the default value of `field` and, for required fields, the label.
+  static void ScrubField(FieldDescriptorProto *field) {
+    field->clear_default_value();
+    if (ShouldClearLabel(field)) {
+      field->clear_label();
+    }
+  }
+
+  // Scrubs the extensions and fields declared in `message_type`, then recurses
+  // into its nested message types.
+  // Extensions are scrubbed rather than deleted, matching ScrubFile. The
+  // earlier version cleared the extension list first, which made the loop
+  // over it dead code; deleting extensions is ExtensionStripper's job.
+  static void ScrubMessage(DescriptorProto *message_type) {
+    for (int i = 0; i < message_type->extension_size(); i++) {
+      ScrubField(message_type->mutable_extension(i));
+    }
+    for (int i = 0; i < message_type->field_size(); i++) {
+      ScrubField(message_type->mutable_field(i));
+    }
+    for (int i = 0; i < message_type->nested_type_size(); i++) {
+      ScrubMessage(message_type->mutable_nested_type(i));
+    }
+  }
+};
+
 }  // namespace util
 }  // namespace util
 }  // namespace protobuf
 }  // namespace protobuf
 }  // namespace google
 }  // namespace google