// gogo_data_scrubber.cc (3.6 KB)

  #include <fstream>
  #include <iostream>
  #include <iterator>
  #include <memory>
  #include <string>
  #include <vector>

  #include "google/protobuf/descriptor.h"
  #include "google/protobuf/message.h"

  #include "benchmarks.pb.h"
  #include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
  #include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
  #include "datasets/google_message2/benchmark_message2.pb.h"
  #include "datasets/google_message3/benchmark_message3.pb.h"
  #include "datasets/google_message4/benchmark_message4.pb.h"
  10. using google::protobuf::FieldDescriptor;
  11. using google::protobuf::Message;
  12. using google::protobuf::Reflection;
  13. class DataGroupStripper {
  14. public:
  15. static void StripMessage(Message *message) {
  16. std::vector<const FieldDescriptor*> set_fields;
  17. const Reflection* reflection = message->GetReflection();
  18. reflection->ListFields(*message, &set_fields);
  19. for (size_t i = 0; i < set_fields.size(); i++) {
  20. const FieldDescriptor* field = set_fields[i];
  21. if (field->type() == FieldDescriptor::TYPE_GROUP) {
  22. reflection->ClearField(message, field);
  23. }
  24. if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
  25. if (field->is_repeated()) {
  26. for (int j = 0; j < reflection->FieldSize(*message, field); j++) {
  27. StripMessage(reflection->MutableRepeatedMessage(message, field, j));
  28. }
  29. } else {
  30. StripMessage(reflection->MutableMessage(message, field));
  31. }
  32. }
  33. }
  34. reflection->MutableUnknownFields(message)->Clear();
  35. }
  36. };
  37. std::string ReadFile(const std::string& name) {
  38. std::ifstream file(name.c_str());
  39. GOOGLE_CHECK(file.is_open()) << "Couldn't find file '"
  40. << name
  41. << "', please make sure you are running this command from the benchmarks"
  42. << " directory.\n";
  43. return std::string((std::istreambuf_iterator<char>(file)),
  44. std::istreambuf_iterator<char>());
  45. }
  46. int main(int argc, char *argv[]) {
  47. if (argc % 2 == 0 || argc == 1) {
  48. std::cerr << "Usage: [input_files] [output_file_names] where " <<
  49. "input_files are one to one mapping to output_file_names." <<
  50. std::endl;
  51. return 1;
  52. }
  53. for (int i = argc / 2; i > 0; i--) {
  54. const std::string &input_file = argv[i];
  55. const std::string &output_file = argv[i + argc / 2];
  56. std::cerr << "Generating " << input_file
  57. << " to " << output_file << std::endl;
  58. benchmarks::BenchmarkDataset dataset;
  59. Message* message;
  60. std::string dataset_payload = ReadFile(input_file);
  61. GOOGLE_CHECK(dataset.ParseFromString(dataset_payload))
  62. << "Can' t parse data file " << input_file;
  63. if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") {
  64. message = new benchmarks::proto3::GoogleMessage1;
  65. } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") {
  66. message = new benchmarks::proto2::GoogleMessage1;
  67. } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") {
  68. message = new benchmarks::proto2::GoogleMessage2;
  69. } else if (dataset.message_name() ==
  70. "benchmarks.google_message3.GoogleMessage3") {
  71. message = new benchmarks::google_message3::GoogleMessage3;
  72. } else if (dataset.message_name() ==
  73. "benchmarks.google_message4.GoogleMessage4") {
  74. message = new benchmarks::google_message4::GoogleMessage4;
  75. } else {
  76. std::cerr << "Unknown message type: " << dataset.message_name();
  77. exit(1);
  78. }
  79. for (int i = 0; i < dataset.payload_size(); i++) {
  80. message->ParseFromString(dataset.payload(i));
  81. DataGroupStripper::StripMessage(message);
  82. dataset.set_payload(i, message->SerializeAsString());
  83. }
  84. std::ofstream ofs(output_file);
  85. ofs << dataset.SerializeAsString();
  86. ofs.close();
  87. }
  88. return 0;
  89. }