
Add python benchmark

Yilun Chong 7 years ago
parent commit 2fc69b1561

+ 76 - 3
benchmarks/Makefile.am

@@ -39,11 +39,11 @@ else
 # relative to srcdir, which may not be the same as the current directory when
 # building out-of-tree.
 protoc_middleman: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs)
-	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java $(benchmarks_protoc_inputs) )
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs) )
 	touch protoc_middleman
 
 protoc_middleman2:  make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs)
-	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java $(benchmarks_protoc_inputs_proto2) )
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2) )
 	touch protoc_middleman2
 
 endif
@@ -155,6 +155,75 @@ java: protoc_middleman protoc_middleman2 java-benchmark
 
 ############# JAVA RULES END ##############
 
+
+############# PYTHON RULES ##############
+
+python_add_init: protoc_middleman protoc_middleman2
+	all_file=`find tmp -type f -regex '.*\.py'` &&                   \
+	for file in $${all_file[@]}; do                                  \
+		path="$${file%/*}";                                            \
+		while true; do                                                 \
+			touch "$$path/__init__.py" && chmod +x "$$path/__init__.py"; \
+			if [[ $$path != *"/"* ]]; then break; fi;                    \
+			path=$${path%/*};                                            \
+		done                                                           \
+	done
+
+python_cpp_pkg_flags = `pkg-config --cflags --libs python`
+
+lib_LTLIBRARIES = libbenchmark_messages.la
+libbenchmark_messages_la_SOURCES = python_benchmark_messages.cc
+libbenchmark_messages_la_LIBADD = $(top_srcdir)/src/.libs/libprotobuf.la 
+libbenchmark_messages_la_LDFLAGS = -version-info 1:0:0 -export-dynamic
+libbenchmark_messages_la_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir) $(python_cpp_pkg_flags)
+libbenchmark_messages_la-libbenchmark_messages_la.$(OBJEXT): $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header) $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2)
+nodist_libbenchmark_messages_la_SOURCES =                         \
+	$(benchmarks_protoc_outputs)                                    \
+	$(benchmarks_protoc_outputs_proto2)                             \
+	$(benchmarks_protoc_outputs_proto2_header)                      \
+	$(benchmarks_protoc_outputs_header)
+
+python-pure-python-benchmark: python_add_init
+	@echo "Writing shortcut script python-pure-python-benchmark..."
+	@echo '#! /bin/sh' > python-pure-python-benchmark
+	@echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-pure-python-benchmark
+	@echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-pure-python-benchmark
+	@echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'python\' >> python-pure-python-benchmark
+	@echo cp py_benchmark.py tmp >> python-pure-python-benchmark
+	@echo python tmp/py_benchmark.py false '$$@' >> python-pure-python-benchmark
+	@chmod +x python-pure-python-benchmark
+
+python-cpp-reflection-benchmark: python_add_init
+	@echo "Writing shortcut script python-cpp-reflection-benchmark..."
+	@echo '#! /bin/sh' > python-cpp-reflection-benchmark
+	@echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-reflection-benchmark
+	@echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-reflection-benchmark
+	@echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'cpp\' >> python-cpp-reflection-benchmark
+	@echo cp py_benchmark.py tmp >> python-cpp-reflection-benchmark
+	@echo python tmp/py_benchmark.py false '$$@' >> python-cpp-reflection-benchmark
+	@chmod +x python-cpp-reflection-benchmark
+
+python-cpp-generated-code-benchmark: python_add_init libbenchmark_messages.la
+	@echo "Writing shortcut script python-cpp-generated-code-benchmark..."
+	@echo '#! /bin/sh' > python-cpp-generated-code-benchmark
+	@echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-generated-code-benchmark
+	@echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-generated-code-benchmark
+	@echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'cpp\' >> python-cpp-generated-code-benchmark
+	@echo cp py_benchmark.py tmp >> python-cpp-generated-code-benchmark
+	@echo python tmp/py_benchmark.py true '$$@' >> python-cpp-generated-code-benchmark
+	@chmod +x python-cpp-generated-code-benchmark
+
+python-pure-python: python-pure-python-benchmark
+	./python-pure-python-benchmark $(all_data)
+
+python-cpp-reflection: python-cpp-reflection-benchmark
+	./python-cpp-reflection-benchmark $(all_data)
+
+python-cpp-generated-code: python-cpp-generated-code-benchmark
+	./python-cpp-generated-code-benchmark $(all_data)
+
+############# PYTHON RULES END ##############
+
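As an aside for readers who don't parse Automake recipes easily: the `python_add_init` rule above essentially makes every directory under `tmp/` that holds generated `.py` files, plus each of its parent directories, an importable package. A rough Python sketch of the same loop, purely illustrative and not part of this commit:

```
# Rough, hypothetical Python equivalent of the python_add_init rule: every
# directory under tmp/ that contains generated .py files, and each of its
# parents, gets an __init__.py so it is importable as a package.
import os

for root, _dirs, files in os.walk("tmp"):
    if not any(f.endswith(".py") for f in files):
        continue
    path = root
    while True:
        # touch the package marker (the real rule also chmods it +x)
        open(os.path.join(path, "__init__.py"), "a").close()
        if os.sep not in path:
            break
        path = os.path.dirname(path)
```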
 MAINTAINERCLEANFILES =                                                     \
 	Makefile.in
 
@@ -168,7 +237,11 @@ CLEANFILES =                                                               \
 	protoc_middleman                                                         \
 	protoc_middleman2                                                        \
 	javac_middleman                                                          \
-	java-benchmark
+	java-benchmark                                                           \
+	python_cpp_proto_library                                                 \
+	python-pure-python-benchmark                                             \
+	python-cpp-reflection-benchmark                                          \
+	python-cpp-generated-code-benchmark
 
 clean-local:
 	-rm -rf tmp/*
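For context on the three `python-*-benchmark` targets: each one only writes a small shell wrapper that exports the library paths, selects the protobuf implementation via `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION`, copies `py_benchmark.py` into `tmp/`, and runs it with `false`/`true` to say whether the C++ generated-code module should be loaded. A hedged Python stand-in for what the generated `python-pure-python-benchmark` wrapper does (paths and names are illustrative, not part of the commit):

```
# Illustrative stand-in for the generated python-pure-python-benchmark script:
# select the pure-python protobuf implementation and hand the dataset file
# arguments to py_benchmark.py ("false" = do not load the C++ generated code).
import os
import subprocess
import sys

env = dict(os.environ, PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION="python")
subprocess.check_call(
    [sys.executable, "tmp/py_benchmark.py", "false"] + sys.argv[1:], env=env)
```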

+ 64 - 5
benchmarks/README.md

@@ -17,12 +17,25 @@ We are using [google/benchmark](https://github.com/google/benchmark) as the
 benchmark tool for testing cpp. It is built automatically when you build the
 cpp benchmark.
 
-### JAVA
+### Java
 We're using maven to build the java benchmarks, the same way we build the
 Java protobuf, so no other tools need to be installed. We're using
 [google/caliper](https://github.com/google/caliper) as the benchmark tool,
 which maven pulls in automatically.
 
+### Python
+We use the Python C++ API to benchmark the cpp generated code version of
+Python protobuf, which is also a prerequisite for the cpp implementation of
+Python protobuf. You need to install the Python development headers before
+running the cpp generated code benchmark. For example, on Ubuntu:
+
+```
+$ sudo apt-get install python-dev 
+$ sudo apt-get install python3-dev
+```
+You also need to make sure `pkg-config` is installed.
+
 ### Big data
 
 There's some optional big testing data which is not included in the directory initially, you need to 
@@ -38,34 +51,80 @@ After doing this the big data file will automaticly generated in the benchmark d
 
 To run all the benchmark datasets:
 
-For java:
+### Java:
 
 ```
 $ make java
 ```
 
-For cpp:
+### CPP:
 
 ```
 $ make cpp
 ```
 
+### Python:
+
+We have three versions of the Python protobuf implementation: pure python,
+cpp reflection, and cpp generated code. To run the benchmark for each
+version:
+
+#### Pure Python:
+
+```
+$ make python-pure-python
+```
+
+#### CPP reflection:
+
+```
+$ make python-cpp-reflection
+```
+
+#### CPP generated code:
+
+```
+$ make python-cpp-generated-code
+```
+
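(Side note, not part of the targets above: if you want to confirm which protobuf implementation the interpreter actually picked up, the installed protobuf package exposes it, as in this small check.)

```
# Prints "python" when the pure-python implementation is active and "cpp"
# when the C++ extension is used (reflection or generated code).
from google.protobuf.internal import api_implementation
print(api_implementation.Type())
```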
 To run a specific dataset:
 
-For java:
+### Java:
 
 ```
 $ make java-benchmark
 $ ./java-benchmark $(specific generated dataset file name) [-- $(caliper option)]
 ```
 
-For cpp:
+### CPP:
 
 ```
 $ make cpp-benchmark
 $ ./cpp-benchmark $(specific generated dataset file name)
 ```
 
+### Python:
+
+#### Pure Python:
+
+```
+$ make python-pure-python-benchmark
+$ ./python-pure-python-benchmark $(specific generated dataset file name)
+```
+
+#### CPP reflection:
+
+```
+$ make python-cpp-reflection-benchmark
+$ ./python-cpp-reflection-benchmark $(specific generated dataset file name)
+```
+
+#### CPP generated code:
+
+```
+$ make python-cpp-generated-code-benchmark
+$ ./python-cpp-generated-code-benchmark $(specific generated dataset file name)
+```
+
 ## Benchmark datasets
 
 Each data set is in the format of benchmarks.proto:

+ 0 - 0
benchmarks/__init__.py


+ 115 - 0
benchmarks/py_benchmark.py

@@ -0,0 +1,115 @@
+import sys
+import os
+import timeit
+import math
+import fnmatch
+
+# The CPP generated code must be linked before importing the generated Python
+# code, so that the descriptors can be found in the descriptor pool.
+if len(sys.argv) < 2:
+  raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code")
+if sys.argv[1] == "true":
+  sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/.libs" )
+  import libbenchmark_messages
+  sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/tmp" )
+elif sys.argv[1] != "false":
+  raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code")
+
+import datasets.google_message1.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2
+import datasets.google_message1.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2
+import datasets.google_message2.benchmark_message2_pb2 as benchmark_message2_pb2
+import datasets.google_message3.benchmark_message3_pb2 as benchmark_message3_pb2
+import datasets.google_message4.benchmark_message4_pb2 as benchmark_message4_pb2
+import benchmarks_pb2
+
+
+def run_one_test(filename):
+  data = open(os.path.dirname(sys.argv[0]) + "/../" + filename).read()
+  benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
+  benchmark_dataset.ParseFromString(data)
+  benchmark_util = Benchmark(full_iteration=len(benchmark_dataset.payload),
+                             module="py_benchmark",
+                             setup_method="init")
+  print "Message %s of dataset file %s" % \
+    (benchmark_dataset.message_name, filename)
+  benchmark_util.set_test_method("parse_from_benchmark")
+  print benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename))
+  benchmark_util.set_test_method("serialize_to_benchmark")
+  print benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename))
+  print ""
+  
+def init(filename): 
+  global benchmark_dataset, message_class, message_list, counter
+  message_list=[]
+  counter = 0
+  data = open(os.path.dirname(sys.argv[0]) + "/../" + filename).read()
+  benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
+  benchmark_dataset.ParseFromString(data)
+     
+  if benchmark_dataset.message_name == "benchmarks.proto3.GoogleMessage1":
+    message_class = benchmark_message1_proto3_pb2.GoogleMessage1
+  elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage1":
+    message_class = benchmark_message1_proto2_pb2.GoogleMessage1
+  elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage2":
+    message_class = benchmark_message2_pb2.GoogleMessage2
+  elif benchmark_dataset.message_name == "benchmarks.google_message3.GoogleMessage3":
+    message_class = benchmark_message3_pb2.GoogleMessage3  
+  elif benchmark_dataset.message_name == "benchmarks.google_message4.GoogleMessage4":
+    message_class = benchmark_message4_pb2.GoogleMessage4
+  else: 
+    raise IOError("Message %s not found!" % (benchmark_dataset.message_name))
+   
+  for one_payload in benchmark_dataset.payload:
+    temp = message_class()
+    temp.ParseFromString(one_payload)
+    message_list.append(temp)
+  
+def parse_from_benchmark():
+  global counter, message_class, benchmark_dataset
+  m = message_class().ParseFromString(benchmark_dataset.payload[counter % len(benchmark_dataset.payload)])
+  counter = counter + 1
+  
+def serialize_to_benchmark():
+  global counter, message_list, message_class
+  s = message_list[counter % len(benchmark_dataset.payload)].SerializeToString()
+  counter = counter + 1
+ 
+
+class Benchmark:
+  def __init__(self, module=None, test_method=None, 
+               setup_method=None, full_iteration = 1):
+    self.full_iteration = full_iteration
+    self.module = module
+    self.test_method = test_method
+    self.setup_method = setup_method
+    
+  def set_test_method(self, test_method):
+    self.test_method = test_method
+  
+  def full_setup_code(self, setup_method_args=''):
+    setup_code = ""
+    setup_code += "from %s import %s\n" % (self.module, self.test_method)
+    setup_code += "from %s import %s\n" % (self.module, self.setup_method)
+    setup_code += "%s(%s)\n" % (self.setup_method, setup_method_args)
+    return setup_code 
+  
+  def dry_run(self, test_method_args='', setup_method_args=''):
+    return timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args),
+                         setup=self.full_setup_code(setup_method_args),
+                         number=self.full_iteration);
+  
+  def run_benchmark(self, test_method_args='', setup_method_args=''):
+    reps = self.full_iteration; 
+    t = self.dry_run(test_method_args, setup_method_args);
+    if t < 3 :
+      reps = int(math.ceil(3 / t)) * self.full_iteration 
+    t = timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args),
+                      setup=self.full_setup_code(setup_method_args),
+                      number=reps);
+    return "Average time for %s: %.2f ns" % \
+      (self.test_method, 1.0 * t / reps * (10 ** 9))
+  
+  
+if __name__ == "__main__":
+  for i in range(2, len(sys.argv)):
+    run_one_test(sys.argv[i])
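The `Benchmark` helper above leans entirely on `timeit`: the setup string imports the test and setup functions and performs the one-time initialisation, and the statement is then timed for `number` repetitions, scaled up until the measured run lasts roughly three seconds. A self-contained sketch of that pattern with stand-in functions (illustrative only, not the benchmark code itself):

```
# Minimal illustration of the timeit pattern used by Benchmark.run_benchmark:
# the setup string does the imports and one-time initialisation, the stmt is
# the hot loop body, and `number` controls how many repetitions are timed.
import timeit

setup_code = (
    "from json import dumps\n"      # stand-in for: from py_benchmark import parse_from_benchmark
    "payload = {'a': [1, 2, 3]}\n"  # stand-in for: init("<dataset file>")
)
reps = 10000
elapsed = timeit.timeit(stmt="dumps(payload)", setup=setup_code, number=reps)
print("Average time for dumps: %.2f ns" % (elapsed / reps * 1e9))
```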

+ 29 - 0
benchmarks/python_benchmark_messages.cc

@@ -0,0 +1,29 @@
+#include <Python.h>
+
+#include "benchmarks.pb.h"
+#include "datasets/google_message1/benchmark_message1_proto2.pb.h"
+#include "datasets/google_message1/benchmark_message1_proto3.pb.h"
+#include "datasets/google_message2/benchmark_message2.pb.h"
+#include "datasets/google_message3/benchmark_message3.pb.h"
+#include "datasets/google_message4/benchmark_message4.pb.h"
+
+static PyMethodDef python_benchmark_methods[] = {
+    {NULL, NULL, 0, NULL}        /* Sentinel */
+};
+
+
+PyMODINIT_FUNC
+initlibbenchmark_messages() {
+  benchmarks::BenchmarkDataset().descriptor();
+  benchmarks::proto3::GoogleMessage1().descriptor();
+  benchmarks::proto2::GoogleMessage1().descriptor();
+  benchmarks::proto2::GoogleMessage2().descriptor();
+  benchmarks::google_message3::GoogleMessage3().descriptor();
+  benchmarks::google_message4::GoogleMessage4().descriptor();
+
+  PyObject *m;
+
+  m = Py_InitModule("libbenchmark_messages", python_benchmark_methods);
+  if (m == NULL)
+      return;
+}