result_parser.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
# This import depends on the automake rule protoc_middleman; please make sure
# protoc_middleman has been built before running this file.
  3. import json
  4. import re
  5. import os.path
  6. # BEGIN OPENSOURCE
  7. import sys
  8. sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
  9. # END OPENSOURCE
  10. import tmp.benchmarks_pb2 as benchmarks_pb2
  11. __file_size_map = {}
  12. def __get_data_size(filename):
  13. if filename[0] != '/':
  14. filename = os.path.dirname(os.path.abspath(__file__)) + "/../" + filename
  15. if filename in __file_size_map:
  16. return __file_size_map[filename]
  17. benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
  18. benchmark_dataset.ParseFromString(
  19. open(filename).read())
  20. size = 0
  21. count = 0
  22. for payload in benchmark_dataset.payload:
  23. size += len(payload)
  24. count += 1
  25. __file_size_map[filename] = (size, 1.0 * size / count)
  26. return size, 1.0 * size / count
  27. def __extract_file_name(file_name):
  28. name_list = re.split("[/\.]", file_name)
  29. short_file_name = ""
  30. for name in name_list:
  31. if name[:14] == "google_message":
  32. short_file_name = name
  33. return short_file_name
# Module-level accumulator: each __parse_*_result function appends one dict
# per benchmark entry to this list, and get_result_from_file returns it.
__results = []
# CPP results example:
# {
#   "benchmarks": [
#     {
#       "bytes_per_second": int,
#       "cpu_time_ns": double,
#       "iterations": int,
#       "name": string,
#       "real_time_ns": double,
#       ...
#     },
#     ...
#   ],
#   ...
# }
  50. def __parse_cpp_result(filename):
  51. if filename == "":
  52. return
  53. if filename[0] != '/':
  54. filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  55. with open(filename) as f:
  56. results = json.loads(f.read())
  57. for benchmark in results["benchmarks"]:
  58. data_filename = "".join(
  59. re.split("(_parse_|_serialize)", benchmark["name"])[0])
  60. behavior = benchmark["name"][len(data_filename) + 1:]
  61. if data_filename[:2] == "BM":
  62. data_filename = data_filename[3:]
  63. __results.append({
  64. "language": "cpp",
  65. "dataFilename": data_filename,
  66. "behavior": behavior,
  67. "throughput": benchmark["bytes_per_second"] / 2.0 ** 20
  68. })
# Synthetic benchmark results example:
# {
#   "benchmarks": [
#     {
#       "cpu_time_ns": double,
#       "iterations": int,
#       "name": string,
#       "real_time_ns": double,
#       ...
#     },
#     ...
#   ],
#   ...
# }
  83. def __parse_synthetic_result(filename):
  84. if filename == "":
  85. return
  86. if filename[0] != "/":
  87. filename = os.path.dirname(os.path.abspath(__file__)) + "/" + filename
  88. with open(filename) as f:
  89. results = json.loads(f.read())
  90. for benchmark in results["benchmarks"]:
  91. __results.append({
  92. "language": "cpp",
  93. "dataFilename": "",
  94. "behavior": "synthetic",
  95. "throughput": 10.0**9 / benchmark["cpu_time_ns"]
  96. })
  97. # Python results example:
  98. # [
  99. # [
  100. # {
  101. # "filename": string,
  102. # "benchmarks": {
  103. # behavior: results,
  104. # ...
  105. # },
  106. # },
  107. # ...
  108. # ], #pure-python
  109. # ...
  110. # ]
  111. def __parse_python_result(filename):
  112. if filename == "":
  113. return
  114. if filename[0] != '/':
  115. filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  116. with open(filename) as f:
  117. results_list = json.loads(f.read())
  118. for results in results_list:
  119. for result in results:
  120. _, avg_size = __get_data_size(result["filename"])
  121. for behavior in result["benchmarks"]:
  122. __results.append({
  123. "language": "python",
  124. "dataFilename": __extract_file_name(result["filename"]),
  125. "behavior": behavior,
  126. "throughput": result["benchmarks"][behavior]
  127. })
  128. # Java results example:
  129. # [
  130. # {
  131. # "id": string,
  132. # "instrumentSpec": {...},
  133. # "measurements": [
  134. # {
  135. # "weight": float,
  136. # "value": {
  137. # "magnitude": float,
  138. # "unit": string
  139. # },
  140. # ...
  141. # },
  142. # ...
  143. # ],
  144. # "run": {...},
  145. # "scenario": {
  146. # "benchmarkSpec": {
  147. # "methodName": string,
  148. # "parameters": {
  149. # defined parameters in the benchmark: parameters value
  150. # },
  151. # ...
  152. # },
  153. # ...
  154. # }
  155. #
  156. # },
  157. # ...
  158. # ]
  159. def __parse_java_result(filename):
  160. if filename == "":
  161. return
  162. if filename[0] != '/':
  163. filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  164. with open(filename) as f:
  165. results = json.loads(f.read())
  166. for result in results:
  167. total_weight = 0
  168. total_value = 0
  169. for measurement in result["measurements"]:
  170. total_weight += measurement["weight"]
  171. total_value += measurement["value"]["magnitude"]
  172. avg_time = total_value * 1.0 / total_weight
  173. total_size, _ = __get_data_size(
  174. result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
  175. __results.append({
  176. "language": "java",
  177. "throughput": total_size / avg_time * 1e9 / 2 ** 20,
  178. "behavior": result["scenario"]["benchmarkSpec"]["methodName"],
  179. "dataFilename": __extract_file_name(
  180. result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
  181. })
  182. # Go benchmark results:
  183. #
  184. # goos: linux
  185. # goarch: amd64
  186. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Unmarshal-12 3000 705784 ns/op
  187. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Marshal-12 2000 634648 ns/op
  188. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Size-12 5000 244174 ns/op
  189. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Clone-12 300 4120954 ns/op
  190. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Merge-12 300 4108632 ns/op
  191. # PASS
  192. # ok _/usr/local/google/home/yilunchong/mygit/protobuf/benchmarks 124.173s
  193. def __parse_go_result(filename):
  194. if filename == "":
  195. return
  196. if filename[0] != '/':
  197. filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  198. with open(filename) as f:
  199. for line in f:
  200. result_list = re.split("[\ \t]+", line)
  201. if result_list[0][:9] != "Benchmark":
  202. continue
  203. first_slash_index = result_list[0].find('/')
  204. last_slash_index = result_list[0].rfind('/')
  205. full_filename = result_list[0][first_slash_index+1:last_slash_index]
  206. total_bytes, _ = __get_data_size(full_filename)
  207. behavior_with_suffix = result_list[0][last_slash_index+1:]
  208. last_dash = behavior_with_suffix.rfind("-")
  209. if last_dash == -1:
  210. behavior = behavior_with_suffix
  211. else:
  212. behavior = behavior_with_suffix[:last_dash]
  213. __results.append({
  214. "dataFilename": __extract_file_name(full_filename),
  215. "throughput": total_bytes / float(result_list[2]) * 1e9 / 2 ** 20,
  216. "behavior": behavior,
  217. "language": "go"
  218. })
  219. # Self built json results example:
  220. #
  221. # [
  222. # {
  223. # "filename": string,
  224. # "benchmarks": {
  225. # behavior: results,
  226. # ...
  227. # },
  228. # },
  229. # ...
  230. # ]
  231. def __parse_custom_result(filename, language):
  232. if filename == "":
  233. return
  234. if filename[0] != '/':
  235. filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  236. with open(filename) as f:
  237. results = json.loads(f.read())
  238. for result in results:
  239. _, avg_size = __get_data_size(result["filename"])
  240. for behavior in result["benchmarks"]:
  241. __results.append({
  242. "language": language,
  243. "dataFilename": __extract_file_name(result["filename"]),
  244. "behavior": behavior,
  245. "throughput": result["benchmarks"][behavior]
  246. })
def __parse_js_result(filename, language):
    """Forward both arguments unchanged to ``__parse_custom_result``.

    get_result_from_file uses this for the node result file.
    """
    return __parse_custom_result(filename, language)
def __parse_php_result(filename, language):
    """Forward both arguments unchanged to ``__parse_custom_result``.

    get_result_from_file uses this for both PHP result files.
    """
    return __parse_custom_result(filename, language)
  251. def get_result_from_file(cpp_file="",
  252. java_file="",
  253. python_file="",
  254. go_file="",
  255. synthetic_file="",
  256. node_file="",
  257. php_c_file="",
  258. php_file=""):
  259. results = {}
  260. if cpp_file != "":
  261. __parse_cpp_result(cpp_file)
  262. if java_file != "":
  263. __parse_java_result(java_file)
  264. if python_file != "":
  265. __parse_python_result(python_file)
  266. if go_file != "":
  267. __parse_go_result(go_file)
  268. if synthetic_file != "":
  269. __parse_synthetic_result(synthetic_file)
  270. if node_file != "":
  271. __parse_js_result(node_file, "node")
  272. if php_file != "":
  273. __parse_php_result(php_file, "php")
  274. if php_c_file != "":
  275. __parse_php_result(php_c_file, "php")
  276. return __results