# run_and_upload.py
from __future__ import absolute_import
from __future__ import print_function

import argparse
import calendar
import copy
import datetime
import json
import os
import os.path
import re
import sys
import time
import uuid

from click.types import STRING

from . import big_query_utils

# This import depends on the automake rule protoc_middleman, please make sure
# protoc_middleman has been built before run this file.
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
import tmp.benchmarks_pb2 as benchmarks_pb2
  19. _PROJECT_ID = 'grpc-testing'
  20. _DATASET = 'protobuf_benchmark_result'
  21. _TABLE = 'opensource_result_v1'
  22. _NOW = "%d%02d%02d" % (datetime.datetime.now().year,
  23. datetime.datetime.now().month,
  24. datetime.datetime.now().day)
  25. file_size_map = {}
  26. def get_data_size(file_name):
  27. if file_name in file_size_map:
  28. return file_size_map[file_name]
  29. benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
  30. benchmark_dataset.ParseFromString(
  31. open(os.path.dirname(os.path.abspath(__file__)) + "/../" + file_name).read())
  32. size = 0
  33. count = 0
  34. for payload in benchmark_dataset.payload:
  35. size += len(payload)
  36. count += 1
  37. file_size_map[file_name] = (size, 1.0 * size / count)
  38. return size, 1.0 * size / count
  39. def extract_file_name(file_name):
  40. name_list = re.split("[/\.]", file_name)
  41. short_file_name = ""
  42. for name in name_list:
  43. if name[:14] == "google_message":
  44. short_file_name = name
  45. return short_file_name
# Per-language result rows, accumulated by the parse_* functions below and
# streamed to BigQuery by upload_result() in the __main__ block.
cpp_result = []
python_result = []
java_result = []
go_result = []
  50. # CPP results example:
  51. # [
  52. # "benchmarks": [
  53. # {
  54. # "bytes_per_second": int,
  55. # "cpu_time": int,
  56. # "name: string,
  57. # "time_unit: string,
  58. # ...
  59. # },
  60. # ...
  61. # ],
  62. # ...
  63. # ]
  64. def parse_cpp_result(filename):
  65. global cpp_result
  66. if filename == "":
  67. return
  68. if filename[0] != '/':
  69. filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  70. with open(filename) as f:
  71. results = json.loads(f.read())
  72. for benchmark in results["benchmarks"]:
  73. data_filename = "".join(
  74. re.split("(_parse_|_serialize)", benchmark["name"])[0])
  75. behavior = benchmark["name"][len(data_filename) + 1:]
  76. cpp_result.append({
  77. "language": "cpp",
  78. "dataFileName": data_filename,
  79. "behavior": behavior,
  80. "throughput": benchmark["bytes_per_second"] / 2.0 ** 20
  81. })
  82. # Python results example:
  83. # [
  84. # [
  85. # {
  86. # "filename": string,
  87. # "benchmarks": {
  88. # behavior: results,
  89. # ...
  90. # },
  91. # "message_name": STRING
  92. # },
  93. # ...
  94. # ], #pure-python
  95. # ...
  96. # ]
  97. def parse_python_result(filename):
  98. global python_result
  99. if filename == "":
  100. return
  101. if filename[0] != '/':
  102. filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  103. with open(filename) as f:
  104. results_list = json.loads(f.read())
  105. for results in results_list:
  106. for result in results:
  107. _, avg_size = get_data_size(result["filename"])
  108. for behavior in result["benchmarks"]:
  109. python_result.append({
  110. "language": "python",
  111. "dataFileName": extract_file_name(result["filename"]),
  112. "behavior": behavior,
  113. "throughput": avg_size /
  114. result["benchmarks"][behavior] * 1e9 / 2 ** 20
  115. })
  116. # Java results example:
  117. # [
  118. # {
  119. # "id": string,
  120. # "instrumentSpec": {...},
  121. # "measurements": [
  122. # {
  123. # "weight": float,
  124. # "value": {
  125. # "magnitude": float,
  126. # "unit": string
  127. # },
  128. # ...
  129. # },
  130. # ...
  131. # ],
  132. # "run": {...},
  133. # "scenario": {
  134. # "benchmarkSpec": {
  135. # "methodName": string,
  136. # "parameters": {
  137. # defined parameters in the benchmark: parameters value
  138. # },
  139. # ...
  140. # },
  141. # ...
  142. # }
  143. #
  144. # },
  145. # ...
  146. # ]
  147. def parse_java_result(filename):
  148. global average_bytes_per_message, java_result
  149. if filename == "":
  150. return
  151. if filename[0] != '/':
  152. filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  153. with open(filename) as f:
  154. results = json.loads(f.read())
  155. for result in results:
  156. total_weight = 0
  157. total_value = 0
  158. for measurement in result["measurements"]:
  159. total_weight += measurement["weight"]
  160. total_value += measurement["value"]["magnitude"]
  161. avg_time = total_value * 1.0 / total_weight
  162. total_size, _ = get_data_size(
  163. result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
  164. java_result.append({
  165. "language": "java",
  166. "throughput": total_size / avg_time * 1e9 / 2 ** 20,
  167. "behavior": result["scenario"]["benchmarkSpec"]["methodName"],
  168. "dataFileName": extract_file_name(
  169. result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
  170. })
  171. # Go benchmark results:
  172. #
  173. # goos: linux
  174. # goarch: amd64
  175. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Unmarshal-12 3000 705784 ns/op
  176. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Marshal-12 2000 634648 ns/op
  177. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Size-12 5000 244174 ns/op
  178. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Clone-12 300 4120954 ns/op
  179. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Merge-12 300 4108632 ns/op
  180. # PASS
  181. # ok _/usr/local/google/home/yilunchong/mygit/protobuf/benchmarks 124.173s
  182. def parse_go_result(filename):
  183. global go_result
  184. if filename == "":
  185. return
  186. if filename[0] != '/':
  187. filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  188. with open(filename) as f:
  189. for line in f:
  190. result_list = re.split("[\ \t]+", line)
  191. if result_list[0][:9] != "Benchmark":
  192. continue
  193. first_slash_index = result_list[0].find('/')
  194. last_slash_index = result_list[0].rfind('/')
  195. full_filename = result_list[0][first_slash_index+4:last_slash_index] # delete ../ prefix
  196. total_bytes, _ = get_data_size(full_filename)
  197. behavior_with_suffix = result_list[0][last_slash_index+1:]
  198. last_dash = behavior_with_suffix.rfind("-")
  199. if last_dash == -1:
  200. behavior = behavior_with_suffix
  201. else:
  202. behavior = behavior_with_suffix[:last_dash]
  203. go_result.append({
  204. "dataFilename": extract_file_name(full_filename),
  205. "throughput": total_bytes / float(result_list[2]) * 1e9 / 2 ** 20,
  206. "behavior": behavior,
  207. "language": "go"
  208. })
  209. def get_metadata():
  210. build_number = os.getenv('BUILD_NUMBER')
  211. build_url = os.getenv('BUILD_URL')
  212. job_name = os.getenv('JOB_NAME')
  213. git_commit = os.getenv('GIT_COMMIT')
  214. # actual commit is the actual head of PR that is getting tested
  215. git_actual_commit = os.getenv('ghprbActualCommit')
  216. utc_timestamp = str(calendar.timegm(time.gmtime()))
  217. metadata = {'created': utc_timestamp}
  218. if build_number:
  219. metadata['buildNumber'] = build_number
  220. if build_url:
  221. metadata['buildUrl'] = build_url
  222. if job_name:
  223. metadata['jobName'] = job_name
  224. if git_commit:
  225. metadata['gitCommit'] = git_commit
  226. if git_actual_commit:
  227. metadata['gitActualCommit'] = git_actual_commit
  228. return metadata
  229. def upload_result(result_list, metadata):
  230. for result in result_list:
  231. new_result = copy.deepcopy(result)
  232. new_result['metadata'] = metadata
  233. bq = big_query_utils.create_big_query()
  234. row = big_query_utils.make_row(str(uuid.uuid4()), new_result)
  235. if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET,
  236. _TABLE + "$" + _NOW,
  237. [row]):
  238. print('Error when uploading result', new_result)
  239. if __name__ == "__main__":
  240. parser = argparse.ArgumentParser()
  241. parser.add_argument("-cpp", "--cpp_input_file",
  242. help="The CPP benchmark result file's name",
  243. default="")
  244. parser.add_argument("-java", "--java_input_file",
  245. help="The Java benchmark result file's name",
  246. default="")
  247. parser.add_argument("-python", "--python_input_file",
  248. help="The Python benchmark result file's name",
  249. default="")
  250. parser.add_argument("-go", "--go_input_file",
  251. help="The golang benchmark result file's name",
  252. default="")
  253. args = parser.parse_args()
  254. parse_cpp_result(args.cpp_input_file)
  255. parse_python_result(args.python_input_file)
  256. parse_java_result(args.java_input_file)
  257. parse_go_result(args.go_input_file)
  258. metadata = get_metadata()
  259. print("uploading cpp results...")
  260. upload_result(cpp_result, metadata)
  261. print("uploading java results...")
  262. upload_result(java_result, metadata)
  263. print("uploading python results...")
  264. upload_result(python_result, metadata)
  265. print("uploading go results...")
  266. upload_result(go_result, metadata)