|  | @@ -33,11 +33,17 @@ import subprocess
 | 
	
		
			
				|  |  |  import sys
 | 
	
		
			
				|  |  |  import time
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +stress_test_utils_dir = os.path.abspath(os.path.join(
 | 
	
		
			
				|  |  | +    os.path.dirname(__file__), '../run_tests/stress_test'))
 | 
	
		
			
				|  |  | +sys.path.append(stress_test_utils_dir)
 | 
	
		
			
				|  |  | +from stress_test_utils import BigQueryHelper
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  import kubernetes_api
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  GRPC_ROOT = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../..'))
 | 
	
		
			
				|  |  |  os.chdir(GRPC_ROOT)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  class BigQuerySettings:
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |    def __init__(self, run_id, dataset_id, summary_table_id, qps_table_id):
 | 
	
	
		
			
				|  | @@ -283,27 +289,16 @@ def _launch_client(gcp_project_id, docker_image_name, bq_settings,
 | 
	
		
			
				|  |  |    return True
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -def _launch_server_and_client(gcp_project_id, docker_image_name,
 | 
	
		
			
				|  |  | +def _launch_server_and_client(bq_settings, gcp_project_id, docker_image_name,
 | 
	
		
			
				|  |  |                                num_client_instances):
 | 
	
		
			
				|  |  | -  # == Big Query tables related settings (Common for both server and client) ==
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -  # Create a unique id for this run (Note: Using timestamp instead of UUID to
 | 
	
		
			
				|  |  | -  # make it easier to deduce the date/time of the run just by looking at the run
 | 
	
		
			
				|  |  | -  # run id. This is useful in debugging when looking at records in Biq query)
 | 
	
		
			
				|  |  | -  run_id = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -  dataset_id = 'stress_test_%s' % run_id
 | 
	
		
			
				|  |  | -  summary_table_id = 'summary'
 | 
	
		
			
				|  |  | -  qps_table_id = 'qps'
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -  bq_settings = BigQuerySettings(run_id, dataset_id, summary_table_id,
 | 
	
		
			
				|  |  | -                                 qps_table_id)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |    # Start kubernetes proxy
 | 
	
		
			
				|  |  |    kubernetes_api_port = 9001
 | 
	
		
			
				|  |  |    kubernetes_proxy = KubernetesProxy(kubernetes_api_port)
 | 
	
		
			
				|  |  |    kubernetes_proxy.start()
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +  # num of seconds to wait for the GKE image to start and warmup
 | 
	
		
			
				|  |  | +  image_warmp_secs = 60
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |    server_pod_name = 'stress-server'
 | 
	
		
			
				|  |  |    server_port = 8080
 | 
	
		
			
				|  |  |    is_success = _launch_server(gcp_project_id, docker_image_name, bq_settings,
 | 
	
	
		
			
				|  | @@ -315,7 +310,8 @@ def _launch_server_and_client(gcp_project_id, docker_image_name,
 | 
	
		
			
				|  |  |    # Server takes a while to start.
 | 
	
		
			
				|  |  |    # TODO(sree) Use Kubernetes API to query the status of the server instead of
 | 
	
		
			
				|  |  |    # sleeping
 | 
	
		
			
				|  |  | -  time.sleep(60)
 | 
	
		
			
				|  |  | +  print 'Waiting for %s seconds for the server to start...' % image_warmp_secs
 | 
	
		
			
				|  |  | +  time.sleep(image_warmp_secs)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |    # Launch client
 | 
	
		
			
				|  |  |    server_address = '%s.default.svc.cluster.local:%d' % (server_pod_name,
 | 
	
	
		
			
				|  | @@ -329,6 +325,8 @@ def _launch_server_and_client(gcp_project_id, docker_image_name,
 | 
	
		
			
				|  |  |      print 'Error in launching client(s)'
 | 
	
		
			
				|  |  |      return False
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +  print 'Waiting for %s seconds for the client images to start...' % image_warmp_secs
 | 
	
		
			
				|  |  | +  time.sleep(image_warmp_secs)
 | 
	
		
			
				|  |  |    return True
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -359,31 +357,68 @@ def _build_and_push_docker_image(gcp_project_id, docker_image_name, tag_name):
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  # TODO(sree): This is just to test the above APIs. Rewrite this to make
 | 
	
		
			
				|  |  |  # everything configurable (like image names / number of instances etc)
 | 
	
		
			
				|  |  | -def test_run():
 | 
	
		
			
				|  |  | -  image_name = 'grpc_stress_test'
 | 
	
		
			
				|  |  | -  gcp_project_id = 'sree-gce'
 | 
	
		
			
				|  |  | -  tag_name = 'gcr.io/%s/%s' % (gcp_project_id, image_name)
 | 
	
		
			
				|  |  | -  num_client_instances = 3
 | 
	
		
			
				|  |  | +def run_test(skip_building_image, gcp_project_id, image_name, tag_name,
 | 
	
		
			
				|  |  | +             num_client_instances, poll_interval_secs, total_duration_secs):
 | 
	
		
			
				|  |  | +  if not skip_building_image:
 | 
	
		
			
				|  |  | +    is_success = _build_docker_image(image_name, tag_name)
 | 
	
		
			
				|  |  | +    if not is_success:
 | 
	
		
			
				|  |  | +      return False
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -  is_success = _build_docker_image(image_name, tag_name)
 | 
	
		
			
				|  |  | -  if not is_success:
 | 
	
		
			
				|  |  | -    return
 | 
	
		
			
				|  |  | +    is_success = _push_docker_image_to_gke_registry(tag_name)
 | 
	
		
			
				|  |  | +    if not is_success:
 | 
	
		
			
				|  |  | +      return False
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -  is_success = _push_docker_image_to_gke_registry(tag_name)
 | 
	
		
			
				|  |  | -  if not is_success:
 | 
	
		
			
				|  |  | -    return
 | 
	
		
			
				|  |  | +  # == Big Query tables related settings (Common for both server and client) ==
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  # Create a unique id for this run (Note: Using timestamp instead of UUID to
 | 
	
		
			
				|  |  | +  # make it easier to deduce the date/time of the run just by looking at the run
 | 
	
		
			
				|  |  | +  # run id. This is useful in debugging when looking at records in Biq query)
 | 
	
		
			
				|  |  | +  run_id = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
 | 
	
		
			
				|  |  | +  dataset_id = 'stress_test_%s' % run_id
 | 
	
		
			
				|  |  | +  summary_table_id = 'summary'
 | 
	
		
			
				|  |  | +  qps_table_id = 'qps'
 | 
	
		
			
				|  |  | +  bq_settings = BigQuerySettings(run_id, dataset_id, summary_table_id,
 | 
	
		
			
				|  |  | +                                 qps_table_id)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -  is_success = _launch_server_and_client(gcp_project_id, tag_name,
 | 
	
		
			
				|  |  | +  bq_helper = BigQueryHelper(run_id, '', '', gcp_project_id, dataset_id,
 | 
	
		
			
				|  |  | +                             summary_table_id, qps_table_id)
 | 
	
		
			
				|  |  | +  bq_helper.initialize()
 | 
	
		
			
				|  |  | +  is_success = _launch_server_and_client(bq_settings, gcp_project_id, tag_name,
 | 
	
		
			
				|  |  |                                           num_client_instances)
 | 
	
		
			
				|  |  | +  if not is_success:
 | 
	
		
			
				|  |  | +    return False
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  start_time = datetime.datetime.now()
 | 
	
		
			
				|  |  | +  end_time = start_time + datetime.timedelta(seconds=total_duration_secs)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -  # Run the test for 2 mins
 | 
	
		
			
				|  |  | -  time.sleep(120)
 | 
	
		
			
				|  |  | +  while True:
 | 
	
		
			
				|  |  | +    if datetime.datetime.now() > end_time:
 | 
	
		
			
				|  |  | +      print 'Test was run for %d seconds' % total_duration_secs
 | 
	
		
			
				|  |  | +      break
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -  is_success = _delete_server_and_client(num_client_instances)
 | 
	
		
			
				|  |  | +    # Check if either stress server or clients have failed
 | 
	
		
			
				|  |  | +    if not bq_helper.check_if_any_tests_failed():
 | 
	
		
			
				|  |  | +      is_success = False
 | 
	
		
			
				|  |  | +      print 'Some tests failed.'
 | 
	
		
			
				|  |  | +      break
 | 
	
		
			
				|  |  | +    # Things seem to be running fine. Wait until next poll time to check the
 | 
	
		
			
				|  |  | +    # status
 | 
	
		
			
				|  |  | +    time.sleep(poll_interval_secs)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -  if not is_success:
 | 
	
		
			
				|  |  | -    return
 | 
	
		
			
				|  |  | +  # Print BiqQuery tables
 | 
	
		
			
				|  |  | +  bq_helper.print_summary_records()
 | 
	
		
			
				|  |  | +  bq_helper.print_qps_records()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  _delete_server_and_client(num_client_instances)
 | 
	
		
			
				|  |  | +  return is_success
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  if __name__ == '__main__':
 | 
	
		
			
				|  |  | -  test_run()
 | 
	
		
			
				|  |  | +  image_name = 'grpc_stress_test'
 | 
	
		
			
				|  |  | +  gcp_project_id = 'sree-gce'
 | 
	
		
			
				|  |  | +  tag_name = 'gcr.io/%s/%s' % (gcp_project_id, image_name)
 | 
	
		
			
				|  |  | +  num_client_instances = 3
 | 
	
		
			
				|  |  | +  poll_interval_secs = 5,
 | 
	
		
			
				|  |  | +  test_duration_secs = 150
 | 
	
		
			
				|  |  | +  run_test(True, gcp_project_id, image_name, tag_name, num_client_instances,
 | 
	
		
			
				|  |  | +           poll_interval_secs, test_duration_secs)
 |