Browse Source

Merge pull request #25264 from menghanl/v1.35.x

Menghan Li 4 years ago
parent
commit
d38a877d49

+ 36 - 0
doc/xds-test-descriptions.md

@@ -463,6 +463,42 @@ Test driver asserts:
 1.  All backends in the primary locality receive at least 1 RPC.
 1.  No backends in the secondary locality receive RPCs.
 
+
+### load_based_failover
+
+This test verifies that traffic is partially diverted to a secondary locality
+when the QPS is greater than the configured RPS in the priority locality.
+
+Client parameters:
+
+1.  --num_channels=1
+1.  --qps=100
+
+Load balancer configuration:
+
+1.  The primary MIG with 2 backends in the same zone as the client
+1.  The secondary MIG with 2 backends in a different zone
+
+Test driver asserts:
+
+1.  All backends in the primary locality receive at least 1 RPC.
+1.  No backends in the secondary locality receive RPCs.
+
+The test driver sets `balancingMode` is `RATE`, and `maxRate` to 20 in the primary locality.
+
+Test driver asserts:
+
+1.  All backends in the primary locality receive at least 1 RPC.
+1.  All backends in the secondary locality receive at least 1 RPC.
+
+The test driver set `maxRate` to 120 in the primary locality.
+
+Test driver asserts:
+
+1.  All backends in the primary locality receive at least 1 RPC.
+1.  No backends in the secondary locality receive RPCs.
+
+
 ### circuit_breaking
 
 This test verifies that the maximum number of outstanding requests is limited

+ 1 - 1
tools/dockerfile/distribtest/python_dev_centos7_x64/Dockerfile

@@ -18,7 +18,7 @@ RUN yum install -y python
 RUN yum install -y python-devel
 RUN yum install -y epel-release
 RUN yum install -y python-pip
-RUN pip install --upgrade pip
+RUN pip install --upgrade pip==19.3.1
 RUN pip install -U virtualenv
 
 # The default gcc of CentOS 7 is gcc 4.8 which is older than gcc 4.9,

+ 1 - 1
tools/internal_ci/linux/grpc_xds.cfg

@@ -16,7 +16,7 @@
 
 # Location of the continuous shell script in repository.
 build_file: "grpc/tools/internal_ci/linux/grpc_xds.sh"
-timeout_mins: 120
+timeout_mins: 180
 env_vars {
   key: "BAZEL_SCRIPT"
   value: "tools/internal_ci/linux/grpc_xds_bazel_test_in_docker.sh"

+ 1 - 1
tools/internal_ci/linux/grpc_xds_bazel_python_test_in_docker.sh

@@ -25,7 +25,7 @@ cd /var/local/git/grpc
 VIRTUAL_ENV=$(mktemp -d)
 virtualenv "$VIRTUAL_ENV"
 PYTHON="$VIRTUAL_ENV"/bin/python
-"$PYTHON" -m pip install --upgrade pip
+"$PYTHON" -m pip install --upgrade pip==19.3.1
 "$PYTHON" -m pip install --upgrade grpcio-tools google-api-python-client google-auth-httplib2 oauth2client
 
 # Prepare generated Python code.

+ 1 - 1
tools/internal_ci/linux/grpc_xds_bazel_test_in_docker.sh

@@ -25,7 +25,7 @@ cd /var/local/git/grpc
 VIRTUAL_ENV=$(mktemp -d)
 virtualenv "$VIRTUAL_ENV"
 PYTHON="$VIRTUAL_ENV"/bin/python
-"$PYTHON" -m pip install --upgrade pip
+"$PYTHON" -m pip install --upgrade pip==19.3.1
 "$PYTHON" -m pip install --upgrade grpcio grpcio-tools google-api-python-client google-auth-httplib2 oauth2client
 
 # Prepare generated Python code.

+ 1 - 1
tools/internal_ci/linux/grpc_xds_csharp.cfg

@@ -16,7 +16,7 @@
 
 # Location of the continuous shell script in repository.
 build_file: "grpc/tools/internal_ci/linux/grpc_xds_csharp.sh"
-timeout_mins: 120
+timeout_mins: 180
 action {
   define_artifacts {
     regex: "**/*sponge_log.*"

+ 1 - 1
tools/internal_ci/linux/grpc_xds_csharp_test_in_docker.sh

@@ -25,7 +25,7 @@ cd /var/local/git/grpc
 VIRTUAL_ENV=$(mktemp -d)
 virtualenv "$VIRTUAL_ENV"
 PYTHON="$VIRTUAL_ENV"/bin/python
-"$PYTHON" -m pip install --upgrade pip
+"$PYTHON" -m pip install --upgrade pip==19.3.1
 "$PYTHON" -m pip install --upgrade grpcio grpcio-tools google-api-python-client google-auth-httplib2 oauth2client
 
 # Prepare generated Python code.

+ 1 - 1
tools/internal_ci/linux/grpc_xds_php_test_in_docker.sh

@@ -25,7 +25,7 @@ cd /var/local/git/grpc
 VIRTUAL_ENV=$(mktemp -d)
 virtualenv "$VIRTUAL_ENV"
 PYTHON="$VIRTUAL_ENV"/bin/python
-"$PYTHON" -m pip install --upgrade pip
+"$PYTHON" -m pip install --upgrade pip==19.3.1
 "$PYTHON" -m pip install --upgrade grpcio-tools google-api-python-client google-auth-httplib2 oauth2client
 
 # Prepare generated Python code.

+ 1 - 1
tools/internal_ci/linux/grpc_xds_python.cfg

@@ -16,7 +16,7 @@
 
 # Location of the continuous shell script in repository.
 build_file: "grpc/tools/internal_ci/linux/grpc_xds.sh"
-timeout_mins: 120
+timeout_mins: 180
 env_vars {
   key: "BAZEL_SCRIPT"
   value: "tools/internal_ci/linux/grpc_xds_bazel_python_test_in_docker.sh"

+ 1 - 1
tools/internal_ci/linux/grpc_xds_ruby_test_in_docker.sh

@@ -25,7 +25,7 @@ cd /var/local/git/grpc
 VIRTUAL_ENV=$(mktemp -d)
 virtualenv "$VIRTUAL_ENV"
 PYTHON="$VIRTUAL_ENV"/bin/python
-"$PYTHON" -m pip install --upgrade pip
+"$PYTHON" -m pip install --upgrade pip==19.3.1
 "$PYTHON" -m pip install --upgrade grpcio-tools google-api-python-client google-auth-httplib2 oauth2client
 
 # Prepare generated Python code.

+ 1 - 1
tools/run_tests/artifacts/build_package_python.sh

@@ -24,7 +24,7 @@ mkdir -p artifacts/
 cp -r "${EXTERNAL_GIT_ROOT}"/input_artifacts/python_*/* artifacts/ || true
 
 apt-get install -y python-pip
-python -m pip install -U pip
+python -m pip install -U pip==19.3.1
 python -m pip install -U wheel
 
 strip_binary_wheel() {

+ 1 - 1
tools/run_tests/helper_scripts/build_python.sh

@@ -153,7 +153,7 @@ fi
 # See https://github.com/grpc/grpc/issues/14815 for more context. We cannot rely
 # on pip to upgrade itself because if pip is too old, it may not have the required
 # TLS version to run `pip install`.
-curl https://bootstrap.pypa.io/get-pip.py | $VENV_PYTHON
+curl https://bootstrap.pypa.io/2.7/get-pip.py | $VENV_PYTHON
 
 # pip-installs the directory specified. Used because on MSYS the vanilla Windows
 # Python gets confused when parsing paths.

+ 172 - 10
tools/run_tests/run_xds_tests.py

@@ -51,6 +51,7 @@ _TEST_CASES = [
     'backends_restart',
     'change_backend_service',
     'gentle_failover',
+    'load_report_based_failover',
     'ping_pong',
     'remove_instance_group',
     'round_robin',
@@ -279,6 +280,9 @@ _TESTS_TO_SEND_METADATA = ['header_matching']
 _TEST_METADATA_KEY = 'xds_md'
 _TEST_METADATA_VALUE_UNARY = 'unary_yranu'
 _TEST_METADATA_VALUE_EMPTY = 'empty_ytpme'
+# Extra RPC metadata whose value is a number, sent with UnaryCall only.
+_TEST_METADATA_NUMERIC_KEY = 'xds_md_numeric'
+_TEST_METADATA_NUMERIC_VALUE = '159'
 _PATH_MATCHER_NAME = 'path-matcher'
 _BASE_TEMPLATE_NAME = 'test-template'
 _BASE_INSTANCE_GROUP_NAME = 'test-ig'
@@ -619,6 +623,56 @@ def test_gentle_failover(gcp,
                                                  _WAIT_FOR_BACKEND_SEC)
 
 
+def test_load_report_based_failover(gcp, backend_service,
+                                    primary_instance_group,
+                                    secondary_instance_group):
+    logger.info('Running test_load_report_based_failover')
+    try:
+        patch_backend_service(
+            gcp, backend_service,
+            [primary_instance_group, secondary_instance_group])
+        primary_instance_names = get_instance_names(gcp, primary_instance_group)
+        secondary_instance_names = get_instance_names(gcp,
+                                                      secondary_instance_group)
+        wait_for_healthy_backends(gcp, backend_service, primary_instance_group)
+        wait_for_healthy_backends(gcp, backend_service,
+                                  secondary_instance_group)
+        wait_until_all_rpcs_go_to_given_backends(primary_instance_names,
+                                                 _WAIT_FOR_STATS_SEC)
+        # Set primary locality's balance mode to RATE, and RPS to 20% of the
+        # client's QPS. The secondary locality will be used.
+        max_rate = int(args.qps * 1 / 5)
+        logger.info('Patching backend service to RATE with %d max_rate',
+                    max_rate)
+        patch_backend_service(
+            gcp,
+            backend_service, [primary_instance_group, secondary_instance_group],
+            balancing_mode='RATE',
+            max_rate=max_rate)
+        wait_until_all_rpcs_go_to_given_backends(
+            primary_instance_names + secondary_instance_names,
+            _WAIT_FOR_BACKEND_SEC)
+
+        # Set primary locality's balance mode to RATE, and RPS to 120% of the
+        # client's QPS. Only the primary locality will be used.
+        max_rate = int(args.qps * 6 / 5)
+        logger.info('Patching backend service to RATE with %d max_rate',
+                    max_rate)
+        patch_backend_service(
+            gcp,
+            backend_service, [primary_instance_group, secondary_instance_group],
+            balancing_mode='RATE',
+            max_rate=max_rate)
+        wait_until_all_rpcs_go_to_given_backends(primary_instance_names,
+                                                 _WAIT_FOR_BACKEND_SEC)
+        logger.info("success")
+    finally:
+        patch_backend_service(gcp, backend_service, [primary_instance_group])
+        instance_names = get_instance_names(gcp, primary_instance_group)
+        wait_until_all_rpcs_go_to_given_backends(instance_names,
+                                                 _WAIT_FOR_BACKEND_SEC)
+
+
 def test_ping_pong(gcp, backend_service, instance_group):
     logger.info('Running test_ping_pong')
     wait_for_healthy_backends(gcp, backend_service, instance_group)
@@ -974,7 +1028,36 @@ def test_path_matching(gcp, original_backend_service, instance_group,
                 {
                     "UnaryCall": original_backend_instances,
                     "EmptyCall": alternate_backend_instances
-                })
+                }),
+            (
+                [{
+                    'priority': 0,
+                    # Regex UnaryCall -> alternate_backend_service.
+                    'matchRules': [{
+                        'regexMatch':
+                            '^\/.*\/UnaryCall$'  # Unary methods with any services.
+                    }],
+                    'service': alternate_backend_service.url
+                }],
+                {
+                    "UnaryCall": alternate_backend_instances,
+                    "EmptyCall": original_backend_instances
+                }),
+            (
+                [{
+                    'priority': 0,
+                    # ignoreCase EmptyCall -> alternate_backend_service.
+                    'matchRules': [{
+                        # Case insensitive matching.
+                        'fullPathMatch': '/gRpC.tEsTinG.tEstseRvice/empTycaLl',
+                        'ignoreCase': True,
+                    }],
+                    'service': alternate_backend_service.url
+                }],
+                {
+                    "UnaryCall": original_backend_instances,
+                    "EmptyCall": alternate_backend_instances
+                }),
         ]
 
         for (route_rules, expected_instances) in test_cases:
@@ -991,8 +1074,8 @@ def test_path_matching(gcp, original_backend_service, instance_group,
                 original_backend_instances + alternate_backend_instances,
                 _WAIT_FOR_STATS_SEC)
 
-            retry_count = 20
-            # Each attempt takes about 10 seconds, 20 retries is equivalent to 200
+            retry_count = 80
+            # Each attempt takes about 5 seconds, 80 retries is equivalent to 400
             # seconds timeout.
             for i in range(retry_count):
                 stats = get_client_stats(_NUM_TEST_RPCS, _WAIT_FOR_STATS_SEC)
@@ -1004,6 +1087,10 @@ def test_path_matching(gcp, original_backend_service, instance_group,
                 if compare_expected_instances(stats, expected_instances):
                     logger.info("success")
                     break
+                elif i == retry_count - 1:
+                    raise Exception(
+                        'timeout waiting for RPCs to the expected instances: %s'
+                        % expected_instances)
     finally:
         patch_url_map_backend_service(gcp, original_backend_service)
         patch_backend_service(gcp, alternate_backend_service, [])
@@ -1084,26 +1171,90 @@ def test_header_matching(gcp, original_backend_service, instance_group,
                     "EmptyCall": alternate_backend_instances,
                     "UnaryCall": original_backend_instances
                 }),
+            (
+                [{
+                    'priority': 0,
+                    # Header 'xds_md_numeric' present -> alternate_backend_service.
+                    # UnaryCall is sent with the metadata, so will be sent to alternative.
+                    'matchRules': [{
+                        'prefixMatch':
+                            '/',
+                        'headerMatches': [{
+                            'headerName': _TEST_METADATA_NUMERIC_KEY,
+                            'presentMatch': True
+                        }]
+                    }],
+                    'service': alternate_backend_service.url
+                }],
+                {
+                    "EmptyCall": original_backend_instances,
+                    "UnaryCall": alternate_backend_instances
+                }),
             (
                 [{
                     'priority': 0,
                     # Header invert ExactMatch -> alternate_backend_service.
-                    # EmptyCall is sent with the metadata, so will be sent to original.
+                    # UnaryCall is sent with the metadata, so will be sent to
+                    # original. EmptyCall will be sent to alternative.
                     'matchRules': [{
                         'prefixMatch':
                             '/',
                         'headerMatches': [{
                             'headerName': _TEST_METADATA_KEY,
-                            'exactMatch': _TEST_METADATA_VALUE_EMPTY,
+                            'exactMatch': _TEST_METADATA_VALUE_UNARY,
                             'invertMatch': True
                         }]
                     }],
                     'service': alternate_backend_service.url
                 }],
+                {
+                    "EmptyCall": alternate_backend_instances,
+                    "UnaryCall": original_backend_instances
+                }),
+            (
+                [{
+                    'priority': 0,
+                    # Header 'xds_md_numeric' range [100,200] -> alternate_backend_service.
+                    # UnaryCall is sent with the metadata in range.
+                    'matchRules': [{
+                        'prefixMatch':
+                            '/',
+                        'headerMatches': [{
+                            'headerName': _TEST_METADATA_NUMERIC_KEY,
+                            'rangeMatch': {
+                                'rangeStart': '100',
+                                'rangeEnd': '200'
+                            }
+                        }]
+                    }],
+                    'service': alternate_backend_service.url
+                }],
                 {
                     "EmptyCall": original_backend_instances,
                     "UnaryCall": alternate_backend_instances
                 }),
+            (
+                [{
+                    'priority': 0,
+                    # Header RegexMatch -> alternate_backend_service.
+                    # EmptyCall is sent with the metadata.
+                    'matchRules': [{
+                        'prefixMatch':
+                            '/',
+                        'headerMatches': [{
+                            'headerName':
+                                _TEST_METADATA_KEY,
+                            'regexMatch':
+                                "^%s.*%s$" % (_TEST_METADATA_VALUE_EMPTY[:2],
+                                              _TEST_METADATA_VALUE_EMPTY[-2:])
+                        }]
+                    }],
+                    'service': alternate_backend_service.url
+                }],
+                {
+                    "EmptyCall": alternate_backend_instances,
+                    "UnaryCall": original_backend_instances
+                }),
         ]
 
         for (route_rules, expected_instances) in test_cases:
@@ -1121,8 +1272,8 @@ def test_header_matching(gcp, original_backend_service, instance_group,
                 original_backend_instances + alternate_backend_instances,
                 _WAIT_FOR_STATS_SEC)
 
-            retry_count = 20
-            # Each attempt takes about 10 seconds, 10 retries is equivalent to 100
+            retry_count = 80
+            # Each attempt takes about 5 seconds, 80 retries is equivalent to 400
             # seconds timeout.
             for i in range(retry_count):
                 stats = get_client_stats(_NUM_TEST_RPCS, _WAIT_FOR_STATS_SEC)
@@ -1134,6 +1285,10 @@ def test_header_matching(gcp, original_backend_service, instance_group,
                 if compare_expected_instances(stats, expected_instances):
                     logger.info("success")
                     break
+                elif i == retry_count - 1:
+                    raise Exception(
+                        'timeout waiting for RPCs to the expected instances: %s'
+                        % expected_instances)
     finally:
         patch_url_map_backend_service(gcp, original_backend_service)
         patch_backend_service(gcp, alternate_backend_service, [])
@@ -1757,6 +1912,7 @@ def patch_backend_service(gcp,
                           backend_service,
                           instance_groups,
                           balancing_mode='UTILIZATION',
+                          max_rate=1,
                           circuit_breakers=None):
     if gcp.alpha_compute:
         compute_to_use = gcp.alpha_compute
@@ -1766,7 +1922,7 @@ def patch_backend_service(gcp,
         'backends': [{
             'group': instance_group.url,
             'balancingMode': balancing_mode,
-            'maxRate': 1 if balancing_mode == 'RATE' else None
+            'maxRate': max_rate if balancing_mode == 'RATE' else None
         } for instance_group in instance_groups],
         'circuitBreakers': circuit_breakers,
     }
@@ -2132,11 +2288,13 @@ try:
                 rpcs_to_send = '--rpc="UnaryCall"'
 
             if test_case in _TESTS_TO_SEND_METADATA:
-                metadata_to_send = '--metadata="EmptyCall:{keyE}:{valueE},UnaryCall:{keyU}:{valueU}"'.format(
+                metadata_to_send = '--metadata="EmptyCall:{keyE}:{valueE},UnaryCall:{keyU}:{valueU},UnaryCall:{keyNU}:{valueNU}"'.format(
                     keyE=_TEST_METADATA_KEY,
                     valueE=_TEST_METADATA_VALUE_EMPTY,
                     keyU=_TEST_METADATA_KEY,
-                    valueU=_TEST_METADATA_VALUE_UNARY)
+                    valueU=_TEST_METADATA_VALUE_UNARY,
+                    keyNU=_TEST_METADATA_NUMERIC_KEY,
+                    valueNU=_TEST_METADATA_NUMERIC_VALUE)
             else:
                 # Setting the arg explicitly to empty with '--metadata=""'
                 # makes C# client fail
@@ -2185,6 +2343,10 @@ try:
                 elif test_case == 'gentle_failover':
                     test_gentle_failover(gcp, backend_service, instance_group,
                                          secondary_zone_instance_group)
+                elif test_case == 'load_report_based_failover':
+                    test_load_report_based_failover(
+                        gcp, backend_service, instance_group,
+                        secondary_zone_instance_group)
                 elif test_case == 'ping_pong':
                     test_ping_pong(gcp, backend_service, instance_group)
                 elif test_case == 'remove_instance_group':