Преглед на файлове

Merge pull request #25267 from menghanl/v1.32.x

Doug Fawley преди 4 години
родител
ревизия
c4095af025

+ 33 - 0
doc/xds-test-descriptions.md

@@ -321,3 +321,36 @@ Assert:
 1. Once all backends receive at least one RPC, the following 1000 RPCs are
 distributed across the 2 backends as a: 20, b: 80.
 
+### load_based_failover
+
+This test verifies that traffic is partially diverted to a secondary locality
+when the QPS is greater than the configured RPS in the priority locality.
+
+Client parameters:
+
+1.  --num_channels=1
+1.  --qps=100
+
+Load balancer configuration:
+
+1.  The primary MIG with 2 backends in the same zone as the client
+1.  The secondary MIG with 2 backends in a different zone
+
+Test driver asserts:
+
+1.  All backends in the primary locality receive at least 1 RPC.
+1.  No backends in the secondary locality receive RPCs.
+
+The test driver sets `balancingMode` is `RATE`, and `maxRate` to 20 in the primary locality.
+
+Test driver asserts:
+
+1.  All backends in the primary locality receive at least 1 RPC.
+1.  All backends in the secondary locality receive at least 1 RPC.
+
+The test driver set `maxRate` to 120 in the primary locality.
+
+Test driver asserts:
+
+1.  All backends in the primary locality receive at least 1 RPC.
+1.  No backends in the secondary locality receive RPCs.

+ 1 - 1
tools/dockerfile/distribtest/python_dev_centos7_x64/Dockerfile

@@ -18,7 +18,7 @@ RUN yum install -y python
 RUN yum install -y python-devel
 RUN yum install -y epel-release
 RUN yum install -y python-pip
-RUN pip install --upgrade pip
+RUN pip install --upgrade pip==19.3.1
 RUN pip install -U virtualenv
 
 # The default gcc of CentOS 7 is gcc 4.8 which is older than gcc 4.9,

+ 1 - 1
tools/internal_ci/linux/grpc_xds.cfg

@@ -16,7 +16,7 @@
 
 # Location of the continuous shell script in repository.
 build_file: "grpc/tools/internal_ci/linux/grpc_xds.sh"
-timeout_mins: 120
+timeout_mins: 180
 env_vars {
   key: "BAZEL_SCRIPT"
   value: "tools/internal_ci/linux/grpc_xds_bazel_test_in_docker.sh"

+ 1 - 1
tools/internal_ci/linux/grpc_xds_bazel_python_test_in_docker.sh

@@ -25,7 +25,7 @@ cd /var/local/git/grpc
 VIRTUAL_ENV=$(mktemp -d)
 virtualenv "$VIRTUAL_ENV"
 PYTHON="$VIRTUAL_ENV"/bin/python
-"$PYTHON" -m pip install --upgrade pip
+"$PYTHON" -m pip install --upgrade pip==19.3.1
 "$PYTHON" -m pip install --upgrade grpcio-tools google-api-python-client google-auth-httplib2 oauth2client
 
 # Prepare generated Python code.

+ 1 - 1
tools/internal_ci/linux/grpc_xds_bazel_test_in_docker.sh

@@ -25,7 +25,7 @@ cd /var/local/git/grpc
 VIRTUAL_ENV=$(mktemp -d)
 virtualenv "$VIRTUAL_ENV"
 PYTHON="$VIRTUAL_ENV"/bin/python
-"$PYTHON" -m pip install --upgrade pip
+"$PYTHON" -m pip install --upgrade pip==19.3.1
 "$PYTHON" -m pip install --upgrade grpcio grpcio-tools google-api-python-client google-auth-httplib2 oauth2client
 
 # Prepare generated Python code.

+ 1 - 1
tools/internal_ci/linux/grpc_xds_csharp.cfg

@@ -16,7 +16,7 @@
 
 # Location of the continuous shell script in repository.
 build_file: "grpc/tools/internal_ci/linux/grpc_xds_csharp.sh"
-timeout_mins: 120
+timeout_mins: 180
 action {
   define_artifacts {
     regex: "**/*sponge_log.*"

+ 1 - 1
tools/internal_ci/linux/grpc_xds_csharp_test_in_docker.sh

@@ -25,7 +25,7 @@ cd /var/local/git/grpc
 VIRTUAL_ENV=$(mktemp -d)
 virtualenv "$VIRTUAL_ENV"
 PYTHON="$VIRTUAL_ENV"/bin/python
-"$PYTHON" -m pip install --upgrade pip
+"$PYTHON" -m pip install --upgrade pip==19.3.1
 "$PYTHON" -m pip install --upgrade grpcio grpcio-tools google-api-python-client google-auth-httplib2 oauth2client
 
 # Prepare generated Python code.

+ 1 - 1
tools/internal_ci/linux/grpc_xds_php_test_in_docker.sh

@@ -25,7 +25,7 @@ cd /var/local/git/grpc
 VIRTUAL_ENV=$(mktemp -d)
 virtualenv "$VIRTUAL_ENV"
 PYTHON="$VIRTUAL_ENV"/bin/python
-"$PYTHON" -m pip install --upgrade pip
+"$PYTHON" -m pip install --upgrade pip==19.3.1
 "$PYTHON" -m pip install --upgrade grpcio-tools google-api-python-client google-auth-httplib2 oauth2client
 
 # Prepare generated Python code.

+ 1 - 1
tools/internal_ci/linux/grpc_xds_python.cfg

@@ -16,7 +16,7 @@
 
 # Location of the continuous shell script in repository.
 build_file: "grpc/tools/internal_ci/linux/grpc_xds.sh"
-timeout_mins: 120
+timeout_mins: 180
 env_vars {
   key: "BAZEL_SCRIPT"
   value: "tools/internal_ci/linux/grpc_xds_bazel_python_test_in_docker.sh"

+ 1 - 1
tools/internal_ci/linux/grpc_xds_ruby_test_in_docker.sh

@@ -25,7 +25,7 @@ cd /var/local/git/grpc
 VIRTUAL_ENV=$(mktemp -d)
 virtualenv "$VIRTUAL_ENV"
 PYTHON="$VIRTUAL_ENV"/bin/python
-"$PYTHON" -m pip install --upgrade pip
+"$PYTHON" -m pip install --upgrade pip==19.3.1
 "$PYTHON" -m pip install --upgrade grpcio-tools google-api-python-client google-auth-httplib2 oauth2client
 
 # Prepare generated Python code.

+ 1 - 1
tools/run_tests/artifacts/build_package_python.sh

@@ -24,7 +24,7 @@ mkdir -p artifacts/
 cp -r "${EXTERNAL_GIT_ROOT}"/input_artifacts/python_*/* artifacts/ || true
 
 apt-get install -y python-pip
-python -m pip install -U pip
+python -m pip install -U pip==19.3.1
 python -m pip install -U wheel
 
 strip_binary_wheel() {

+ 1 - 1
tools/run_tests/helper_scripts/build_python.sh

@@ -153,7 +153,7 @@ fi
 # See https://github.com/grpc/grpc/issues/14815 for more context. We cannot rely
 # on pip to upgrade itself because if pip is too old, it may not have the required
 # TLS version to run `pip install`.
-curl https://bootstrap.pypa.io/get-pip.py | $VENV_PYTHON
+curl https://bootstrap.pypa.io/2.7/get-pip.py | $VENV_PYTHON
 
 # pip-installs the directory specified. Used because on MSYS the vanilla Windows
 # Python gets confused when parsing paths.

+ 239 - 28
tools/run_tests/run_xds_tests.py

@@ -51,6 +51,7 @@ _TEST_CASES = [
     'backends_restart',
     'change_backend_service',
     'gentle_failover',
+    'load_report_based_failover',
     'ping_pong',
     'remove_instance_group',
     'round_robin',
@@ -263,7 +264,11 @@ _TESTS_TO_RUN_MULTIPLE_RPCS = ['path_matching', 'header_matching']
 # Tests that make UnaryCall with test metadata.
 _TESTS_TO_SEND_METADATA = ['header_matching']
 _TEST_METADATA_KEY = 'xds_md'
-_TEST_METADATA_VALUE = 'exact_match'
+_TEST_METADATA_VALUE_UNARY = 'unary_yranu'
+_TEST_METADATA_VALUE_EMPTY = 'empty_ytpme'
+# Extra RPC metadata whose value is a number, sent with UnaryCall only.
+_TEST_METADATA_NUMERIC_KEY = 'xds_md_numeric'
+_TEST_METADATA_NUMERIC_VALUE = '159'
 _PATH_MATCHER_NAME = 'path-matcher'
 _BASE_TEMPLATE_NAME = 'test-template'
 _BASE_INSTANCE_GROUP_NAME = 'test-ig'
@@ -496,6 +501,56 @@ def test_gentle_failover(gcp,
                                                  _WAIT_FOR_BACKEND_SEC)
 
 
+def test_load_report_based_failover(gcp, backend_service,
+                                    primary_instance_group,
+                                    secondary_instance_group):
+    logger.info('Running test_load_report_based_failover')
+    try:
+        patch_backend_instances(
+            gcp, backend_service,
+            [primary_instance_group, secondary_instance_group])
+        primary_instance_names = get_instance_names(gcp, primary_instance_group)
+        secondary_instance_names = get_instance_names(gcp,
+                                                      secondary_instance_group)
+        wait_for_healthy_backends(gcp, backend_service, primary_instance_group)
+        wait_for_healthy_backends(gcp, backend_service,
+                                  secondary_instance_group)
+        wait_until_all_rpcs_go_to_given_backends(primary_instance_names,
+                                                 _WAIT_FOR_STATS_SEC)
+        # Set primary locality's balance mode to RATE, and RPS to 20% of the
+        # client's QPS. The secondary locality will be used.
+        max_rate = int(args.qps * 1 / 5)
+        logger.info('Patching backend service to RATE with %d max_rate',
+                    max_rate)
+        patch_backend_instances(
+            gcp,
+            backend_service, [primary_instance_group, secondary_instance_group],
+            balancing_mode='RATE',
+            max_rate=max_rate)
+        wait_until_all_rpcs_go_to_given_backends(
+            primary_instance_names + secondary_instance_names,
+            _WAIT_FOR_BACKEND_SEC)
+
+        # Set primary locality's balance mode to RATE, and RPS to 120% of the
+        # client's QPS. Only the primary locality will be used.
+        max_rate = int(args.qps * 6 / 5)
+        logger.info('Patching backend service to RATE with %d max_rate',
+                    max_rate)
+        patch_backend_instances(
+            gcp,
+            backend_service, [primary_instance_group, secondary_instance_group],
+            balancing_mode='RATE',
+            max_rate=max_rate)
+        wait_until_all_rpcs_go_to_given_backends(primary_instance_names,
+                                                 _WAIT_FOR_BACKEND_SEC)
+        logger.info("success")
+    finally:
+        patch_backend_instances(gcp, backend_service, [primary_instance_group])
+        instance_names = get_instance_names(gcp, primary_instance_group)
+        wait_until_all_rpcs_go_to_given_backends(instance_names,
+                                                 _WAIT_FOR_BACKEND_SEC)
+
+
 def test_ping_pong(gcp, backend_service, instance_group):
     logger.info('Running test_ping_pong')
     wait_for_healthy_backends(gcp, backend_service, instance_group)
@@ -851,7 +906,21 @@ def test_path_matching(gcp, original_backend_service, instance_group,
                 {
                     "UnaryCall": original_backend_instances,
                     "EmptyCall": alternate_backend_instances
-                })
+                }),
+            (
+                [{
+                    'priority': 0,
+                    # Regex UnaryCall -> alternate_backend_service.
+                    'matchRules': [{
+                        'regexMatch':
+                            '^\/.*\/UnaryCall$'  # Unary methods with any services.
+                    }],
+                    'service': alternate_backend_service.url
+                }],
+                {
+                    "UnaryCall": alternate_backend_instances,
+                    "EmptyCall": original_backend_instances
+                }),
         ]
 
         for (route_rules, expected_instances) in test_cases:
@@ -868,8 +937,8 @@ def test_path_matching(gcp, original_backend_service, instance_group,
                 original_backend_instances + alternate_backend_instances,
                 _WAIT_FOR_STATS_SEC)
 
-            retry_count = 20
-            # Each attempt takes about 10 seconds, 20 retries is equivalent to 200
+            retry_count = 80
+            # Each attempt takes about 5 seconds, 80 retries is equivalent to 400
             # seconds timeout.
             for i in range(retry_count):
                 stats = get_client_stats(_NUM_TEST_RPCS, _WAIT_FOR_STATS_SEC)
@@ -881,6 +950,10 @@ def test_path_matching(gcp, original_backend_service, instance_group,
                 if compare_expected_instances(stats, expected_instances):
                     logger.info("success")
                     break
+                elif i == retry_count - 1:
+                    raise Exception(
+                        'timeout waiting for RPCs to the expected instances: %s'
+                        % expected_instances)
     finally:
         patch_url_map_backend_service(gcp, original_backend_service)
         patch_backend_instances(gcp, alternate_backend_service, [])
@@ -903,25 +976,149 @@ def test_header_matching(gcp, original_backend_service, instance_group,
 
     try:
         # A list of tuples (route_rules, expected_instances).
-        test_cases = [(
-            [{
-                'priority': 0,
-                # Header ExactMatch -> alternate_backend_service.
-                # EmptyCall is sent with the metadata.
-                'matchRules': [{
-                    'prefixMatch':
-                        '/',
-                    'headerMatches': [{
-                        'headerName': _TEST_METADATA_KEY,
-                        'exactMatch': _TEST_METADATA_VALUE
-                    }]
+        test_cases = [
+            (
+                [{
+                    'priority': 0,
+                    # Header ExactMatch -> alternate_backend_service.
+                    # EmptyCall is sent with the metadata.
+                    'matchRules': [{
+                        'prefixMatch':
+                            '/',
+                        'headerMatches': [{
+                            'headerName': _TEST_METADATA_KEY,
+                            'exactMatch': _TEST_METADATA_VALUE_EMPTY
+                        }]
+                    }],
+                    'service': alternate_backend_service.url
                 }],
-                'service': alternate_backend_service.url
-            }],
-            {
-                "EmptyCall": alternate_backend_instances,
-                "UnaryCall": original_backend_instances
-            })]
+                {
+                    "EmptyCall": alternate_backend_instances,
+                    "UnaryCall": original_backend_instances
+                }),
+            (
+                [{
+                    'priority': 0,
+                    # Header PrefixMatch -> alternate_backend_service.
+                    # UnaryCall is sent with the metadata.
+                    'matchRules': [{
+                        'prefixMatch':
+                            '/',
+                        'headerMatches': [{
+                            'headerName': _TEST_METADATA_KEY,
+                            'prefixMatch': _TEST_METADATA_VALUE_UNARY[:2]
+                        }]
+                    }],
+                    'service': alternate_backend_service.url
+                }],
+                {
+                    "EmptyCall": original_backend_instances,
+                    "UnaryCall": alternate_backend_instances
+                }),
+            (
+                [{
+                    'priority': 0,
+                    # Header SuffixMatch -> alternate_backend_service.
+                    # EmptyCall is sent with the metadata.
+                    'matchRules': [{
+                        'prefixMatch':
+                            '/',
+                        'headerMatches': [{
+                            'headerName': _TEST_METADATA_KEY,
+                            'suffixMatch': _TEST_METADATA_VALUE_EMPTY[-2:]
+                        }]
+                    }],
+                    'service': alternate_backend_service.url
+                }],
+                {
+                    "EmptyCall": alternate_backend_instances,
+                    "UnaryCall": original_backend_instances
+                }),
+            (
+                [{
+                    'priority': 0,
+                    # Header 'xds_md_numeric' present -> alternate_backend_service.
+                    # UnaryCall is sent with the metadata, so will be sent to alternative.
+                    'matchRules': [{
+                        'prefixMatch':
+                            '/',
+                        'headerMatches': [{
+                            'headerName': _TEST_METADATA_NUMERIC_KEY,
+                            'presentMatch': True
+                        }]
+                    }],
+                    'service': alternate_backend_service.url
+                }],
+                {
+                    "EmptyCall": original_backend_instances,
+                    "UnaryCall": alternate_backend_instances
+                }),
+            (
+                [{
+                    'priority': 0,
+                    # Header invert ExactMatch -> alternate_backend_service.
+                    # UnaryCall is sent with the metadata, so will be sent to
+                    # original. EmptyCall will be sent to alternative.
+                    'matchRules': [{
+                        'prefixMatch':
+                            '/',
+                        'headerMatches': [{
+                            'headerName': _TEST_METADATA_KEY,
+                            'exactMatch': _TEST_METADATA_VALUE_UNARY,
+                            'invertMatch': True
+                        }]
+                    }],
+                    'service': alternate_backend_service.url
+                }],
+                {
+                    "EmptyCall": alternate_backend_instances,
+                    "UnaryCall": original_backend_instances
+                }),
+            (
+                [{
+                    'priority': 0,
+                    # Header 'xds_md_numeric' range [100,200] -> alternate_backend_service.
+                    # UnaryCall is sent with the metadata in range.
+                    'matchRules': [{
+                        'prefixMatch':
+                            '/',
+                        'headerMatches': [{
+                            'headerName': _TEST_METADATA_NUMERIC_KEY,
+                            'rangeMatch': {
+                                'rangeStart': '100',
+                                'rangeEnd': '200'
+                            }
+                        }]
+                    }],
+                    'service': alternate_backend_service.url
+                }],
+                {
+                    "EmptyCall": original_backend_instances,
+                    "UnaryCall": alternate_backend_instances
+                }),
+            (
+                [{
+                    'priority': 0,
+                    # Header RegexMatch -> alternate_backend_service.
+                    # EmptyCall is sent with the metadata.
+                    'matchRules': [{
+                        'prefixMatch':
+                            '/',
+                        'headerMatches': [{
+                            'headerName':
+                                _TEST_METADATA_KEY,
+                            'regexMatch':
+                                "^%s.*%s$" % (_TEST_METADATA_VALUE_EMPTY[:2],
+                                              _TEST_METADATA_VALUE_EMPTY[-2:])
+                        }]
+                    }],
+                    'service': alternate_backend_service.url
+                }],
+                {
+                    "EmptyCall": alternate_backend_instances,
+                    "UnaryCall": original_backend_instances
+                }),
+        ]
 
         for (route_rules, expected_instances) in test_cases:
             logger.info('patching url map with %s -> alternative',
@@ -938,8 +1135,8 @@ def test_header_matching(gcp, original_backend_service, instance_group,
                 original_backend_instances + alternate_backend_instances,
                 _WAIT_FOR_STATS_SEC)
 
-            retry_count = 10
-            # Each attempt takes about 10 seconds, 10 retries is equivalent to 100
+            retry_count = 80
+            # Each attempt takes about 5 seconds, 80 retries is equivalent to 400
             # seconds timeout.
             for i in range(retry_count):
                 stats = get_client_stats(_NUM_TEST_RPCS, _WAIT_FOR_STATS_SEC)
@@ -951,6 +1148,10 @@ def test_header_matching(gcp, original_backend_service, instance_group,
                 if compare_expected_instances(stats, expected_instances):
                     logger.info("success")
                     break
+                elif i == retry_count - 1:
+                    raise Exception(
+                        'timeout waiting for RPCs to the expected instances: %s'
+                        % expected_instances)
     finally:
         patch_url_map_backend_service(gcp, original_backend_service)
         patch_backend_instances(gcp, alternate_backend_service, [])
@@ -1400,7 +1601,8 @@ def delete_instance_template(gcp):
 def patch_backend_instances(gcp,
                             backend_service,
                             instance_groups,
-                            balancing_mode='UTILIZATION'):
+                            balancing_mode='UTILIZATION',
+                            max_rate=1):
     if gcp.alpha_compute:
         compute_to_use = gcp.alpha_compute
     else:
@@ -1409,7 +1611,7 @@ def patch_backend_instances(gcp,
         'backends': [{
             'group': instance_group.url,
             'balancingMode': balancing_mode,
-            'maxRate': 1 if balancing_mode == 'RATE' else None
+            'maxRate': max_rate if balancing_mode == 'RATE' else None
         } for instance_group in instance_groups],
     }
     logger.debug('Sending GCP request with body=%s', config)
@@ -1767,8 +1969,13 @@ try:
                 rpcs_to_send = '--rpc="UnaryCall"'
 
             if test_case in _TESTS_TO_SEND_METADATA:
-                metadata_to_send = '--metadata="EmptyCall:{key}:{value}"'.format(
-                    key=_TEST_METADATA_KEY, value=_TEST_METADATA_VALUE)
+                metadata_to_send = '--metadata="EmptyCall:{keyE}:{valueE},UnaryCall:{keyU}:{valueU},UnaryCall:{keyNU}:{valueNU}"'.format(
+                    keyE=_TEST_METADATA_KEY,
+                    valueE=_TEST_METADATA_VALUE_EMPTY,
+                    keyU=_TEST_METADATA_KEY,
+                    valueU=_TEST_METADATA_VALUE_UNARY,
+                    keyNU=_TEST_METADATA_NUMERIC_KEY,
+                    valueNU=_TEST_METADATA_NUMERIC_VALUE)
             else:
                 # Setting the arg explicitly to empty with '--metadata=""'
                 # makes C# client fail
@@ -1816,6 +2023,10 @@ try:
                 elif test_case == 'gentle_failover':
                     test_gentle_failover(gcp, backend_service, instance_group,
                                          secondary_zone_instance_group)
+                elif test_case == 'load_report_based_failover':
+                    test_load_report_based_failover(
+                        gcp, backend_service, instance_group,
+                        secondary_zone_instance_group)
                 elif test_case == 'ping_pong':
                     test_ping_pong(gcp, backend_service, instance_group)
                 elif test_case == 'remove_instance_group':