Browse Source

xDS Kubernetes Interop Test Driver

Sergii Tkachenko 5 years ago
parent
commit
86f8792136
39 changed files with 4086 additions and 0 deletions
  1. 3 0
      tools/run_tests/xds_test_driver/.gitignore
  2. 74 0
      tools/run_tests/xds_test_driver/README.md
  3. 13 0
      tools/run_tests/xds_test_driver/bin/__init__.py
  4. 121 0
      tools/run_tests/xds_test_driver/bin/run_channelz.py
  5. 132 0
      tools/run_tests/xds_test_driver/bin/run_td_setup.py
  6. 91 0
      tools/run_tests/xds_test_driver/bin/run_test_client.py
  7. 91 0
      tools/run_tests/xds_test_driver/bin/run_test_server.py
  8. 5 0
      tools/run_tests/xds_test_driver/config/grpc-testing.cfg
  9. 12 0
      tools/run_tests/xds_test_driver/config/local-dev.cfg.example
  10. 13 0
      tools/run_tests/xds_test_driver/framework/__init__.py
  11. 13 0
      tools/run_tests/xds_test_driver/framework/infrastructure/__init__.py
  12. 18 0
      tools/run_tests/xds_test_driver/framework/infrastructure/gcp/__init__.py
  13. 217 0
      tools/run_tests/xds_test_driver/framework/infrastructure/gcp/api.py
  14. 333 0
      tools/run_tests/xds_test_driver/framework/infrastructure/gcp/compute.py
  15. 113 0
      tools/run_tests/xds_test_driver/framework/infrastructure/gcp/network_security.py
  16. 89 0
      tools/run_tests/xds_test_driver/framework/infrastructure/gcp/network_services.py
  17. 324 0
      tools/run_tests/xds_test_driver/framework/infrastructure/k8s.py
  18. 462 0
      tools/run_tests/xds_test_driver/framework/infrastructure/traffic_director.py
  19. 95 0
      tools/run_tests/xds_test_driver/framework/rpc/__init__.py
  20. 175 0
      tools/run_tests/xds_test_driver/framework/rpc/grpc_channelz.py
  21. 47 0
      tools/run_tests/xds_test_driver/framework/rpc/grpc_testing.py
  22. 13 0
      tools/run_tests/xds_test_driver/framework/test_app/__init__.py
  23. 255 0
      tools/run_tests/xds_test_driver/framework/test_app/base_runner.py
  24. 207 0
      tools/run_tests/xds_test_driver/framework/test_app/client_app.py
  25. 245 0
      tools/run_tests/xds_test_driver/framework/test_app/server_app.py
  26. 51 0
      tools/run_tests/xds_test_driver/framework/xds_flags.py
  27. 43 0
      tools/run_tests/xds_test_driver/framework/xds_k8s_flags.py
  28. 395 0
      tools/run_tests/xds_test_driver/framework/xds_k8s_testcase.py
  29. 79 0
      tools/run_tests/xds_test_driver/kubernetes-manifests/client-secure.deployment.yaml
  30. 67 0
      tools/run_tests/xds_test_driver/kubernetes-manifests/client.deployment.yaml
  31. 7 0
      tools/run_tests/xds_test_driver/kubernetes-manifests/namespace.yaml
  32. 78 0
      tools/run_tests/xds_test_driver/kubernetes-manifests/server-secure.deployment.yaml
  33. 34 0
      tools/run_tests/xds_test_driver/kubernetes-manifests/server.deployment.yaml
  34. 17 0
      tools/run_tests/xds_test_driver/kubernetes-manifests/server.service.yaml
  35. 9 0
      tools/run_tests/xds_test_driver/kubernetes-manifests/service-account.yaml
  36. 12 0
      tools/run_tests/xds_test_driver/requirements.txt
  37. 13 0
      tools/run_tests/xds_test_driver/tests/__init__.py
  38. 41 0
      tools/run_tests/xds_test_driver/tests/baseline_test.py
  39. 79 0
      tools/run_tests/xds_test_driver/tests/security_test.py

+ 3 - 0
tools/run_tests/xds_test_driver/.gitignore

@@ -0,0 +1,3 @@
+src/proto
+config/local-dev.cfg
+venv

+ 74 - 0
tools/run_tests/xds_test_driver/README.md

@@ -0,0 +1,74 @@
+# xDS Kubernetes Interop Tests
+
+Proxyless Security Mesh Interop Tests executed on Kubernetes. Work in progress.
+
+## Installation
+
+#### Requirements
+1. Python v3.6+
+2. [Google Cloud SDK](https://cloud.google.com/sdk/docs/install)
+
+#### Configure GKE cluster access
+
+```sh
+# Update gcloud sdk
+gcloud -q components update
+
+# Configuring GKE cluster access for kubectl
+gcloud container clusters get-credentials "your_gke_cluster_name" --zone "your_gke_cluster_zone"
+
+# Save generated kube context name
+KUBE_CONTEXT="$(kubectl config current-context)"
+``` 
+
+#### Install python dependencies
+
+```sh
+# Create python virtual environment
+python3.6 -m venv venv
+
+# Activate virtual environment
+. ./venv/bin/activate
+
+# Install requirements
+pip install -r requirements.txt
+
+# Generate protos
+python -m grpc_tools.protoc --proto_path=../../../ \
+    --python_out=. --grpc_python_out=. \
+    src/proto/grpc/testing/empty.proto \
+    src/proto/grpc/testing/messages.proto \
+    src/proto/grpc/testing/test.proto
+```
+
+## Basic usage
+
+### xDS Baseline Tests
+```sh
+# Help
+python -m tests.baseline_test --help
+python -m tests.baseline_test --helpfull
+
+# Run on grpc-testing cluster
+python -m tests.baseline_test \
+  --flagfile="config/grpc-testing.cfg" \
+  --kube_context="${KUBE_CONTEXT}" \
+  --namespace=interop-psm-security \
+  --server_image="gcr.io/grpc-testing/xds-k8s-test-server-java:latest" \
+  --client_image="gcr.io/grpc-testing/xds-k8s-test-client-java:latest" \
+```
+
+### xDS Security Tests
+```sh
+# Help
+python -m tests.security_test --help
+python -m tests.security_test --helpfull
+
+# Run on grpc-testing cluster
+python -m tests.security_test \
+  --flagfile="config/grpc-testing.cfg" \
+  --kube_context="${KUBE_CONTEXT}" \
+  --namespace=interop-psm-security \
+  --server_image="gcr.io/grpc-testing/xds-k8s-test-server-java:latest" \
+  --client_image="gcr.io/grpc-testing/xds-k8s-test-client-java:latest" \
+```

+ 13 - 0
tools/run_tests/xds_test_driver/bin/__init__.py

@@ -0,0 +1,13 @@
+# Copyright 2020 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+ 121 - 0
tools/run_tests/xds_test_driver/bin/run_channelz.py

@@ -0,0 +1,121 @@
+# Copyright 2020 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import hashlib
+import logging
+
+from absl import app
+from absl import flags
+
+from framework import xds_flags
+from framework import xds_k8s_flags
+from framework.infrastructure import k8s
+from framework.rpc import grpc_channelz
+from framework.test_app import server_app
+from framework.test_app import client_app
+
+logger = logging.getLogger(__name__)
+# Flags
+_SERVER_RPC_HOST = flags.DEFINE_string(
+    'server_rpc_host', default='127.0.0.1', help='Server RPC host')
+_CLIENT_RPC_HOST = flags.DEFINE_string(
+    'client_rpc_host', default='127.0.0.1', help='Client RPC host')
+flags.adopt_module_key_flags(xds_flags)
+flags.adopt_module_key_flags(xds_k8s_flags)
+
+# Type aliases
+Socket = grpc_channelz.Socket
+XdsTestServer = server_app.XdsTestServer
+XdsTestClient = client_app.XdsTestClient
+
+
+def debug_cert(cert):
+    """Return a short human-readable summary of a certificate blob.
+
+    Summarizes as a SHA-1 fingerprint plus byte length so two certs can be
+    compared side by side; NOTE: SHA-1 is used for debug display only, not
+    for any security purpose.
+    """
+    if not cert:
+        return '<missing>'
+    sha1 = hashlib.sha1(cert)
+    return f'sha1={sha1.hexdigest()}, len={len(cert)}'
+
+
+def debug_sock_tls(tls):
+    """Format a channelz TLS security object's local/remote certificate
+    summaries on two aligned lines."""
+    return (f'local:  {debug_cert(tls.local_certificate)}\n'
+            f'remote: {debug_cert(tls.remote_certificate)}')
+
+
+def get_deployment_pod_ips(k8s_ns, deployment_name):
+    """Return the pod IPs of all pods currently listed for a deployment."""
+    deployment = k8s_ns.get_deployment(deployment_name)
+    pods = k8s_ns.list_deployment_pods(deployment)
+    return [pod.status.pod_ip for pod in pods]
+
+
+def main(argv):
+    """Compare TLS certificates reported by channelz on client and server.
+
+    Connects to the first pod of the already-deployed test server and test
+    client deployments, fetches the matching client/server sockets via
+    channelz, and prints whether the local/remote certificates match on
+    both sides — i.e. whether TLS and mTLS appear to be in effect.
+    """
+    if len(argv) > 1:
+        raise app.UsageError('Too many command-line arguments.')
+
+    k8s_api_manager = k8s.KubernetesApiManager(xds_k8s_flags.KUBE_CONTEXT.value)
+
+    # Namespaces
+    namespace = xds_flags.NAMESPACE.value
+    server_namespace = namespace
+    client_namespace = namespace
+
+    # Server
+    server_k8s_ns = k8s.KubernetesNamespace(k8s_api_manager, server_namespace)
+    server_name = xds_flags.SERVER_NAME.value
+    server_port = xds_flags.SERVER_PORT.value
+    # Only the first pod of each deployment is inspected.
+    server_pod_ip = get_deployment_pod_ips(server_k8s_ns, server_name)[0]
+    test_server: XdsTestServer = XdsTestServer(
+        ip=server_pod_ip,
+        rpc_port=server_port,
+        xds_host=xds_flags.SERVER_XDS_HOST.value,
+        xds_port=xds_flags.SERVER_XDS_PORT.value,
+        rpc_host=_SERVER_RPC_HOST.value)
+
+    # Client
+    client_k8s_ns = k8s.KubernetesNamespace(k8s_api_manager, client_namespace)
+    client_name = xds_flags.CLIENT_NAME.value
+    client_port = xds_flags.CLIENT_PORT.value
+    client_pod_ip = get_deployment_pod_ips(client_k8s_ns, client_name)[0]
+
+    test_client: XdsTestClient = XdsTestClient(
+        ip=client_pod_ip,
+        server_target=test_server.xds_uri,
+        rpc_port=client_port,
+        rpc_host=_CLIENT_RPC_HOST.value)
+
+    with test_client, test_server:
+        client_socket: Socket = test_client.get_client_socket_with_test_server()
+        server_socket: Socket = test_server.get_server_socket_matching_client(
+            client_socket)
+
+        server_tls = server_socket.security.tls
+        client_tls = client_socket.security.tls
+
+        print(f'\nServer certs:\n{debug_sock_tls(server_tls)}')
+        print(f'\nClient certs:\n{debug_sock_tls(client_tls)}')
+        print()
+
+        # TLS: the cert the server presents should be the one the client sees.
+        if server_tls.local_certificate:
+            eq = server_tls.local_certificate == client_tls.remote_certificate
+            print(f'(TLS)  Server local matches client remote: {eq}')
+        else:
+            print('(TLS)  Not detected')
+
+        # mTLS: the cert the client presents should be the one the server sees.
+        if server_tls.remote_certificate:
+            eq = server_tls.remote_certificate == client_tls.local_certificate
+            print(f'(mTLS) Server remote matches client local: {eq}')
+        else:
+            print('(mTLS) Not detected')
+
+
+if __name__ == '__main__':
+    app.run(main)

+ 132 - 0
tools/run_tests/xds_test_driver/bin/run_td_setup.py

@@ -0,0 +1,132 @@
+# Copyright 2020 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+from absl import app
+from absl import flags
+
+from framework import xds_flags
+from framework import xds_k8s_flags
+from framework.infrastructure import gcp
+from framework.infrastructure import k8s
+from framework.infrastructure import traffic_director
+
+
+logger = logging.getLogger(__name__)
+# Flags
+_CMD = flags.DEFINE_enum(
+    'cmd', default='create',
+    enum_values=['cycle', 'create', 'cleanup',
+                 'backends-add', 'backends-cleanup'],
+    help='Command')
+_SECURITY = flags.DEFINE_enum(
+    'security', default=None, enum_values=['mtls', 'tls', 'plaintext'],
+    help='Configure td with security')
+flags.adopt_module_key_flags(xds_flags)
+flags.adopt_module_key_flags(xds_k8s_flags)
+
+
+def main(argv):
+    """Create, clean up, or patch Traffic Director resources for the tests.
+
+    Behavior is selected by --cmd:
+      create / cycle:    set up TD for gRPC (plus security per --security);
+      cleanup / cycle:   force-delete the TD resources;
+      backends-add:      attach the k8s server NEG to the backend service;
+      backends-cleanup:  detach all backends from the backend service.
+    """
+    if len(argv) > 1:
+        raise app.UsageError('Too many command-line arguments.')
+
+    command = _CMD.value
+    security_mode = _SECURITY.value
+
+    project: str = xds_flags.PROJECT.value
+    network: str = xds_flags.NETWORK.value
+    namespace = xds_flags.NAMESPACE.value
+
+    # Test server
+    server_name = xds_flags.SERVER_NAME.value
+    server_port = xds_flags.SERVER_PORT.value
+    server_xds_host = xds_flags.SERVER_XDS_HOST.value
+    server_xds_port = xds_flags.SERVER_XDS_PORT.value
+
+    gcp_api_manager = gcp.api.GcpApiManager()
+
+    # The secure manager is only needed when a --security mode is requested;
+    # both managers share the same constructor signature.
+    if security_mode is None:
+        td = traffic_director.TrafficDirectorManager(
+            gcp_api_manager,
+            project=project,
+            resource_prefix=namespace,
+            network=network)
+    else:
+        td = traffic_director.TrafficDirectorSecureManager(
+            gcp_api_manager,
+            project=project,
+            resource_prefix=namespace,
+            network=network)
+
+    # noinspection PyBroadException
+    # Broad catch is deliberate: in 'cycle' mode a failed create should
+    # still fall through to the cleanup step below.
+    try:
+        if command == 'create' or command == 'cycle':
+            logger.info('Create-only mode')
+            if security_mode is None:
+                logger.info('No security')
+                td.setup_for_grpc(server_xds_host, server_xds_port)
+
+            elif security_mode == 'mtls':
+                logger.info('Setting up mtls')
+                td.setup_for_grpc(server_xds_host, server_xds_port)
+                td.setup_server_security(server_port,
+                                         tls=True, mtls=True)
+                td.setup_client_security(namespace, server_name,
+                                         tls=True, mtls=True)
+
+            elif security_mode == 'tls':
+                logger.info('Setting up tls')
+                td.setup_for_grpc(server_xds_host, server_xds_port)
+                td.setup_server_security(server_port,
+                                         tls=True, mtls=False)
+                td.setup_client_security(namespace, server_name,
+                                         tls=True, mtls=False)
+
+            elif security_mode == 'plaintext':
+                logger.info('Setting up plaintext')
+                td.setup_for_grpc(server_xds_host, server_xds_port)
+                td.setup_server_security(server_port,
+                                         tls=False, mtls=False)
+                td.setup_client_security(namespace, server_name,
+                                         tls=False, mtls=False)
+
+            logger.info('Works!')
+    except Exception:
+        logger.exception('Got error during creation')
+
+    if command == 'cleanup' or command == 'cycle':
+        logger.info('Cleaning up')
+        td.cleanup(force=True)
+
+    if command == 'backends-add':
+        logger.info('Adding backends')
+        k8s_api_manager = k8s.KubernetesApiManager(
+            xds_k8s_flags.KUBE_CONTEXT.value)
+        k8s_namespace = k8s.KubernetesNamespace(k8s_api_manager, namespace)
+
+        # The NEG is created by GKE for the server service; its name and
+        # zones are needed to register it with the TD backend service.
+        neg_name, neg_zones = k8s_namespace.get_service_neg(
+            server_name, server_port)
+
+        # todo(sergiitk): figure out how to confirm NEG is ready to be added
+        # time.sleep(30)
+        td.load_backend_service()
+        td.backend_service_add_neg_backends(neg_name, neg_zones)
+        # todo(sergiitk): wait until client reports rpc health
+    elif command == 'backends-cleanup':
+        td.load_backend_service()
+        td.backend_service_remove_all_backends()
+
+
+if __name__ == '__main__':
+    app.run(main)

+ 91 - 0
tools/run_tests/xds_test_driver/bin/run_test_client.py

@@ -0,0 +1,91 @@
+# Copyright 2020 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+from absl import app
+from absl import flags
+
+from framework import xds_flags
+from framework import xds_k8s_flags
+from framework.infrastructure import k8s
+from framework.test_app import client_app
+
+logger = logging.getLogger(__name__)
+# Flags
+_CMD = flags.DEFINE_enum(
+    'cmd', default='run', enum_values=['run', 'cleanup'],
+    help='Command')
+_SECURE = flags.DEFINE_bool(
+    "secure", default=False,
+    help="Run client in the secure mode")
+_QPS = flags.DEFINE_integer('qps', default=25, help='Queries per second')
+_PRINT_RESPONSE = flags.DEFINE_bool(
+    "print_response", default=False,
+    help="Client prints responses")
+_REUSE_NAMESPACE = flags.DEFINE_bool(
+    "reuse_namespace", default=True,
+    help="Use existing namespace if exists")
+_CLEANUP_NAMESPACE = flags.DEFINE_bool(
+    "cleanup_namespace", default=False,
+    help="Delete namespace during resource cleanup")
+flags.adopt_module_key_flags(xds_flags)
+flags.adopt_module_key_flags(xds_k8s_flags)
+
+
+def main(argv):
+    """Run or clean up the xDS test client on Kubernetes.
+
+    --cmd=run deploys the client pointed at xds:///<host>:<port>;
+    --cmd=cleanup force-deletes the client deployment and, with
+    --cleanup_namespace, the namespace as well.
+    """
+    if len(argv) > 1:
+        raise app.UsageError('Too many command-line arguments.')
+
+    # Base namespace
+    namespace = xds_flags.NAMESPACE.value
+    client_namespace = namespace
+
+    runner_kwargs = dict(
+        deployment_name=xds_flags.CLIENT_NAME.value,
+        image_name=xds_k8s_flags.CLIENT_IMAGE.value,
+        gcp_service_account=xds_k8s_flags.GCP_SERVICE_ACCOUNT.value,
+        network=xds_flags.NETWORK.value,
+        td_bootstrap_image=xds_k8s_flags.TD_BOOTSTRAP_IMAGE.value,
+        stats_port=xds_flags.CLIENT_PORT.value,
+        reuse_namespace=_REUSE_NAMESPACE.value)
+
+    # Secure mode swaps in the deployment template with security enabled.
+    if _SECURE.value:
+        runner_kwargs.update(
+            deployment_template='client-secure.deployment.yaml')
+
+    k8s_api_manager = k8s.KubernetesApiManager(xds_k8s_flags.KUBE_CONTEXT.value)
+    client_runner = client_app.KubernetesClientRunner(
+        k8s.KubernetesNamespace(k8s_api_manager, client_namespace),
+        **runner_kwargs)
+
+    # Server target
+    server_xds_host = xds_flags.SERVER_XDS_HOST.value
+    server_xds_port = xds_flags.SERVER_XDS_PORT.value
+
+    if _CMD.value == 'run':
+        logger.info('Run client, secure_mode=%s', _SECURE.value)
+        client_runner.run(
+            server_target=f'xds:///{server_xds_host}:{server_xds_port}',
+            qps=_QPS.value,
+            print_response=_PRINT_RESPONSE.value,
+            secure_mode=_SECURE.value)
+
+    elif _CMD.value == 'cleanup':
+        logger.info('Cleanup client')
+        client_runner.cleanup(force=True,
+                              force_namespace=_CLEANUP_NAMESPACE.value)
+
+
+if __name__ == '__main__':
+    app.run(main)

+ 91 - 0
tools/run_tests/xds_test_driver/bin/run_test_server.py

@@ -0,0 +1,91 @@
+#  Copyright 2020 gRPC authors.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+import logging
+
+from absl import app
+from absl import flags
+
+from framework import xds_flags
+from framework import xds_k8s_flags
+from framework.infrastructure import k8s
+from framework.test_app import server_app
+
+logger = logging.getLogger(__name__)
+# Flags
+_CMD = flags.DEFINE_enum(
+    'cmd', default='run', enum_values=['run', 'cleanup'],
+    help='Command')
+_SECURE = flags.DEFINE_bool(
+    "secure", default=False,
+    help="Run server in the secure mode")
+_REUSE_NAMESPACE = flags.DEFINE_bool(
+    "reuse_namespace", default=True,
+    help="Use existing namespace if exists")
+_CLEANUP_NAMESPACE = flags.DEFINE_bool(
+    "cleanup_namespace", default=False,
+    help="Delete namespace during resource cleanup")
+flags.adopt_module_key_flags(xds_flags)
+flags.adopt_module_key_flags(xds_k8s_flags)
+
+
+def main(argv):
+    """Run or clean up the xDS test server on Kubernetes.
+
+    --cmd=run deploys the server on the configured test port;
+    --cmd=cleanup force-deletes the server deployment and, with
+    --cleanup_namespace, the namespace as well.
+    """
+    if len(argv) > 1:
+        raise app.UsageError('Too many command-line arguments.')
+
+    # Base namespace
+    namespace = xds_flags.NAMESPACE.value
+    server_namespace = namespace
+
+    runner_kwargs = dict(
+        deployment_name=xds_flags.SERVER_NAME.value,
+        image_name=xds_k8s_flags.SERVER_IMAGE.value,
+        gcp_service_account=xds_k8s_flags.GCP_SERVICE_ACCOUNT.value,
+        network=xds_flags.NETWORK.value,
+        reuse_namespace=_REUSE_NAMESPACE.value)
+
+    # Secure mode additionally needs the TD bootstrap generator image and
+    # the security-enabled deployment template.
+    if _SECURE.value:
+        runner_kwargs.update(
+            td_bootstrap_image=xds_k8s_flags.TD_BOOTSTRAP_IMAGE.value,
+            deployment_template='server-secure.deployment.yaml')
+
+    k8s_api_manager = k8s.KubernetesApiManager(xds_k8s_flags.KUBE_CONTEXT.value)
+    server_runner = server_app.KubernetesServerRunner(
+        k8s.KubernetesNamespace(k8s_api_manager, server_namespace),
+        **runner_kwargs)
+
+    if _CMD.value == 'run':
+        logger.info('Run server, secure_mode=%s', _SECURE.value)
+        server_runner.run(test_port=xds_flags.SERVER_PORT.value,
+                          secure_mode=_SECURE.value)
+
+    elif _CMD.value == 'cleanup':
+        logger.info('Cleanup server')
+        server_runner.cleanup(force=True,
+                              force_namespace=_CLEANUP_NAMESPACE.value)
+
+
+if __name__ == '__main__':
+    app.run(main)

+ 5 - 0
tools/run_tests/xds_test_driver/config/grpc-testing.cfg

@@ -0,0 +1,5 @@
+--project=grpc-testing
+--network=default-vpc
+--gcp_service_account=830293263384-compute@developer.gserviceaccount.com
+# Dev version with security support
+--td_bootstrap_image=gcr.io/grpc-testing/td-grpc-bootstrap:66de7ea0e170351c9fae17232b81adbfb3e80ec3

+ 12 - 0
tools/run_tests/xds_test_driver/config/local-dev.cfg.example

@@ -0,0 +1,12 @@
+# Copy to local-dev.cfg
+# Local dev settings
+--flagfile=config/grpc-testing.cfg
+--namespace=interop-psm-security
+--kube_context=gke_grpc-testing_us-central1-a_interop-test-psm-sec1-us-central1
+# Test images
+--server_image=gcr.io/grpc-testing/xds-k8s-test-server-java:latest
+--client_image=gcr.io/grpc-testing/xds-k8s-test-client-java:latest
+# Debug settings
+--client_debug_use_port_forwarding
+--logger_levels=__main__:DEBUG,framework:DEBUG
+--verbosity=0

+ 13 - 0
tools/run_tests/xds_test_driver/framework/__init__.py

@@ -0,0 +1,13 @@
+# Copyright 2020 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+ 13 - 0
tools/run_tests/xds_test_driver/framework/infrastructure/__init__.py

@@ -0,0 +1,13 @@
+# Copyright 2020 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+ 18 - 0
tools/run_tests/xds_test_driver/framework/infrastructure/gcp/__init__.py

@@ -0,0 +1,18 @@
+# Copyright 2020 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from framework.infrastructure.gcp import api
+from framework.infrastructure.gcp import compute
+from framework.infrastructure.gcp import network_security
+from framework.infrastructure.gcp import network_services
+

+ 217 - 0
tools/run_tests/xds_test_driver/framework/infrastructure/gcp/api.py

@@ -0,0 +1,217 @@
+# Copyright 2020 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import contextlib
+import functools
+import logging
+import os
+
+# For some reason without `import grpc`, google.protobuf.json_format produces
+# "Segmentation fault"
+import grpc
+from absl import flags
+from google.longrunning import operations_pb2
+from google.protobuf import json_format
+from google.rpc import code_pb2
+from googleapiclient import discovery
+import googleapiclient.errors
+import tenacity
+
+logger = logging.getLogger(__name__)
+V1_DISCOVERY_URI = flags.DEFINE_string(
+    "v1_discovery_uri", default=discovery.V1_DISCOVERY_URI,
+    help="Override v1 Discovery URI")
+V2_DISCOVERY_URI = flags.DEFINE_string(
+    "v2_discovery_uri", default=discovery.V2_DISCOVERY_URI,
+    help="Override v2 Discovery URI")
+COMPUTE_V1_DISCOVERY_FILE = flags.DEFINE_string(
+    "compute_v1_discovery_file", default=None,
+    help="Load compute v1 from discovery file")
+
+# Type aliases
+Operation = operations_pb2.Operation
+
+
+class GcpApiManager:
+    """Lazily builds, caches, and closes Google Cloud API clients.
+
+    Discovery URIs and the private API key default to the corresponding
+    absl flags / the PRIVATE_API_KEY environment variable. Every built
+    client is registered on an ExitStack and released by close().
+
+    NOTE(review): lru_cache on instance methods keys on `self` and keeps
+    the manager alive for the cache's lifetime; presumably acceptable here
+    because managers are long-lived — confirm.
+    """
+
+    def __init__(self, *,
+                 v1_discovery_uri=None,
+                 v2_discovery_uri=None,
+                 compute_v1_discovery_file=None,
+                 private_api_key=None):
+        self.v1_discovery_uri = v1_discovery_uri or V1_DISCOVERY_URI.value
+        self.v2_discovery_uri = v2_discovery_uri or V2_DISCOVERY_URI.value
+        self.compute_v1_discovery_file = (compute_v1_discovery_file or
+                                          COMPUTE_V1_DISCOVERY_FILE.value)
+        # Needed to access private (allowlisted) v1alpha1 discovery docs.
+        self.private_api_key = private_api_key or os.getenv('PRIVATE_API_KEY')
+        self._exit_stack = contextlib.ExitStack()
+
+    def close(self):
+        # Closes every API client built so far.
+        self._exit_stack.close()
+
+    @functools.lru_cache(None)
+    def compute(self, version):
+        # Compute API client; only v1 is supported. When a local discovery
+        # file is configured, the client is built from it instead of the
+        # discovery service.
+        api_name = 'compute'
+        if version == 'v1':
+            if self.compute_v1_discovery_file:
+                return self._build_from_file(self.compute_v1_discovery_file)
+            else:
+                return self._build_from_discovery_v1(api_name, version)
+
+        raise NotImplementedError(f'Compute {version} not supported')
+
+    @functools.lru_cache(None)
+    def networksecurity(self, version):
+        # Network Security API client; v1alpha1 requires the private key.
+        api_name = 'networksecurity'
+        if version == 'v1alpha1':
+            return self._build_from_discovery_v2(
+                api_name, version, api_key=self.private_api_key)
+
+        raise NotImplementedError(f'Network Security {version} not supported')
+
+    @functools.lru_cache(None)
+    def networkservices(self, version):
+        # Network Services API client; v1alpha1 requires the private key.
+        api_name = 'networkservices'
+        if version == 'v1alpha1':
+            return self._build_from_discovery_v2(
+                api_name, version, api_key=self.private_api_key)
+
+        raise NotImplementedError(f'Network Services {version} not supported')
+
+    def _build_from_discovery_v1(self, api_name, version):
+        api = discovery.build(
+            api_name, version, cache_discovery=False,
+            discoveryServiceUrl=self.v1_discovery_uri)
+        self._exit_stack.enter_context(api)
+        return api
+
+    def _build_from_discovery_v2(self, api_name, version, *, api_key=None):
+        # The API key is appended to the discovery URL as a query parameter.
+        key_arg = f'&key={api_key}' if api_key else ''
+        api = discovery.build(
+            api_name, version, cache_discovery=False,
+            discoveryServiceUrl=f'{self.v2_discovery_uri}{key_arg}')
+        self._exit_stack.enter_context(api)
+        return api
+
+    def _build_from_file(self, discovery_file):
+        # Build a client from a local discovery document (no network call).
+        with open(discovery_file, 'r') as f:
+            api = discovery.build_from_document(f.read())
+        self._exit_stack.enter_context(api)
+        return api
+
+
+class Error(Exception):
+    """Base error class for all GCP API errors raised by this package."""
+
+
+class OperationError(Error):
+    """
+    Operation was not successful.
+
+    Assuming Operation based on Google API Style Guide:
+    https://cloud.google.com/apis/design/design_patterns#long_running_operations
+    https://github.com/googleapis/googleapis/blob/master/google/longrunning/operations.proto
+    """
+    def __init__(self, api_name, operation_response, message=None):
+        # Which GCP API produced the failed operation (used in the message).
+        self.api_name = api_name
+        # Parse the raw dict response into a typed Operation proto.
+        operation = json_format.ParseDict(operation_response, Operation())
+        self.name = operation.name or 'unknown'
+        self.error = operation.error
+        # Human-readable name of the canonical google.rpc status code.
+        self.code_name = code_pb2.Code.Name(operation.error.code)
+        if message is None:
+            message = (f'{api_name} operation "{self.name}" failed. Error '
+                       f'code: {self.error.code} ({self.code_name}), '
+                       f'message: {self.error.message}')
+        self.message = message
+        super().__init__(message)
+
+
+class GcpProjectApiResource:
+    """Base class for GCP API wrappers scoped to a single project."""
+    # todo(sergiitk): move someplace better
+    _WAIT_FOR_OPERATION_SEC = 60 * 5
+    _WAIT_FIXES_SEC = 2
+    _GCP_API_RETRIES = 5
+
+    def __init__(self, api: discovery.Resource, project: str):
+        self.api: discovery.Resource = api
+        self.project: str = project
+
+    @staticmethod
+    def wait_for_operation(operation_request,
+                           test_success_fn,
+                           timeout_sec=_WAIT_FOR_OPERATION_SEC,
+                           wait_sec=_WAIT_FIXES_SEC):
+        # Polls operation_request until test_success_fn(result) is truthy.
+        # retry_if_exception_type() with no args retries on *any* Exception,
+        # so transient API errors are also retried until the deadline.
+        # reraise=True surfaces the final exception as-is instead of
+        # wrapping it in tenacity.RetryError.
+        retryer = tenacity.Retrying(
+            retry=(tenacity.retry_if_not_result(test_success_fn) |
+                   tenacity.retry_if_exception_type()),
+            wait=tenacity.wait_fixed(wait_sec),
+            stop=tenacity.stop_after_delay(timeout_sec),
+            after=tenacity.after_log(logger, logging.DEBUG),
+            reraise=True)
+        return retryer(operation_request.execute)
+
+
+class GcpStandardCloudApiResource(GcpProjectApiResource):
+    """Base for APIs following the standard Cloud resource-name scheme
+    (projects/{project}/locations/{location}/...) with long-running
+    operations polled via projects.locations.operations."""
+    DEFAULT_GLOBAL = 'global'
+
+    def parent(self, location=None):
+        # Resource parent path; defaults to the 'global' location.
+        if not location:
+            location = self.DEFAULT_GLOBAL
+        return f'projects/{self.project}/locations/{location}'
+
+    def resource_full_name(self, name, collection_name):
+        # Full name: projects/{p}/locations/global/{collection}/{name}.
+        return f'{self.parent()}/{collection_name}/{name}'
+
+    def _create_resource(self, collection: discovery.Resource, body: dict,
+                         **kwargs):
+        # Creates the resource and blocks until the operation completes.
+        logger.debug("Creating %s", body)
+        create_req = collection.create(parent=self.parent(),
+                                       body=body, **kwargs)
+        self._execute(create_req)
+
+    @staticmethod
+    def _get_resource(collection: discovery.Resource, full_name):
+        resource = collection.get(name=full_name).execute()
+        logger.debug("Loaded %r", resource)
+        return resource
+
+    def _delete_resource(self, collection: discovery.Resource, full_name: str):
+        # Best-effort delete: HTTP errors are logged, not raised, so
+        # cleanup of already-missing resources doesn't abort the caller.
+        logger.debug("Deleting %s", full_name)
+        try:
+            self._execute(collection.delete(name=full_name))
+        except googleapiclient.errors.HttpError as error:
+            # noinspection PyProtectedMember
+            reason = error._get_reason()
+            logger.info('Delete failed. Error: %s %s',
+                        error.resp.status, reason)
+
+    def _execute(self, request,
+                 timeout_sec=GcpProjectApiResource._WAIT_FOR_OPERATION_SEC):
+        # Executes the request (with API-level retries), then waits for the
+        # returned long-running operation to finish.
+        operation = request.execute(num_retries=self._GCP_API_RETRIES)
+        self._wait(operation, timeout_sec)
+
+    def _wait(self, operation,
+              timeout_sec=GcpProjectApiResource._WAIT_FOR_OPERATION_SEC):
+        # Polls the operations endpoint until done=True or timeout, then
+        # raises OperationError if the completed operation carries an error.
+        op_name = operation['name']
+        logger.debug('Waiting for %s operation, timeout %s sec: %s',
+                     self.__class__.__name__, timeout_sec, op_name)
+
+        op_request = self.api.projects().locations().operations().get(
+            name=op_name)
+        operation = self.wait_for_operation(
+            operation_request=op_request,
+            test_success_fn=lambda result: result['done'],
+            timeout_sec=timeout_sec)
+
+        logger.debug('Completed operation: %s', operation)
+        if 'error' in operation:
+            raise OperationError(self.__class__.__name__, operation)

+ 333 - 0
tools/run_tests/xds_test_driver/framework/infrastructure/gcp/compute.py

@@ -0,0 +1,333 @@
+# Copyright 2020 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import enum
+import logging
+from typing import Optional, Dict, Any
+
+import dataclasses
+import googleapiclient.errors
+from googleapiclient import discovery
+import retrying
+
+from framework.infrastructure import gcp
+
+logger = logging.getLogger(__name__)
+
+
+class ComputeV1(gcp.api.GcpProjectApiResource):
+    # todo(sergiitk): move someplace better
+    _WAIT_FOR_BACKEND_SEC = 1200
+    _WAIT_FOR_OPERATION_SEC = 1200
+    _GCP_API_RETRIES = 5
+
+    @dataclasses.dataclass(frozen=True)
+    class GcpResource:
+        name: str
+        url: str
+
+    @dataclasses.dataclass(frozen=True)
+    class ZonalGcpResource(GcpResource):
+        zone: str
+
+    def __init__(self, api_manager: gcp.api.GcpApiManager, project: str):
+        super().__init__(api_manager.compute('v1'), project)
+
+    class HealthCheckProtocol(enum.Enum):
+        TCP = enum.auto()
+
+    class BackendServiceProtocol(enum.Enum):
+        HTTP2 = enum.auto()
+        GRPC = enum.auto()
+
+    def create_health_check_tcp(self, name,
+                                use_serving_port=False) -> GcpResource:
+        health_check_settings = {}
+        if use_serving_port:
+            health_check_settings['portSpecification'] = 'USE_SERVING_PORT'
+
+        return self._insert_resource(self.api.healthChecks(), {
+            'name': name,
+            'type': 'TCP',
+            'tcpHealthCheck': health_check_settings,
+        })
+
+    def delete_health_check(self, name):
+        self._delete_resource(self.api.healthChecks(), healthCheck=name)
+
+    def create_backend_service_traffic_director(
+        self,
+        name: str,
+        health_check: GcpResource,
+        protocol: Optional[BackendServiceProtocol] = None
+    ) -> GcpResource:
+        if not isinstance(protocol, self.BackendServiceProtocol):
+            raise TypeError(f'Unexpected Backend Service protocol: {protocol}')
+        return self._insert_resource(self.api.backendServices(), {
+            'name': name,
+            'loadBalancingScheme': 'INTERNAL_SELF_MANAGED',  # Traffic Director
+            'healthChecks': [health_check.url],
+            'protocol': protocol.name,
+        })
+
+    def get_backend_service_traffic_director(self, name: str) -> GcpResource:
+        return self._get_resource(self.api.backendServices(),
+                                  backendService=name)
+
+    def patch_backend_service(self, backend_service, body, **kwargs):
+        self._patch_resource(
+            collection=self.api.backendServices(),
+            backendService=backend_service.name,
+            body=body,
+            **kwargs)
+
+    def backend_service_add_backends(self, backend_service, backends):
+        backend_list = [{
+            'group': backend.url,
+            'balancingMode': 'RATE',
+            'maxRatePerEndpoint': 5
+        } for backend in backends]
+
+        self._patch_resource(
+            collection=self.api.backendServices(),
+            body={'backends': backend_list},
+            backendService=backend_service.name)
+
+    def backend_service_remove_all_backends(self, backend_service):
+        self._patch_resource(
+            collection=self.api.backendServices(),
+            body={'backends': []},
+            backendService=backend_service.name)
+
+    def delete_backend_service(self, name):
+        self._delete_resource(self.api.backendServices(), backendService=name)
+
+    def create_url_map(
+        self,
+        name: str,
+        matcher_name: str,
+        src_hosts,
+        dst_default_backend_service: GcpResource,
+        dst_host_rule_match_backend_service: Optional[GcpResource] = None,
+    ) -> GcpResource:
+        if dst_host_rule_match_backend_service is None:
+            dst_host_rule_match_backend_service = dst_default_backend_service
+        return self._insert_resource(self.api.urlMaps(), {
+            'name': name,
+            'defaultService': dst_default_backend_service.url,
+            'hostRules': [{
+                'hosts': src_hosts,
+                'pathMatcher': matcher_name,
+            }],
+            'pathMatchers': [{
+                'name': matcher_name,
+                'defaultService': dst_host_rule_match_backend_service.url,
+            }],
+        })
+
+    def delete_url_map(self, name):
+        self._delete_resource(self.api.urlMaps(), urlMap=name)
+
+    def create_target_grpc_proxy(
+        self,
+        name: str,
+        url_map: GcpResource,
+    ) -> GcpResource:
+        return self._insert_resource(self.api.targetGrpcProxies(), {
+            'name': name,
+            'url_map': url_map.url,
+            'validate_for_proxyless': True,
+        })
+
+    def delete_target_grpc_proxy(self, name):
+        self._delete_resource(self.api.targetGrpcProxies(),
+                              targetGrpcProxy=name)
+
+    def create_target_http_proxy(
+        self,
+        name: str,
+        url_map: GcpResource,
+    ) -> GcpResource:
+        return self._insert_resource(self.api.targetHttpProxies(), {
+            'name': name,
+            'url_map': url_map.url,
+        })
+
+    def delete_target_http_proxy(self, name):
+        self._delete_resource(self.api.targetHttpProxies(),
+                              targetHttpProxy=name)
+
+    def create_forwarding_rule(
+        self,
+        name: str,
+        src_port: int,
+        target_proxy: GcpResource,
+        network_url: str,
+    ) -> GcpResource:
+        return self._insert_resource(self.api.globalForwardingRules(), {
+            'name': name,
+            'loadBalancingScheme': 'INTERNAL_SELF_MANAGED',  # Traffic Director
+            'portRange': src_port,
+            'IPAddress': '0.0.0.0',
+            'network': network_url,
+            'target': target_proxy.url,
+        })
+
+    def delete_forwarding_rule(self, name):
+        self._delete_resource(self.api.globalForwardingRules(),
+                              forwardingRule=name)
+
+    @staticmethod
+    def _network_endpoint_group_not_ready(neg):
+        return not neg or neg.get('size', 0) == 0
+
+    def wait_for_network_endpoint_group(self, name, zone):
+        @retrying.retry(retry_on_result=self._network_endpoint_group_not_ready,
+                        stop_max_delay=60 * 1000,
+                        wait_fixed=2 * 1000)
+        def _wait_for_network_endpoint_group_ready():
+            try:
+                neg = self.get_network_endpoint_group(name, zone)
+                logger.debug('Waiting for endpoints: NEG %s in zone %s, '
+                             'current count %s',
+                             neg['name'], zone, neg.get('size'))
+            except googleapiclient.errors.HttpError as error:
+                # noinspection PyProtectedMember
+                reason = error._get_reason()
+                logger.debug('Retrying NEG load, got %s, details %s',
+                             error.resp.status, reason)
+                raise
+            return neg
+
+        network_endpoint_group = _wait_for_network_endpoint_group_ready()
+        # @todo(sergiitk): dataclass
+        return self.ZonalGcpResource(
+            network_endpoint_group['name'],
+            network_endpoint_group['selfLink'],
+            zone)
+
+    def get_network_endpoint_group(self, name, zone):
+        neg = self.api.networkEndpointGroups().get(project=self.project,
+                                                   networkEndpointGroup=name,
+                                                   zone=zone).execute()
+        # @todo(sergiitk): dataclass
+        return neg
+
+    def wait_for_backends_healthy_status(
+        self,
+        backend_service,
+        backends,
+        timeout_sec=_WAIT_FOR_BACKEND_SEC,
+        wait_sec=4,
+    ):
+        pending = set(backends)
+
+        @retrying.retry(
+            retry_on_result=lambda result: not result,
+            stop_max_delay=timeout_sec * 1000,
+            wait_fixed=wait_sec * 1000)
+        def _retry_backends_health():
+            for backend in pending:
+                result = self.get_backend_service_backend_health(
+                    backend_service, backend)
+
+                if 'healthStatus' not in result:
+                    logger.debug('Waiting for instances: backend %s, zone %s',
+                                 backend.name, backend.zone)
+                    continue
+
+                backend_healthy = True
+                for instance in result['healthStatus']:
+                    logger.debug(
+                        'Backend %s in zone %s: instance %s:%s health: %s',
+                        backend.name, backend.zone,
+                        instance['ipAddress'], instance['port'],
+                        instance['healthState'])
+                    if instance['healthState'] != 'HEALTHY':
+                        backend_healthy = False
+
+                if backend_healthy:
+                    logger.info('Backend %s in zone %s reported healthy',
+                                backend.name, backend.zone)
+                    pending.remove(backend)
+
+            return not pending
+
+        _retry_backends_health()
+
+    def get_backend_service_backend_health(self, backend_service, backend):
+        return self.api.backendServices().getHealth(
+            project=self.project, backendService=backend_service.name,
+            body={"group": backend.url}).execute()
+
+    def _get_resource(self, collection: discovery.Resource,
+                      **kwargs) -> GcpResource:
+        resp = collection.get(project=self.project, **kwargs).execute()
+        logger.debug("Loaded %r", resp)
+        return self.GcpResource(resp['name'], resp['selfLink'])
+
+    def _insert_resource(
+        self,
+        collection: discovery.Resource,
+        body: Dict[str, Any]
+    ) -> GcpResource:
+        logger.debug("Creating %s", body)
+        resp = self._execute(collection.insert(project=self.project, body=body))
+        return self.GcpResource(body['name'], resp['targetLink'])
+
+    def _patch_resource(self, collection, body, **kwargs):
+        logger.debug("Patching %s", body)
+        self._execute(
+            collection.patch(project=self.project, body=body, **kwargs))
+
+    def _delete_resource(self, collection, **kwargs):
+        try:
+            self._execute(collection.delete(project=self.project, **kwargs))
+            return True
+        except googleapiclient.errors.HttpError as error:
+            # noinspection PyProtectedMember
+            reason = error._get_reason()
+            logger.info('Delete failed. Error: %s %s',
+                        error.resp.status, reason)
+
+    @staticmethod
+    def _operation_status_done(operation):
+        return 'status' in operation and operation['status'] == 'DONE'
+
+    def _execute(self, request, *,
+                 test_success_fn=None,
+                 timeout_sec=_WAIT_FOR_OPERATION_SEC):
+        operation = request.execute(num_retries=self._GCP_API_RETRIES)
+        logger.debug('Response %s', operation)
+
+        # todo(sergiitk) try using wait() here
+        # https://googleapis.github.io/google-api-python-client/docs/dyn/compute_v1.globalOperations.html#wait
+        operation_request = self.api.globalOperations().get(
+            project=self.project, operation=operation['name'])
+
+        if test_success_fn is None:
+            test_success_fn = self._operation_status_done
+
+        logger.debug('Waiting for global operation %s, timeout %s sec',
+                     operation['name'], timeout_sec)
+        response = self.wait_for_operation(
+            operation_request=operation_request,
+            test_success_fn=test_success_fn,
+            timeout_sec=timeout_sec)
+
+        if 'error' in response:
+            logger.debug('Waiting for global operation failed, response: %r',
+                         response)
+            raise Exception(f'Operation {operation["name"]} did not complete '
+                            f'within {timeout_sec}s, error={response["error"]}')
+        return response

+ 113 - 0
tools/run_tests/xds_test_driver/framework/infrastructure/gcp/network_security.py

@@ -0,0 +1,113 @@
+# Copyright 2020 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+import dataclasses
+from google.rpc import code_pb2
+import tenacity
+
+from framework.infrastructure import gcp
+
+logger = logging.getLogger(__name__)
+
+
+class NetworkSecurityV1Alpha1(gcp.api.GcpStandardCloudApiResource):
+    API_NAME = 'networksecurity'
+    API_VERSION = 'v1alpha1'
+    SERVER_TLS_POLICIES = 'serverTlsPolicies'
+    CLIENT_TLS_POLICIES = 'clientTlsPolicies'
+
+    @dataclasses.dataclass(frozen=True)
+    class ServerTlsPolicy:
+        url: str
+        name: str
+        server_certificate: dict
+        mtls_policy: dict
+        update_time: str
+        create_time: str
+
+    @dataclasses.dataclass(frozen=True)
+    class ClientTlsPolicy:
+        url: str
+        name: str
+        client_certificate: dict
+        server_validation_ca: list
+        update_time: str
+        create_time: str
+
+    def __init__(self, api_manager: gcp.api.GcpApiManager, project: str):
+        super().__init__(api_manager.networksecurity(self.API_VERSION), project)
+        # Shortcut
+        self._api_locations = self.api.projects().locations()
+
+    def create_server_tls_policy(self, name, body: dict):
+        return self._create_resource(
+            self._api_locations.serverTlsPolicies(),
+            body, serverTlsPolicyId=name)
+
+    def get_server_tls_policy(self, name: str) -> ServerTlsPolicy:
+        result = self._get_resource(
+            collection=self._api_locations.serverTlsPolicies(),
+            full_name=self.resource_full_name(name, self.SERVER_TLS_POLICIES))
+
+        return self.ServerTlsPolicy(
+            name=name,
+            url=result['name'],
+            server_certificate=result.get('serverCertificate', {}),
+            mtls_policy=result.get('mtlsPolicy', {}),
+            create_time=result['createTime'],
+            update_time=result['updateTime'])
+
+    def delete_server_tls_policy(self, name):
+        return self._delete_resource(
+            collection=self._api_locations.serverTlsPolicies(),
+            full_name=self.resource_full_name(name, self.SERVER_TLS_POLICIES))
+
+    def create_client_tls_policy(self, name, body: dict):
+        return self._create_resource(
+            self._api_locations.clientTlsPolicies(),
+            body, clientTlsPolicyId=name)
+
+    def get_client_tls_policy(self, name: str) -> ClientTlsPolicy:
+        result = self._get_resource(
+            collection=self._api_locations.clientTlsPolicies(),
+            full_name=self.resource_full_name(name, self.CLIENT_TLS_POLICIES))
+
+        return self.ClientTlsPolicy(
+            name=name,
+            url=result['name'],
+            client_certificate=result.get('clientCertificate', {}),
+            server_validation_ca=result.get('serverValidationCa', []),
+            create_time=result['createTime'],
+            update_time=result['updateTime'])
+
+    def delete_client_tls_policy(self, name):
+        return self._delete_resource(
+            collection=self._api_locations.clientTlsPolicies(),
+            full_name=self.resource_full_name(name, self.CLIENT_TLS_POLICIES))
+
+    def _execute(self, *args, **kwargs):
+        # Workaround TD bug: throttled operations are reported as internal.
+        retryer = tenacity.Retrying(
+            retry=tenacity.retry_if_exception(self._operation_internal_error),
+            wait=tenacity.wait_fixed(10),
+            stop=tenacity.stop_after_delay(5 * 60),
+            before_sleep=tenacity.before_sleep_log(logger, logging.DEBUG),
+            reraise=True)
+        retryer(super()._execute, *args, **kwargs)
+
+    @staticmethod
+    def _operation_internal_error(exception):
+        return (isinstance(exception, gcp.api.OperationError) and
+                exception.error.code == code_pb2.INTERNAL)

+ 89 - 0
tools/run_tests/xds_test_driver/framework/infrastructure/gcp/network_services.py

@@ -0,0 +1,89 @@
+# Copyright 2020 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import Optional
+
+import dataclasses
+from google.rpc import code_pb2
+import tenacity
+
+from framework.infrastructure import gcp
+
+logger = logging.getLogger(__name__)
+
+
+class NetworkServicesV1Alpha1(gcp.api.GcpStandardCloudApiResource):
+    """Wrapper over the GCP Network Services API, v1alpha1 surface.
+
+    Manages endpointConfigSelector resources, which attach server-side xDS
+    configuration (e.g. TLS policies) to endpoints.
+    """
+    API_NAME = 'networkservices'
+    API_VERSION = 'v1alpha1'
+    DEFAULT_GLOBAL = 'global'
+    ENDPOINT_CONFIG_SELECTORS = 'endpointConfigSelectors'
+
+    @dataclasses.dataclass(frozen=True)
+    class EndpointConfigSelector:
+        # Parsed view of an endpointConfigSelector REST resource.
+        url: str
+        name: str
+        type: str
+        server_tls_policy: Optional[str]
+        traffic_port_selector: dict
+        endpoint_matcher: dict
+        http_filters: dict
+        update_time: str
+        create_time: str
+
+    def __init__(self, api_manager: gcp.api.GcpApiManager, project: str):
+        super().__init__(api_manager.networkservices(self.API_VERSION), project)
+        # Shortcut
+        self._api_locations = self.api.projects().locations()
+
+    def create_endpoint_config_selector(self, name, body: dict):
+        """Create an endpointConfigSelector with the given id and body."""
+        return self._create_resource(
+            self._api_locations.endpointConfigSelectors(),
+            body, endpointConfigSelectorId=name)
+
+    def get_endpoint_config_selector(self, name: str) -> EndpointConfigSelector:
+        """Fetch an endpointConfigSelector and repack it into a dataclass."""
+        result = self._get_resource(
+            collection=self._api_locations.endpointConfigSelectors(),
+            full_name=self.resource_full_name(name,
+                                              self.ENDPOINT_CONFIG_SELECTORS))
+        return self.EndpointConfigSelector(
+            name=name,
+            url=result['name'],
+            type=result['type'],
+            server_tls_policy=result.get('serverTlsPolicy', None),
+            traffic_port_selector=result['trafficPortSelector'],
+            endpoint_matcher=result['endpointMatcher'],
+            http_filters=result['httpFilters'],
+            update_time=result['updateTime'],
+            create_time=result['createTime'])
+
+    def delete_endpoint_config_selector(self, name):
+        """Delete an endpointConfigSelector by its short name."""
+        return self._delete_resource(
+            collection=self._api_locations.endpointConfigSelectors(),
+            full_name=self.resource_full_name(name,
+                                              self.ENDPOINT_CONFIG_SELECTORS))
+
+    def _execute(self, *args, **kwargs):
+        """Execute the base request, retrying on INTERNAL operation errors."""
+        # Workaround TD bug: throttled operations are reported as internal.
+        retryer = tenacity.Retrying(
+            retry=tenacity.retry_if_exception(self._operation_internal_error),
+            wait=tenacity.wait_fixed(10),
+            stop=tenacity.stop_after_delay(5 * 60),
+            before_sleep=tenacity.before_sleep_log(logger, logging.DEBUG),
+            reraise=True)
+        retryer(super()._execute, *args, **kwargs)
+
+    @staticmethod
+    def _operation_internal_error(exception):
+        # True only for OperationErrors whose status code is INTERNAL.
+        return (isinstance(exception, gcp.api.OperationError) and
+                exception.error.code == code_pb2.INTERNAL)

+ 324 - 0
tools/run_tests/xds_test_driver/framework/infrastructure/k8s.py

@@ -0,0 +1,324 @@
+# Copyright 2016 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import functools
+import json
+import logging
+import subprocess
+import time
+from typing import Optional, List, Tuple
+
+import retrying
+import kubernetes.config
+from kubernetes import client
+from kubernetes import utils
+
+logger = logging.getLogger(__name__)
+# Type aliases
+# Short local names for frequently used kubernetes.client types, so this
+# module (and its importers) can avoid the long dotted paths.
+V1Deployment = client.V1Deployment
+V1ServiceAccount = client.V1ServiceAccount
+V1Pod = client.V1Pod
+V1PodList = client.V1PodList
+V1Service = client.V1Service
+V1Namespace = client.V1Namespace
+ApiException = client.ApiException
+
+
+def simple_resource_get(func):
+    def wrap_not_found_return_none(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except client.ApiException as e:
+            if e.status == 404:
+                # Ignore 404
+                return None
+            raise
+    return wrap_not_found_return_none
+
+
+def label_dict_to_selector(labels: dict) -> str:
+    return ','.join(f'{k}=={v}' for k, v in labels.items())
+
+
+class KubernetesApiManager:
+    """Holds Kubernetes API clients for a single kubeconfig context.
+
+    The underlying ApiClient is cached class-wide per context, so managers
+    created for the same context share one client.
+    """
+    def __init__(self, context):
+        # Name of the kubeconfig context to load credentials from.
+        self.context = context
+        self.client = self._cached_api_client_for_context(context)
+        self.apps = client.AppsV1Api(self.client)
+        self.core = client.CoreV1Api(self.client)
+
+    def close(self):
+        """Close the underlying ApiClient.
+
+        NOTE(review): the client is cached class-wide; closing it here also
+        affects other managers sharing the same context — confirm intended.
+        """
+        self.client.close()
+
+    @classmethod
+    @functools.lru_cache(None)
+    def _cached_api_client_for_context(cls, context: str) -> client.ApiClient:
+        # Unbounded cache: one ApiClient per context, kept for the lifetime
+        # of the process.
+        return kubernetes.config.new_client_from_config(context=context)
+
+
+class PortForwardingError(Exception):
+    """Raised when a kubectl port-forward subprocess fails to start or
+    produces unexpected output."""
+
+
+class KubernetesNamespace:
+    """Operations scoped to one Kubernetes namespace.
+
+    Reads and deletes services, service accounts, deployments and pods;
+    waits for resource state transitions via polling; and port-forwards to
+    pods by shelling out to kubectl.
+    """
+    # Service annotation where GKE reports NEG status as JSON.
+    NEG_STATUS_META = 'cloud.google.com/neg-status'
+    PORT_FORWARD_LOCAL_ADDRESS: str = '127.0.0.1'
+    DELETE_GRACE_PERIOD_SEC: int = 5
+
+    def __init__(self, api: KubernetesApiManager, name: str):
+        # name: the namespace; api: shared Kubernetes API clients.
+        self.name = name
+        self.api = api
+
+    def apply_manifest(self, manifest):
+        """Create the resources described by a parsed manifest dict."""
+        return utils.create_from_dict(self.api.client, manifest,
+                                      namespace=self.name)
+
+    @simple_resource_get
+    def get_service(self, name) -> V1Service:
+        """Return the named service, or None if it doesn't exist (404)."""
+        return self.api.core.read_namespaced_service(name, self.name)
+
+    @simple_resource_get
+    def get_service_account(self, name) -> V1ServiceAccount:
+        """Return the named service account, or None if it doesn't exist.
+
+        (Return annotation corrected from V1Service to V1ServiceAccount.)
+        """
+        return self.api.core.read_namespaced_service_account(name, self.name)
+
+    def delete_service(
+        self,
+        name,
+        grace_period_seconds=DELETE_GRACE_PERIOD_SEC
+    ):
+        """Foreground-delete the named service."""
+        self.api.core.delete_namespaced_service(
+            name=name, namespace=self.name,
+            body=client.V1DeleteOptions(
+                propagation_policy='Foreground',
+                grace_period_seconds=grace_period_seconds))
+
+    def delete_service_account(
+        self,
+        name,
+        grace_period_seconds=DELETE_GRACE_PERIOD_SEC
+    ):
+        """Foreground-delete the named service account."""
+        self.api.core.delete_namespaced_service_account(
+            name=name, namespace=self.name,
+            body=client.V1DeleteOptions(
+                propagation_policy='Foreground',
+                grace_period_seconds=grace_period_seconds))
+
+    @simple_resource_get
+    def get(self) -> V1Namespace:
+        """Return this namespace object, or None if it doesn't exist."""
+        return self.api.core.read_namespace(self.name)
+
+    def delete(self, grace_period_seconds=DELETE_GRACE_PERIOD_SEC):
+        """Foreground-delete this namespace and everything in it."""
+        self.api.core.delete_namespace(
+            name=self.name,
+            body=client.V1DeleteOptions(
+                propagation_policy='Foreground',
+                grace_period_seconds=grace_period_seconds))
+
+    def wait_for_service_deleted(self, name: str,
+                                 timeout_sec=60, wait_sec=1):
+        """Poll until the named service no longer exists."""
+        @retrying.retry(retry_on_result=lambda r: r is not None,
+                        stop_max_delay=timeout_sec * 1000,
+                        wait_fixed=wait_sec * 1000)
+        def _wait_for_deleted_service_with_retry():
+            service = self.get_service(name)
+            if service is not None:
+                logger.info('Waiting for service %s to be deleted',
+                            service.metadata.name)
+            return service
+        _wait_for_deleted_service_with_retry()
+
+    def wait_for_service_account_deleted(self, name: str,
+                                         timeout_sec=60, wait_sec=1):
+        """Poll until the named service account no longer exists."""
+        @retrying.retry(retry_on_result=lambda r: r is not None,
+                        stop_max_delay=timeout_sec * 1000,
+                        wait_fixed=wait_sec * 1000)
+        def _wait_for_deleted_service_account_with_retry():
+            service_account = self.get_service_account(name)
+            if service_account is not None:
+                logger.info('Waiting for service account %s to be deleted',
+                            service_account.metadata.name)
+            return service_account
+        _wait_for_deleted_service_account_with_retry()
+
+    def wait_for_namespace_deleted(self,
+                                   timeout_sec=240, wait_sec=2):
+        """Poll until this namespace no longer exists (longer default
+        timeout: namespace deletion cascades to contained resources)."""
+        @retrying.retry(retry_on_result=lambda r: r is not None,
+                        stop_max_delay=timeout_sec * 1000,
+                        wait_fixed=wait_sec * 1000)
+        def _wait_for_deleted_namespace_with_retry():
+            namespace = self.get()
+            if namespace is not None:
+                logger.info('Waiting for namespace %s to be deleted',
+                            namespace.metadata.name)
+            return namespace
+        _wait_for_deleted_namespace_with_retry()
+
+    def wait_for_service_neg(self, name: str,
+                             timeout_sec=60, wait_sec=1):
+        """Poll until the service carries the NEG status annotation.
+
+        NOTE(review): get_service() returns None on 404, which would raise
+        AttributeError below — presumably relying on the retry decorator's
+        exception handling; confirm.
+        """
+        @retrying.retry(retry_on_result=lambda r: not r,
+                        stop_max_delay=timeout_sec * 1000,
+                        wait_fixed=wait_sec * 1000)
+        def _wait_for_service_neg():
+            service = self.get_service(name)
+            if self.NEG_STATUS_META not in service.metadata.annotations:
+                logger.info('Waiting for service %s NEG',
+                            service.metadata.name)
+                return False
+            return True
+        _wait_for_service_neg()
+
+    def get_service_neg(
+        self,
+        service_name: str,
+        service_port: int
+    ) -> Tuple[str, List[str]]:
+        """Return (neg_name, neg_zones) parsed from the service's NEG
+        status annotation for the given port."""
+        service = self.get_service(service_name)
+        neg_info: dict = json.loads(
+            service.metadata.annotations[self.NEG_STATUS_META])
+        neg_name: str = neg_info['network_endpoint_groups'][str(service_port)]
+        neg_zones: List[str] = neg_info['zones']
+        return neg_name, neg_zones
+
+    @simple_resource_get
+    def get_deployment(self, name) -> V1Deployment:
+        """Return the named deployment, or None if it doesn't exist."""
+        return self.api.apps.read_namespaced_deployment(name, self.name)
+
+    def delete_deployment(
+        self,
+        name,
+        grace_period_seconds=DELETE_GRACE_PERIOD_SEC
+    ):
+        """Foreground-delete the named deployment."""
+        self.api.apps.delete_namespaced_deployment(
+            name=name, namespace=self.name,
+            body=client.V1DeleteOptions(
+                propagation_policy='Foreground',
+                grace_period_seconds=grace_period_seconds))
+
+    def list_deployment_pods(self, deployment: V1Deployment) -> List[V1Pod]:
+        """Return the pods selected by the deployment's match_labels."""
+        # V1LabelSelector.match_expressions not supported at the moment
+        return self.list_pods_with_labels(deployment.spec.selector.match_labels)
+
+    def wait_for_deployment_available_replicas(self, name, count=1,
+                                               timeout_sec=60, wait_sec=1):
+        """Poll until the deployment has at least `count` available replicas."""
+        @retrying.retry(
+            retry_on_result=lambda r: not self._replicas_available(r, count),
+            stop_max_delay=timeout_sec * 1000,
+            wait_fixed=wait_sec * 1000)
+        def _wait_for_deployment_available_replicas():
+            deployment = self.get_deployment(name)
+            logger.info('Waiting for deployment %s to have %s available '
+                        'replicas, current count %s',
+                        deployment.metadata.name,
+                        count, deployment.status.available_replicas)
+            return deployment
+        _wait_for_deployment_available_replicas()
+
+    def wait_for_deployment_deleted(self, deployment_name: str,
+                                    timeout_sec=60, wait_sec=1):
+        """Poll until the named deployment no longer exists."""
+        @retrying.retry(retry_on_result=lambda r: r is not None,
+                        stop_max_delay=timeout_sec * 1000,
+                        wait_fixed=wait_sec * 1000)
+        def _wait_for_deleted_deployment_with_retry():
+            deployment = self.get_deployment(deployment_name)
+            if deployment is not None:
+                logger.info('Waiting for deployment %s to be deleted. '
+                            'Non-terminated replicas: %s',
+                            deployment.metadata.name,
+                            deployment.status.replicas)
+            return deployment
+        _wait_for_deleted_deployment_with_retry()
+
+    def list_pods_with_labels(self, labels: dict) -> List[V1Pod]:
+        """Return pods in this namespace matching all given labels."""
+        pod_list: V1PodList = self.api.core.list_namespaced_pod(
+            self.name, label_selector=label_dict_to_selector(labels))
+        return pod_list.items
+
+    def get_pod(self, name) -> client.V1Pod:
+        """Return the named pod (raises ApiException if missing)."""
+        return self.api.core.read_namespaced_pod(name, self.name)
+
+    def wait_for_pod_started(self, pod_name, timeout_sec=60, wait_sec=1):
+        """Poll until the pod leaves the Pending/Unknown phases."""
+        @retrying.retry(retry_on_result=lambda r: not self._pod_started(r),
+                        stop_max_delay=timeout_sec * 1000,
+                        wait_fixed=wait_sec * 1000)
+        def _wait_for_pod_started():
+            pod = self.get_pod(pod_name)
+            logger.info('Waiting for pod %s to start, current phase: %s',
+                        pod.metadata.name,
+                        pod.status.phase)
+            return pod
+        _wait_for_pod_started()
+
+    def port_forward_pod(
+        self,
+        pod: V1Pod,
+        remote_port: int,
+        local_port: Optional[int] = None,
+        local_address: Optional[str] = None,
+    ) -> subprocess.Popen:
+        """Experimental
+
+        Start `kubectl port-forward` to the pod in a subprocess, block until
+        its stdout confirms forwarding is live, and return the process.
+        Raises PortForwardingError if kubectl exits or prints anything else.
+        """
+        local_address = local_address or self.PORT_FORWARD_LOCAL_ADDRESS
+        local_port = local_port or remote_port
+        cmd = [
+            "kubectl", "--context", self.api.context,
+            "--namespace", self.name,
+            "port-forward", "--address", local_address,
+            f"pod/{pod.metadata.name}", f"{local_port}:{remote_port}"
+        ]
+        pf = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                              stderr=subprocess.STDOUT,
+                              universal_newlines=True)
+        # Wait for stdout line indicating successful start.
+        expected = (f"Forwarding from {local_address}:{local_port}"
+                    f" -> {remote_port}")
+        try:
+            while True:
+                time.sleep(0.05)
+                output = pf.stdout.readline().strip()
+                if not output:
+                    # Empty line: check whether kubectl has already exited.
+                    return_code = pf.poll()
+                    if return_code is not None:
+                        errors = [error for error in pf.stdout.readlines()]
+                        raise PortForwardingError(
+                            'Error forwarding port, kubectl return '
+                            f'code {return_code}, output {errors}')
+                elif output != expected:
+                    raise PortForwardingError(
+                        f'Error forwarding port, unexpected output {output}')
+                else:
+                    logger.info(output)
+                    break
+        except Exception:
+            # Don't leak the kubectl subprocess on failure.
+            self.port_forward_stop(pf)
+            raise
+
+        # todo(sergiitk): return new PortForwarder object
+        return pf
+
+    @staticmethod
+    def port_forward_stop(pf):
+        """Kill the port-forward subprocess and drain its output."""
+        logger.info('Shutting down port forwarding, pid %s', pf.pid)
+        pf.kill()
+        stdout, _stderr = pf.communicate(timeout=5)
+        logger.info('Port forwarding stopped')
+        # todo(sergiitk): make debug
+        logger.info('Port forwarding remaining stdout: %s', stdout)
+
+    @staticmethod
+    def _pod_started(pod: V1Pod):
+        # A pod counts as "started" once it has been scheduled and reported.
+        return pod.status.phase not in ('Pending', 'Unknown')
+
+    @staticmethod
+    def _replicas_available(deployment, count):
+        # True once the deployment reports at least `count` available replicas.
+        return (deployment is not None and
+                deployment.status.available_replicas is not None and
+                deployment.status.available_replicas >= count)

+ 462 - 0
tools/run_tests/xds_test_driver/framework/infrastructure/traffic_director.py

@@ -0,0 +1,462 @@
+# Copyright 2016 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import Optional, Set
+
+from framework.infrastructure import gcp
+
+logger = logging.getLogger(__name__)
+
+# Type aliases
+# Compute
+ComputeV1 = gcp.compute.ComputeV1
+HealthCheckProtocol = ComputeV1.HealthCheckProtocol
+BackendServiceProtocol = ComputeV1.BackendServiceProtocol
+GcpResource = ComputeV1.GcpResource
+ZonalGcpResource = ComputeV1.ZonalGcpResource
+
+# Network Security
+NetworkSecurityV1Alpha1 = gcp.network_security.NetworkSecurityV1Alpha1
+ServerTlsPolicy = NetworkSecurityV1Alpha1.ServerTlsPolicy
+ClientTlsPolicy = NetworkSecurityV1Alpha1.ClientTlsPolicy
+
+# Network Services
+NetworkServicesV1Alpha1 = gcp.network_services.NetworkServicesV1Alpha1
+EndpointConfigSelector = NetworkServicesV1Alpha1.EndpointConfigSelector
+
+
class TrafficDirectorManager:
    """Manages the lifecycle of Traffic Director resources for a test run.

    Creates, loads, and deletes the health check, backend service, URL map,
    target proxy (gRPC or HTTP), and forwarding rule. All resource names
    share a common prefix so parallel test runs don't collide.
    """
    compute: ComputeV1
    BACKEND_SERVICE_NAME = "backend-service"
    HEALTH_CHECK_NAME = "health-check"
    URL_MAP_NAME = "url-map"
    URL_MAP_PATH_MATCHER_NAME = "path-matcher"
    TARGET_PROXY_NAME = "target-proxy"
    FORWARDING_RULE_NAME = "forwarding-rule"

    def __init__(
        self,
        gcp_api_manager: gcp.api.GcpApiManager,
        project: str,
        *,
        resource_prefix: str,
        network: str = 'default',
    ):
        # API
        self.compute = ComputeV1(gcp_api_manager, project)

        # Settings
        self.project: str = project
        self.network: str = network
        self.resource_prefix: str = resource_prefix

        # Managed resources
        self.health_check: Optional[GcpResource] = None
        self.backend_service: Optional[GcpResource] = None
        self.url_map: Optional[GcpResource] = None
        self.target_proxy: Optional[GcpResource] = None
        # todo(sergiitk): fix
        self.target_proxy_is_http: bool = False
        self.forwarding_rule: Optional[GcpResource] = None
        self.backends: Set[ZonalGcpResource] = set()

    @property
    def network_url(self):
        """Partial URL of the VPC network, as accepted by Compute APIs."""
        return f'global/networks/{self.network}'

    def setup_for_grpc(
        self,
        service_host,
        service_port,
        *,
        backend_protocol=BackendServiceProtocol.GRPC
    ):
        """Create the full resource chain routing service_host:service_port."""
        self.create_health_check()
        self.create_backend_service(protocol=backend_protocol)
        self.create_url_map(service_host, service_port)
        if backend_protocol is BackendServiceProtocol.GRPC:
            self.create_target_grpc_proxy()
        else:
            self.create_target_http_proxy()
        self.create_forwarding_rule(service_port)

    def cleanup(self, *, force=False):
        """Delete managed resources.

        With force=True, delete by well-known prefixed name even when the
        resource wasn't created (or loaded) by this instance.
        """
        # Cleanup in the reverse order of creation
        self.delete_forwarding_rule(force=force)
        if self.target_proxy_is_http:
            self.delete_target_http_proxy(force=force)
        else:
            self.delete_target_grpc_proxy(force=force)
        self.delete_url_map(force=force)
        self.delete_backend_service(force=force)
        self.delete_health_check(force=force)

    def _ns_name(self, name):
        # Namespace resource names with the run-specific prefix.
        return f'{self.resource_prefix}-{name}'

    def create_health_check(self, protocol=HealthCheckProtocol.TCP):
        """Create the health check. Only the TCP protocol is supported."""
        if self.health_check:
            # f-string, not printf-style lazy args: unlike logging calls,
            # ValueError does not interpolate extra arguments.
            raise ValueError(f'Health check {self.health_check.name} '
                             'already created, delete it first')
        name = self._ns_name(self.HEALTH_CHECK_NAME)
        logger.info('Creating %s Health Check %s', protocol.name, name)
        if protocol is HealthCheckProtocol.TCP:
            resource = self.compute.create_health_check_tcp(
                name, use_serving_port=True)
        else:
            raise ValueError('Unexpected protocol')
        self.health_check = resource

    def delete_health_check(self, force=False):
        """Delete the health check; no-op when not created and not forced."""
        if force:
            name = self._ns_name(self.HEALTH_CHECK_NAME)
        elif self.health_check:
            name = self.health_check.name
        else:
            return
        logger.info('Deleting Health Check %s', name)
        self.compute.delete_health_check(name)
        self.health_check = None

    def create_backend_service(
        self,
        protocol: BackendServiceProtocol = BackendServiceProtocol.GRPC
    ):
        """Create the backend service attached to the health check."""
        name = self._ns_name(self.BACKEND_SERVICE_NAME)
        logger.info('Creating %s Backend Service %s', protocol.name, name)
        resource = self.compute.create_backend_service_traffic_director(
            name, health_check=self.health_check, protocol=protocol)
        self.backend_service = resource

    def load_backend_service(self):
        """Load a pre-existing backend service by its well-known name."""
        name = self._ns_name(self.BACKEND_SERVICE_NAME)
        resource = self.compute.get_backend_service_traffic_director(name)
        self.backend_service = resource

    def delete_backend_service(self, force=False):
        """Delete the backend service; no-op when not created and not forced."""
        if force:
            name = self._ns_name(self.BACKEND_SERVICE_NAME)
        elif self.backend_service:
            name = self.backend_service.name
        else:
            return
        logger.info('Deleting Backend Service %s', name)
        self.compute.delete_backend_service(name)
        self.backend_service = None

    def backend_service_add_neg_backends(self, name, zones):
        """Wait for NEGs in the given zones, attach them, and await health."""
        logger.info('Loading NEGs')
        for zone in zones:
            backend = self.compute.wait_for_network_endpoint_group(name, zone)
            logger.info('Loaded NEG %s in zone %s', backend.name, backend.zone)
            self.backends.add(backend)

        self.backend_service_add_backends()
        self.wait_for_backends_healthy_status()

    def backend_service_add_backends(self):
        """Attach all tracked backends to the backend service."""
        # Use the module logger (not the root logger) for consistency.
        logger.info('Adding backends to Backend Service %s: %r',
                    self.backend_service.name, self.backends)
        self.compute.backend_service_add_backends(
            self.backend_service, self.backends)

    def backend_service_remove_all_backends(self):
        """Detach every backend from the backend service."""
        logger.info('Removing backends from Backend Service %s',
                    self.backend_service.name)
        self.compute.backend_service_remove_all_backends(self.backend_service)

    def wait_for_backends_healthy_status(self):
        """Block until the backend service reports all backends healthy."""
        logger.debug(
            "Waiting for Backend Service %s to report all backends healthy %r",
            self.backend_service, self.backends)
        self.compute.wait_for_backends_healthy_status(
            self.backend_service, self.backends)

    def create_url_map(
        self,
        src_host: str,
        src_port: int,
    ) -> GcpResource:
        """Create a URL map routing src_host:src_port to the backend service."""
        src_address = f'{src_host}:{src_port}'
        name = self._ns_name(self.URL_MAP_NAME)
        matcher_name = self._ns_name(self.URL_MAP_PATH_MATCHER_NAME)
        logger.info('Creating URL map %s %s -> %s',
                    name, src_address, self.backend_service.name)
        resource = self.compute.create_url_map(
            name, matcher_name, [src_address], self.backend_service)
        self.url_map = resource
        return resource

    def delete_url_map(self, force=False):
        """Delete the URL map; no-op when not created and not forced."""
        if force:
            name = self._ns_name(self.URL_MAP_NAME)
        elif self.url_map:
            name = self.url_map.name
        else:
            return
        logger.info('Deleting URL Map %s', name)
        self.compute.delete_url_map(name)
        self.url_map = None

    def create_target_grpc_proxy(self):
        """Create a target gRPC proxy pointing at the URL map."""
        # todo: different kinds
        name = self._ns_name(self.TARGET_PROXY_NAME)
        logger.info('Creating target GRPC proxy %s to url map %s',
                    name, self.url_map.name)
        resource = self.compute.create_target_grpc_proxy(
            name, self.url_map)
        self.target_proxy = resource

    def delete_target_grpc_proxy(self, force=False):
        """Delete the target gRPC proxy; no-op when absent and not forced."""
        if force:
            name = self._ns_name(self.TARGET_PROXY_NAME)
        elif self.target_proxy:
            name = self.target_proxy.name
        else:
            return
        logger.info('Deleting Target GRPC proxy %s', name)
        self.compute.delete_target_grpc_proxy(name)
        self.target_proxy = None
        self.target_proxy_is_http = False

    def create_target_http_proxy(self):
        """Create a target HTTP proxy pointing at the URL map."""
        # todo: different kinds
        name = self._ns_name(self.TARGET_PROXY_NAME)
        logger.info('Creating target HTTP proxy %s to url map %s',
                    name, self.url_map.name)
        resource = self.compute.create_target_http_proxy(
            name, self.url_map)
        self.target_proxy = resource
        self.target_proxy_is_http = True

    def delete_target_http_proxy(self, force=False):
        """Delete the target HTTP proxy; no-op when absent and not forced."""
        if force:
            name = self._ns_name(self.TARGET_PROXY_NAME)
        elif self.target_proxy:
            name = self.target_proxy.name
        else:
            return
        logger.info('Deleting HTTP Target proxy %s', name)
        self.compute.delete_target_http_proxy(name)
        self.target_proxy = None
        self.target_proxy_is_http = False

    def create_forwarding_rule(self, src_port: int):
        """Create a global forwarding rule 0.0.0.0:src_port -> target proxy."""
        name = self._ns_name(self.FORWARDING_RULE_NAME)
        src_port = int(src_port)
        logger.info('Creating forwarding rule %s 0.0.0.0:%s -> %s in %s',
                    name, src_port, self.target_proxy.url, self.network)
        resource = self.compute.create_forwarding_rule(
            name, src_port, self.target_proxy, self.network_url)
        self.forwarding_rule = resource
        return resource

    def delete_forwarding_rule(self, force=False):
        """Delete the forwarding rule; no-op when absent and not forced."""
        if force:
            name = self._ns_name(self.FORWARDING_RULE_NAME)
        elif self.forwarding_rule:
            name = self.forwarding_rule.name
        else:
            return
        logger.info('Deleting Forwarding rule %s', name)
        self.compute.delete_forwarding_rule(name)
        self.forwarding_rule = None
+
+
class TrafficDirectorSecureManager(TrafficDirectorManager):
    """Traffic Director manager with PSM security (TLS/mTLS) resources.

    Extends the base manager with server/client TLS policies and an
    endpoint config selector wired to the gRPC node agent endpoint.
    """
    netsec: Optional[NetworkSecurityV1Alpha1]
    SERVER_TLS_POLICY_NAME = "server-tls-policy"
    CLIENT_TLS_POLICY_NAME = "client-tls-policy"
    ENDPOINT_CONFIG_SELECTOR_NAME = "endpoint-config-selector"
    GRPC_ENDPOINT_TARGET_URI = "unix:/var/cert/node-agent.0"

    def __init__(
        self,
        gcp_api_manager: gcp.api.GcpApiManager,
        project: str,
        *,
        resource_prefix: str,
        network: str = 'default',
    ):
        super().__init__(gcp_api_manager, project,
                         resource_prefix=resource_prefix, network=network)

        # API
        self.netsec = NetworkSecurityV1Alpha1(gcp_api_manager, project)
        self.netsvc = NetworkServicesV1Alpha1(gcp_api_manager, project)

        # Managed resources
        self.server_tls_policy: Optional[ServerTlsPolicy] = None
        self.ecs: Optional[EndpointConfigSelector] = None
        self.client_tls_policy: Optional[ClientTlsPolicy] = None

    def setup_for_grpc(
        self,
        service_host,
        service_port,
        *,
        backend_protocol=BackendServiceProtocol.HTTP2
    ):
        # Same as the base, but defaults the backend protocol to HTTP2.
        super().setup_for_grpc(service_host, service_port,
                               backend_protocol=backend_protocol)

    def setup_server_security(self, server_port, *, tls, mtls):
        """Create server-side security: TLS policy + endpoint selector."""
        self.create_server_tls_policy(tls=tls, mtls=mtls)
        self.create_endpoint_config_selector(server_port)

    def setup_client_security(self, server_namespace, server_name,
                              *, tls=True, mtls=True):
        """Create client TLS policy and attach it to the backend service."""
        self.create_client_tls_policy(tls=tls, mtls=mtls)
        self.backend_service_apply_client_mtls_policy(
            server_namespace, server_name)

    def cleanup(self, *, force=False):
        """Delete security resources, then the base TD resources."""
        # Cleanup in the reverse order of creation
        # todo(sergiitk): todo: fix
        self.target_proxy_is_http = True
        super().cleanup(force=force)
        self.delete_endpoint_config_selector(force=force)
        self.delete_server_tls_policy(force=force)
        self.delete_client_tls_policy(force=force)

    def create_server_tls_policy(self, *, tls, mtls):
        """Create the Server TLS Policy; skipped when neither tls nor mtls."""
        name = self._ns_name(self.SERVER_TLS_POLICY_NAME)
        logger.info('Creating Server TLS Policy %s', name)
        if not tls and not mtls:
            logger.warning('Server TLS Policy %s neither TLS, nor mTLS '
                           'policy. Skipping creation', name)
            return

        grpc_endpoint = {
            "grpcEndpoint": {"targetUri": self.GRPC_ENDPOINT_TARGET_URI}}

        policy = {}
        if tls:
            policy["serverCertificate"] = grpc_endpoint
        if mtls:
            policy["mtlsPolicy"] = {"clientValidationCa": [grpc_endpoint]}

        self.netsec.create_server_tls_policy(name, policy)
        self.server_tls_policy = self.netsec.get_server_tls_policy(name)
        logger.debug('Server TLS Policy loaded: %r', self.server_tls_policy)

    def delete_server_tls_policy(self, force=False):
        """Delete the Server TLS Policy; no-op when absent and not forced."""
        if force:
            name = self._ns_name(self.SERVER_TLS_POLICY_NAME)
        elif self.server_tls_policy:
            name = self.server_tls_policy.name
        else:
            return
        logger.info('Deleting Server TLS Policy %s', name)
        self.netsec.delete_server_tls_policy(name)
        self.server_tls_policy = None

    def create_endpoint_config_selector(self, server_port):
        """Create the Endpoint Config Selector matching the server's port."""
        name = self._ns_name(self.ENDPOINT_CONFIG_SELECTOR_NAME)
        logger.info('Creating Endpoint Config Selector %s', name)

        # todo(sergiitk): user server config value
        endpoint_matcher_labels = [{
            "labelName": "version",
            "labelValue": "production"
        }]
        port_selector = {"ports": [str(server_port)]}

        label_matcher_all = {
            "metadataLabelMatchCriteria": "MATCH_ALL",
            "metadataLabels": endpoint_matcher_labels
        }
        config = {
            "type": "SIDECAR_PROXY",
            "httpFilters": {},
            "trafficPortSelector": port_selector,
            "endpointMatcher": {"metadataLabelMatcher": label_matcher_all},
        }
        if self.server_tls_policy:
            config["serverTlsPolicy"] = self.server_tls_policy.name
        else:
            logger.warning('Creating Endpoint Config Selector %s with '
                           'no Server TLS policy attached', name)

        self.netsvc.create_endpoint_config_selector(name, config)
        self.ecs = self.netsvc.get_endpoint_config_selector(name)
        logger.debug('Loaded Endpoint Config Selector: %r', self.ecs)

    def delete_endpoint_config_selector(self, force=False):
        """Delete the Endpoint Config Selector; no-op when absent, not forced."""
        if force:
            name = self._ns_name(self.ENDPOINT_CONFIG_SELECTOR_NAME)
        elif self.ecs:
            name = self.ecs.name
        else:
            return
        logger.info('Deleting Endpoint Config Selector %s', name)
        self.netsvc.delete_endpoint_config_selector(name)
        self.ecs = None

    def create_client_tls_policy(self, *, tls, mtls):
        """Create the Client TLS Policy; skipped when neither tls nor mtls."""
        name = self._ns_name(self.CLIENT_TLS_POLICY_NAME)
        logger.info('Creating Client TLS Policy %s', name)
        if not tls and not mtls:
            logger.warning('Client TLS Policy %s neither TLS, nor mTLS '
                           'policy. Skipping creation', name)
            return

        grpc_endpoint = {
            "grpcEndpoint": {"targetUri": self.GRPC_ENDPOINT_TARGET_URI}}

        policy = {}
        if tls:
            policy["serverValidationCa"] = [grpc_endpoint]
        if mtls:
            policy["clientCertificate"] = grpc_endpoint

        self.netsec.create_client_tls_policy(name, policy)
        self.client_tls_policy = self.netsec.get_client_tls_policy(name)
        logger.debug('Client TLS Policy loaded: %r', self.client_tls_policy)

    def delete_client_tls_policy(self, force=False):
        """Delete the Client TLS Policy; no-op when absent and not forced."""
        if force:
            name = self._ns_name(self.CLIENT_TLS_POLICY_NAME)
        elif self.client_tls_policy:
            name = self.client_tls_policy.name
        else:
            return
        logger.info('Deleting Client TLS Policy %s', name)
        self.netsec.delete_client_tls_policy(name)
        self.client_tls_policy = None

    def backend_service_apply_client_mtls_policy(
        self,
        server_namespace,
        server_name,
    ):
        """Patch the backend service with the client TLS policy + SPIFFE SAN."""
        if not self.client_tls_policy:
            logger.warning('Client TLS policy not created, '
                           'skipping attaching to Backend Service %s',
                           self.backend_service.name)
            return

        server_spiffe = (f'spiffe://{self.project}.svc.id.goog/'
                         f'ns/{server_namespace}/sa/{server_name}')
        # Use the module logger (not the root logger) for consistency.
        logger.info('Adding Client TLS Policy to Backend Service %s: %s, '
                    'server %s',
                    self.backend_service.name,
                    self.client_tls_policy.url,
                    server_spiffe)

        self.compute.patch_backend_service(self.backend_service, {
            'securitySettings': {
                'clientTlsPolicy': self.client_tls_policy.url,
                'subjectAltNames': [server_spiffe]
            }})

+ 95 - 0
tools/run_tests/xds_test_driver/framework/rpc/__init__.py

@@ -0,0 +1,95 @@
+# Copyright 2020 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+import re
+from typing import Optional, ClassVar, Dict
+
+import grpc
+from google.protobuf import json_format
+import google.protobuf.message
+
+logger = logging.getLogger(__name__)
+
+# Type aliases
+Message = google.protobuf.message.Message
+
+
class GrpcClientHelper:
    """Thin wrapper around a gRPC stub: logs calls and waits for readiness."""
    channel: grpc.Channel
    DEFAULT_CONNECTION_TIMEOUT_SEC = 60
    DEFAULT_WAIT_FOR_READY_SEC = 60

    def __init__(self, channel: grpc.Channel, stub_class: type):
        # Note: `type`, not typing.ClassVar — ClassVar is only valid in
        # class-attribute annotations, not as a parameter type.
        self.channel = channel
        self.stub = stub_class(channel)
        # Service name derived from the stub class name, for better logging.
        self.service_name = re.sub('Stub$', '', self.stub.__class__.__name__)

    def call_unary_when_channel_ready(
        self, *,
        rpc: str,
        req: Message,
        wait_for_ready_sec: Optional[int] = DEFAULT_WAIT_FOR_READY_SEC,
        connection_timeout_sec: Optional[int] = DEFAULT_CONNECTION_TIMEOUT_SEC
    ) -> Message:
        """Invoke unary-unary RPC `rpc` with wait-for-ready semantics.

        None timeouts fall back to the class defaults; the RPC deadline is
        the sum of the wait-for-ready and connection timeouts.
        """
        if wait_for_ready_sec is None:
            wait_for_ready_sec = self.DEFAULT_WAIT_FOR_READY_SEC
        if connection_timeout_sec is None:
            connection_timeout_sec = self.DEFAULT_CONNECTION_TIMEOUT_SEC

        timeout_sec = wait_for_ready_sec + connection_timeout_sec
        rpc_callable: grpc.UnaryUnaryMultiCallable = getattr(self.stub, rpc)

        call_kwargs = dict(wait_for_ready=True, timeout=timeout_sec)
        self._log_debug(rpc, req, call_kwargs)
        return rpc_callable(req, **call_kwargs)

    def _log_debug(self, rpc, req, call_kwargs):
        # List comprehension (not a set) so the kwargs are logged in
        # deterministic insertion order.
        logger.debug('RPC %s.%s(request=%s(%r), %s)',
                     self.service_name, rpc,
                     req.__class__.__name__, json_format.MessageToDict(req),
                     ', '.join([f'{k}={v}' for k, v in call_kwargs.items()]))
+
+
class GrpcApp:
    """Base for gRPC-backed test apps; caches one insecure channel per port."""
    channels: Dict[int, grpc.Channel]

    class NotFound(Exception):
        """Requested resource not found"""

    def __init__(self, rpc_host):
        self.rpc_host = rpc_host
        # Cache gRPC channels per port
        self.channels = dict()

    def _make_channel(self, port) -> grpc.Channel:
        """Return a cached channel to rpc_host:port, creating it on first use."""
        if port not in self.channels:
            target = f'{self.rpc_host}:{port}'
            self.channels[port] = grpc.insecure_channel(target)
        return self.channels[port]

    def close(self):
        """Close all cached channels."""
        for channel in self.channels.values():
            channel.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        return False

    def __del__(self):
        # Guard: if __init__ raised before `channels` was assigned, calling
        # close() here would raise a spurious AttributeError during GC.
        if hasattr(self, 'channels'):
            self.close()

+ 175 - 0
tools/run_tests/xds_test_driver/framework/rpc/grpc_channelz.py

@@ -0,0 +1,175 @@
+#  Copyright 2020 gRPC authors.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+import ipaddress
+import logging
+from typing import Optional, Iterator
+
+import grpc
+from grpc_channelz.v1 import channelz_pb2
+from grpc_channelz.v1 import channelz_pb2_grpc
+
+import framework.rpc
+
+logger = logging.getLogger(__name__)
+
+# Type aliases
+# Channel
+Channel = channelz_pb2.Channel
+ChannelConnectivityState = channelz_pb2.ChannelConnectivityState
+GetTopChannelsRequest = channelz_pb2.GetTopChannelsRequest
+GetTopChannelsResponse = channelz_pb2.GetTopChannelsResponse
+# Subchannel
+Subchannel = channelz_pb2.Subchannel
+GetSubchannelRequest = channelz_pb2.GetSubchannelRequest
+GetSubchannelResponse = channelz_pb2.GetSubchannelResponse
+# Server
+Server = channelz_pb2.Server
+GetServersRequest = channelz_pb2.GetServersRequest
+GetServersResponse = channelz_pb2.GetServersResponse
+# Sockets
+Socket = channelz_pb2.Socket
+SocketRef = channelz_pb2.SocketRef
+GetSocketRequest = channelz_pb2.GetSocketRequest
+GetSocketResponse = channelz_pb2.GetSocketResponse
+Address = channelz_pb2.Address
+Security = channelz_pb2.Security
+# Server Sockets
+GetServerSocketsRequest = channelz_pb2.GetServerSocketsRequest
+GetServerSocketsResponse = channelz_pb2.GetServerSocketsResponse
+
+
class ChannelzServiceClient(framework.rpc.GrpcClientHelper):
    """Client for the gRPC Channelz debug service of a test app.

    The list_* methods implement the Channelz pagination protocol: each
    page is re-requested starting from the highest seen result id + 1,
    until the response reports `end`.
    """
    stub: channelz_pb2_grpc.ChannelzStub

    def __init__(self, channel: grpc.Channel):
        super().__init__(channel, channelz_pb2_grpc.ChannelzStub)

    @staticmethod
    def is_sock_tcpip_address(address: Address):
        """True when the address oneof is set to a TCP/IP address."""
        return address.WhichOneof('address') == 'tcpip_address'

    @staticmethod
    def is_ipv4(tcpip_address: Address.TcpIpAddress):
        """True when the raw ip_address bytes represent an IPv4 address."""
        # According to proto, tcpip_address.ip_address is either IPv4 or IPv6.
        # Correspondingly, it's either 4 bytes or 16 bytes in length.
        return len(tcpip_address.ip_address) == 4

    @classmethod
    def sock_address_to_str(cls, address: Address):
        """Format a TCP/IP socket address as 'ip:port'.

        Raises:
            NotImplementedError: for non-TCP/IP address kinds.
        """
        if cls.is_sock_tcpip_address(address):
            tcpip_address: Address.TcpIpAddress = address.tcpip_address
            if cls.is_ipv4(tcpip_address):
                ip = ipaddress.IPv4Address(tcpip_address.ip_address)
            else:
                ip = ipaddress.IPv6Address(tcpip_address.ip_address)
            return f'{ip}:{tcpip_address.port}'
        else:
            raise NotImplementedError('Only tcpip_address implemented')

    @classmethod
    def sock_addresses_pretty(cls, socket: Socket):
        """Human-readable 'local=..., remote=...' summary of a socket."""
        return (f'local={cls.sock_address_to_str(socket.local)}, '
                f'remote={cls.sock_address_to_str(socket.remote)}')

    @staticmethod
    def find_server_socket_matching_client(
        server_sockets: Iterator[Socket],
        client_socket: Socket
    ) -> Optional[Socket]:
        """Return the server socket whose remote end is the client's local
        end, or None when no socket matches."""
        for server_socket in server_sockets:
            if server_socket.remote == client_socket.local:
                return server_socket
        return None

    def find_channels_for_target(self, target: str) -> Iterator[Channel]:
        """Yield root channels whose target equals `target`."""
        return (channel for channel in self.list_channels()
                if channel.data.target == target)

    def find_server_listening_on_port(self, port: int) -> Optional[Server]:
        """Return the first server with a TCP/IP listen socket on `port`,
        or None when no server listens there."""
        for server in self.list_servers():
            listen_socket_ref: SocketRef
            for listen_socket_ref in server.listen_socket:
                listen_socket = self.get_socket(listen_socket_ref.socket_id)
                listen_address: Address = listen_socket.local
                if (self.is_sock_tcpip_address(listen_address) and
                    listen_address.tcpip_address.port == port):
                    return server
        return None

    def list_channels(self) -> Iterator[Channel]:
        """
        Iterate over all pages of all root channels.

        Root channels are those which application has directly created.
        This does not include subchannels nor non-top level channels.
        """
        start: int = -1
        response: Optional[GetTopChannelsResponse] = None
        while start < 0 or not response.end:
            # From proto: To request subsequent pages, the client generates this
            # value by adding 1 to the highest seen result ID.
            start += 1
            response = self.call_unary_when_channel_ready(
                rpc='GetTopChannels',
                req=GetTopChannelsRequest(start_channel_id=start))
            for channel in response.channel:
                start = max(start, channel.ref.channel_id)
                yield channel

    def list_servers(self) -> Iterator[Server]:
        """Iterate over all pages of all servers that exist in the process."""
        start: int = -1
        response: Optional[GetServersResponse] = None
        while start < 0 or not response.end:
            # From proto: To request subsequent pages, the client generates this
            # value by adding 1 to the highest seen result ID.
            start += 1
            response = self.call_unary_when_channel_ready(
                rpc='GetServers',
                req=GetServersRequest(start_server_id=start))
            for server in response.server:
                start = max(start, server.ref.server_id)
                yield server

    def list_server_sockets(self, server_id) -> Iterator[Socket]:
        """Iterate over all server sockets that exist in server process."""
        start: int = -1
        response: Optional[GetServerSocketsResponse] = None
        while start < 0 or not response.end:
            # From proto: To request subsequent pages, the client generates this
            # value by adding 1 to the highest seen result ID.
            start += 1
            response = self.call_unary_when_channel_ready(
                rpc='GetServerSockets',
                req=GetServerSocketsRequest(server_id=server_id,
                                            start_socket_id=start))
            socket_ref: SocketRef
            for socket_ref in response.socket_ref:
                start = max(start, socket_ref.socket_id)
                # Yield actual socket
                yield self.get_socket(socket_ref.socket_id)

    def get_subchannel(self, subchannel_id) -> Subchannel:
        """Return a single Subchannel, otherwise raises RpcError."""
        response: GetSubchannelResponse = self.call_unary_when_channel_ready(
            rpc='GetSubchannel',
            req=GetSubchannelRequest(subchannel_id=subchannel_id))
        return response.subchannel

    def get_socket(self, socket_id) -> Socket:
        """Return a single Socket, otherwise raises RpcError."""
        response: GetSocketResponse = self.call_unary_when_channel_ready(
            rpc='GetSocket',
            req=GetSocketRequest(socket_id=socket_id))
        return response.socket

+ 47 - 0
tools/run_tests/xds_test_driver/framework/rpc/grpc_testing.py

@@ -0,0 +1,47 @@
+#  Copyright 2020 gRPC authors.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+from typing import Optional
+
+import grpc
+
+import framework.rpc
+from src.proto.grpc.testing import test_pb2_grpc
+from src.proto.grpc.testing import messages_pb2
+
+
+# Type aliases
+LoadBalancerStatsRequest = messages_pb2.LoadBalancerStatsRequest
+LoadBalancerStatsResponse = messages_pb2.LoadBalancerStatsResponse
+
+
class LoadBalancerStatsServiceClient(framework.rpc.GrpcClientHelper):
    """Client for the LoadBalancerStatsService exposed by xDS test clients."""
    stub: test_pb2_grpc.LoadBalancerStatsServiceStub
    STATS_PARTIAL_RESULTS_TIMEOUT_SEC = 1200

    def __init__(self, channel: grpc.Channel):
        super().__init__(channel, test_pb2_grpc.LoadBalancerStatsServiceStub)

    def get_client_stats(
        self, *,
        num_rpcs: int,
        timeout_sec: Optional[int] = STATS_PARTIAL_RESULTS_TIMEOUT_SEC,
    ) -> LoadBalancerStatsResponse:
        """Fetch per-backend load balancing stats covering num_rpcs RPCs."""
        effective_timeout = (self.STATS_PARTIAL_RESULTS_TIMEOUT_SEC
                             if timeout_sec is None else timeout_sec)
        request = LoadBalancerStatsRequest(num_rpcs=num_rpcs,
                                           timeout_sec=effective_timeout)
        return self.call_unary_when_channel_ready(
            rpc='GetClientStats',
            wait_for_ready_sec=effective_timeout,
            req=request)

+ 13 - 0
tools/run_tests/xds_test_driver/framework/test_app/__init__.py

@@ -0,0 +1,13 @@
+# Copyright 2020 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+ 255 - 0
tools/run_tests/xds_test_driver/framework/test_app/base_runner.py

@@ -0,0 +1,255 @@
+# Copyright 2016 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import contextlib
+import logging
+import pathlib
+from typing import Optional
+
+import mako.template
+import yaml
+
+from framework.infrastructure import k8s
+
+logger = logging.getLogger(__name__)
+
+
+class RunnerError(Exception):
+    """Error running app"""
+
+
+# Path to the kubernetes manifest templates, relative to this file.
+TEMPLATE_DIR = '../../kubernetes-manifests'
+
+
+class KubernetesBaseRunner:
+    def __init__(self,
+                 k8s_namespace,
+                 namespace_template=None,
+                 reuse_namespace=False):
+        # Kubernetes namespaced resources manager
+        self.k8s_namespace: k8s.KubernetesNamespace = k8s_namespace
+        self.reuse_namespace = reuse_namespace
+        self.namespace_template = namespace_template or 'namespace.yaml'
+
+        # Mutable state
+        self.namespace: Optional[k8s.V1Namespace] = None
+
+    def run(self, **kwargs):
+        if self.reuse_namespace:
+            self.namespace = self._reuse_namespace()
+        if not self.namespace:
+            self.namespace = self._create_namespace(
+                self.namespace_template,
+                namespace_name=self.k8s_namespace.name)
+
+    def cleanup(self, *, force=False):
+        if (self.namespace and not self.reuse_namespace) or force:
+            self._delete_namespace()
+            self.namespace = None
+
+    @staticmethod
+    def _render_template(template_file, **kwargs):
+        template = mako.template.Template(filename=str(template_file))
+        return template.render(**kwargs)
+
+    @staticmethod
+    def _manifests_from_yaml_file(yaml_file):
+        # Parse yaml
+        with open(yaml_file) as f:
+            with contextlib.closing(yaml.safe_load_all(f)) as yml:
+                for manifest in yml:
+                    yield manifest
+
+    @staticmethod
+    def _manifests_from_str(document):
+        with contextlib.closing(yaml.safe_load_all(document)) as yml:
+            for manifest in yml:
+                yield manifest
+
+    @staticmethod
+    def _template_file_from_name(template_name):
+        templates_path = pathlib.Path(__file__).parent / TEMPLATE_DIR
+        return templates_path.joinpath(template_name).absolute()
+
+    def _create_from_template(self, template_name, **kwargs):
+        template_file = self._template_file_from_name(template_name)
+        logger.info("Loading template: %s", template_file)
+
+        yaml_doc = self._render_template(template_file, **kwargs)
+        logger.info("Rendered template:\n%s\n", yaml_doc)
+
+        manifests = self._manifests_from_str(yaml_doc)
+        manifest = next(manifests)
+        # Error out on multi-document yaml
+        if next(manifests, False):
+            raise RunnerError('Exactly one document expected in manifest '
+                              f'{template_file}')
+        # Apply the manifest
+        k8s_objects = self.k8s_namespace.apply_manifest(manifest)
+
+        # Check correctness
+        if len(k8s_objects) != 1:
+            raise RunnerError('Expected exactly one object must created from '
+                              f'manifest {template_file}')
+
+        logger.info('%s %s created', k8s_objects[0].kind,
+                    k8s_objects[0].metadata.name)
+
+        return k8s_objects[0]
+
+    def _reuse_deployment(self, deployment_name) -> k8s.V1Deployment:
+        deployment = self.k8s_namespace.get_deployment(deployment_name)
+        # todo(sergiitk): check if good or must be recreated
+        return deployment
+
+    def _reuse_service(self, service_name) -> k8s.V1Service:
+        service = self.k8s_namespace.get_service(service_name)
+        # todo(sergiitk): check if good or must be recreated
+        return service
+
+    def _reuse_namespace(self) -> k8s.V1Namespace:
+        return self.k8s_namespace.get()
+
+    def _create_namespace(self, template, **kwargs) -> k8s.V1Namespace:
+        namespace = self._create_from_template(template, **kwargs)
+        if not isinstance(namespace, k8s.V1Namespace):
+            raise RunnerError('Expected V1Namespace to be created '
+                              f'from manifest {template}')
+        if namespace.metadata.name != kwargs['namespace_name']:
+            raise RunnerError(
+                'Namespace created with unexpected name: '
+                f'{namespace.metadata.name}')
+        logger.info('Deployment %s created at %s',
+                    namespace.metadata.self_link,
+                    namespace.metadata.creation_timestamp)
+        return namespace
+
+    def _create_service_account(
+        self,
+        template,
+        **kwargs
+    ) -> k8s.V1ServiceAccount:
+        resource = self._create_from_template(template, **kwargs)
+        if not isinstance(resource, k8s.V1ServiceAccount):
+            raise RunnerError('Expected V1ServiceAccount to be created '
+                              f'from manifest {template}')
+        if resource.metadata.name != kwargs['service_account_name']:
+            raise RunnerError(
+                'V1ServiceAccount created with unexpected name: '
+                f'{resource.metadata.name}')
+        logger.info('V1ServiceAccount %s created at %s',
+                    resource.metadata.self_link,
+                    resource.metadata.creation_timestamp)
+        return resource
+
+    def _create_deployment(self, template, **kwargs) -> k8s.V1Deployment:
+        deployment = self._create_from_template(template, **kwargs)
+        if not isinstance(deployment, k8s.V1Deployment):
+            raise RunnerError('Expected V1Deployment to be created '
+                              f'from manifest {template}')
+        if deployment.metadata.name != kwargs['deployment_name']:
+            raise RunnerError(
+                'Deployment created with unexpected name: '
+                f'{deployment.metadata.name}')
+        logger.info('Deployment %s created at %s',
+                    deployment.metadata.self_link,
+                    deployment.metadata.creation_timestamp)
+        return deployment
+
+    def _create_service(self, template, **kwargs) -> k8s.V1Service:
+        service = self._create_from_template(template, **kwargs)
+        if not isinstance(service, k8s.V1Service):
+            raise RunnerError('Expected V1Service to be created '
+                              f'from manifest {template}')
+        if service.metadata.name != kwargs['service_name']:
+            raise RunnerError(
+                'Service created with unexpected name: '
+                f'{service.metadata.name}')
+        logger.info('Service %s created at %s',
+                    service.metadata.self_link,
+                    service.metadata.creation_timestamp)
+        return service
+
+    def _delete_deployment(self, name, wait_for_deletion=True):
+        try:
+            self.k8s_namespace.delete_deployment(name)
+        except k8s.ApiException as e:
+            logger.info('Deployment %s deletion failed, error: %s %s',
+                        name, e.status, e.reason)
+            return
+
+        if wait_for_deletion:
+            self.k8s_namespace.wait_for_deployment_deleted(name)
+        logger.info('Deployment %s deleted', name)
+
+    def _delete_service(self, name, wait_for_deletion=True):
+        try:
+            self.k8s_namespace.delete_service(name)
+        except k8s.ApiException as e:
+            logger.info('Service %s deletion failed, error: %s %s',
+                        name, e.status, e.reason)
+            return
+
+        if wait_for_deletion:
+            self.k8s_namespace.wait_for_service_deleted(name)
+        logger.info('Service %s deleted', name)
+
+    def _delete_service_account(self, name, wait_for_deletion=True):
+        try:
+            self.k8s_namespace.delete_service_account(name)
+        except k8s.ApiException as e:
+            logger.info('Service account %s deletion failed, error: %s %s',
+                        name, e.status, e.reason)
+            return
+
+        if wait_for_deletion:
+            self.k8s_namespace.wait_for_service_account_deleted(name)
+        logger.info('Service account %s deleted', name)
+
+    def _delete_namespace(self, wait_for_deletion=True):
+        try:
+            self.k8s_namespace.delete()
+        except k8s.ApiException as e:
+            logger.info('Namespace %s deletion failed, error: %s %s',
+                        self.k8s_namespace.name, e.status, e.reason)
+            return
+
+        if wait_for_deletion:
+            self.k8s_namespace.wait_for_namespace_deleted()
+        logger.info('Namespace %s deleted', self.k8s_namespace.name)
+
+    def _wait_deployment_with_available_replicas(self, name, count=1, **kwargs):
+        logger.info('Waiting for deployment %s to have %s available replicas',
+                    name, count)
+        self.k8s_namespace.wait_for_deployment_available_replicas(name, count,
+                                                                  **kwargs)
+        deployment = self.k8s_namespace.get_deployment(name)
+        logger.info('Deployment %s has %i replicas available',
+                    deployment.metadata.name,
+                    deployment.status.available_replicas)
+
+    def _wait_pod_started(self, name, **kwargs):
+        logger.info('Waiting for pod %s to start', name)
+        self.k8s_namespace.wait_for_pod_started(name, **kwargs)
+        pod = self.k8s_namespace.get_pod(name)
+        logger.info('Pod %s ready, IP: %s', pod.metadata.name,
+                    pod.status.pod_ip)
+
+    def _wait_service_neg(self, name, service_port, **kwargs):
+        logger.info('Waiting for NEG for service %s', name)
+        self.k8s_namespace.wait_for_service_neg(name, **kwargs)
+        neg_name, neg_zones = self.k8s_namespace.get_service_neg(
+            name, service_port)
+        logger.info("Service %s: detected NEG=%s in zones=%s", name,
+                    neg_name, neg_zones)

+ 207 - 0
tools/run_tests/xds_test_driver/framework/test_app/client_app.py

@@ -0,0 +1,207 @@
+# Copyright 2016 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import functools
+import logging
+from typing import Optional, Iterator
+
+import tenacity
+
+from framework.infrastructure import k8s
+import framework.rpc
+from framework.rpc import grpc_channelz
+from framework.rpc import grpc_testing
+from framework.test_app import base_runner
+
+logger = logging.getLogger(__name__)
+
+# Type aliases, to avoid repeating the module-qualified names below.
+ChannelzServiceClient = grpc_channelz.ChannelzServiceClient
+ChannelConnectivityState = grpc_channelz.ChannelConnectivityState
+LoadBalancerStatsServiceClient = grpc_testing.LoadBalancerStatsServiceClient
+
+
+class XdsTestClient(framework.rpc.GrpcApp):
+    def __init__(self, *,
+                 ip: str,
+                 rpc_port: int,
+                 server_target: str,
+                 rpc_host: Optional[str] = None,
+                 maintenance_port: Optional[int] = None):
+        super().__init__(rpc_host=(rpc_host or ip))
+        self.ip = ip
+        self.rpc_port = rpc_port
+        self.server_target = server_target
+        self.maintenance_port = maintenance_port or rpc_port
+
+    @property
+    @functools.lru_cache(None)
+    def load_balancer_stats(self) -> LoadBalancerStatsServiceClient:
+        return LoadBalancerStatsServiceClient(self._make_channel(self.rpc_port))
+
+    @property
+    @functools.lru_cache(None)
+    def channelz(self) -> ChannelzServiceClient:
+        return ChannelzServiceClient(self._make_channel(self.maintenance_port))
+
+    def get_load_balancer_stats(
+        self, *,
+        num_rpcs: int,
+        timeout_sec: Optional[int] = None,
+    ) -> grpc_testing.LoadBalancerStatsResponse:
+        """
+        Shortcut to LoadBalancerStatsServiceClient.get_client_stats()
+        """
+        return self.load_balancer_stats.get_client_stats(
+            num_rpcs=num_rpcs, timeout_sec=timeout_sec)
+
+    def get_server_channels(self) -> Iterator[grpc_channelz.Channel]:
+        return self.channelz.find_channels_for_target(self.server_target)
+
+    def wait_for_active_server_channel(self):
+        retryer = tenacity.Retrying(
+            retry=(tenacity.retry_if_result(lambda r: r is None) |
+                   tenacity.retry_if_exception_type()),
+            wait=tenacity.wait_exponential(max=10),
+            stop=tenacity.stop_after_delay(60 * 3),
+            reraise=True)
+        channel = retryer(self.get_active_server_channel)
+        logger.info(
+            'Active server channel found: channel_id: %s, %s',
+            channel.ref.channel_id, channel.ref.name)
+        logger.debug('Server channel:\n%r', channel)
+
+    def get_active_server_channel(self) -> Optional[grpc_channelz.Channel]:
+        for channel in self.get_server_channels():
+            state: ChannelConnectivityState = channel.data.state
+            logger.debug('Server channel: %s, state: %s',
+                         channel.ref.name,
+                         ChannelConnectivityState.State.Name(state.state))
+            if state.state is ChannelConnectivityState.READY:
+                return channel
+        raise self.NotFound('Client has no active channel with the server')
+
+    def get_client_socket_with_test_server(self) -> grpc_channelz.Socket:
+        channel = self.get_active_server_channel()
+        logger.debug('Retrieving client->server socket: channel %s',
+                     channel.ref.name)
+        # Get the first subchannel of the active server channel
+        subchannel_id = channel.subchannel_ref[0].subchannel_id
+        subchannel = self.channelz.get_subchannel(subchannel_id)
+        logger.debug('Retrieving client->server socket: subchannel %s',
+                     subchannel.ref.name)
+        # Get the first socket of the subchannel
+        socket = self.channelz.get_socket(subchannel.socket_ref[0].socket_id)
+        logger.debug('Found client->server socket: %s', socket.ref.name)
+        return socket
+
+
+class KubernetesClientRunner(base_runner.KubernetesBaseRunner):
+    """Deploys the xDS test client to Kubernetes; returns an XdsTestClient."""
+
+    def __init__(self,
+                 k8s_namespace,
+                 *,
+                 deployment_name,
+                 image_name,
+                 gcp_service_account,
+                 td_bootstrap_image,
+                 service_account_name=None,
+                 stats_port=8079,
+                 network='default',
+                 deployment_template='client.deployment.yaml',
+                 service_account_template='service-account.yaml',
+                 reuse_namespace=False,
+                 namespace_template=None,
+                 debug_use_port_forwarding=False):
+        super().__init__(k8s_namespace, namespace_template, reuse_namespace)
+
+        # Settings
+        self.deployment_name = deployment_name
+        self.image_name = image_name
+        self.gcp_service_account = gcp_service_account
+        self.service_account_name = service_account_name or deployment_name
+        self.stats_port = stats_port
+        # xDS bootstrap generator
+        self.td_bootstrap_image = td_bootstrap_image
+        self.network = network
+        self.deployment_template = deployment_template
+        self.service_account_template = service_account_template
+        self.debug_use_port_forwarding = debug_use_port_forwarding
+
+        # Mutable state
+        self.deployment: Optional[k8s.V1Deployment] = None
+        self.service_account: Optional[k8s.V1ServiceAccount] = None
+        self.port_forwarder = None
+
+    def run(self, *,
+            server_target,
+            rpc='UnaryCall', qps=25,
+            secure_mode=False,
+            print_response=False) -> XdsTestClient:
+        """Deploy the test client and return an XdsTestClient handle.
+
+        Creates (in order): namespace, service account, deployment; then
+        waits for the deployment replica and its pod to start.
+        """
+        super().run()
+        # todo(sergiitk): make rpc UnaryCall enum or get it from proto
+
+        # Create service account
+        self.service_account = self._create_service_account(
+            self.service_account_template,
+            service_account_name=self.service_account_name,
+            namespace_name=self.k8s_namespace.name,
+            gcp_service_account=self.gcp_service_account)
+
+        # Always create a new deployment
+        self.deployment = self._create_deployment(
+            self.deployment_template,
+            deployment_name=self.deployment_name,
+            image_name=self.image_name,
+            namespace_name=self.k8s_namespace.name,
+            service_account_name=self.service_account_name,
+            td_bootstrap_image=self.td_bootstrap_image,
+            network_name=self.network,
+            stats_port=self.stats_port,
+            server_target=server_target,
+            rpc=rpc,
+            qps=qps,
+            secure_mode=secure_mode,
+            print_response=print_response)
+
+        self._wait_deployment_with_available_replicas(self.deployment_name)
+
+        # Load test client pod. We need only one client at the moment
+        pod = self.k8s_namespace.list_deployment_pods(self.deployment)[0]
+        self._wait_pod_started(pod.metadata.name)
+        pod_ip = pod.status.pod_ip
+        rpc_host = None
+
+        # Experimental, for local debugging.
+        if self.debug_use_port_forwarding:
+            logger.info('Enabling port forwarding from %s:%s',
+                        pod_ip, self.stats_port)
+            self.port_forwarder = self.k8s_namespace.port_forward_pod(
+                pod, remote_port=self.stats_port)
+            rpc_host = self.k8s_namespace.PORT_FORWARD_LOCAL_ADDRESS
+
+        return XdsTestClient(ip=pod_ip,
+                             rpc_port=self.stats_port,
+                             server_target=server_target,
+                             rpc_host=rpc_host)
+
+    def cleanup(self, *, force=False, force_namespace=False):
+        """Tear down in reverse order; namespace only when both flags set."""
+        if self.port_forwarder:
+            self.k8s_namespace.port_forward_stop(self.port_forwarder)
+            self.port_forwarder = None
+        if self.deployment or force:
+            self._delete_deployment(self.deployment_name)
+            self.deployment = None
+        if self.service_account or force:
+            self._delete_service_account(self.service_account_name)
+            self.service_account = None
+        super().cleanup(force=force_namespace and force)

+ 245 - 0
tools/run_tests/xds_test_driver/framework/test_app/server_app.py

@@ -0,0 +1,245 @@
+# Copyright 2016 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import functools
+import logging
+from typing import Optional
+
+from framework.infrastructure import k8s
+import framework.rpc
+from framework.rpc import grpc_channelz
+from framework.test_app import base_runner
+
+logger = logging.getLogger(__name__)
+
+# Type aliases, to avoid repeating the module-qualified name below.
+ChannelzServiceClient = grpc_channelz.ChannelzServiceClient
+
+
+class XdsTestServer(framework.rpc.GrpcApp):
+    def __init__(self, *,
+                 ip: str,
+                 rpc_port: int,
+                 maintenance_port: Optional[int] = None,
+                 secure_mode: Optional[bool] = False,
+                 server_id: Optional[str] = None,
+                 xds_host: Optional[str] = None,
+                 xds_port: Optional[int] = None,
+                 rpc_host: Optional[str] = None):
+        super().__init__(rpc_host=(rpc_host or ip))
+        self.ip = ip
+        self.rpc_port = rpc_port
+        self.maintenance_port = maintenance_port or rpc_port
+        self.secure_mode = secure_mode
+        self.server_id = server_id
+        self.xds_host, self.xds_port = xds_host, xds_port
+
+    @property
+    @functools.lru_cache(None)
+    def channelz(self) -> ChannelzServiceClient:
+        return ChannelzServiceClient(self._make_channel(self.maintenance_port))
+
+    def set_xds_address(self, xds_host, xds_port: Optional[int] = None):
+        self.xds_host, self.xds_port = xds_host, xds_port
+
+    @property
+    def xds_address(self) -> str:
+        if not self.xds_host: return ''
+        if not self.xds_port: return self.xds_host
+        return f'{self.xds_host}:{self.xds_port}'
+
+    @property
+    def xds_uri(self) -> str:
+        if not self.xds_host: return ''
+        return f'xds:///{self.xds_address}'
+
+    def get_test_server(self):
+        server = self.channelz.find_server_listening_on_port(self.rpc_port)
+        if not server:
+            raise self.NotFound(
+                f'Server listening on port {self.rpc_port} not found')
+        return server
+
+    def get_test_server_sockets(self):
+        server = self.get_test_server()
+        return self.channelz.list_server_sockets(server.ref.server_id)
+
+    def get_server_socket_matching_client(
+        self,
+        client_socket: grpc_channelz.Socket
+    ):
+        client_local = self.channelz.sock_address_to_str(client_socket.local)
+        logger.debug('Looking for a server socket connected to the client %s',
+                     client_local)
+
+        server_socket = self.channelz.find_server_socket_matching_client(
+            self.get_test_server_sockets(), client_socket)
+        if not server_socket:
+            raise self.NotFound(
+                f'Server socket for client {client_local} not found')
+
+        logger.info('Found matching socket pair: server(%s) <-> client(%s)',
+                    self.channelz.sock_addresses_pretty(server_socket),
+                    self.channelz.sock_addresses_pretty(client_socket))
+        return server_socket
+
+
+class KubernetesServerRunner(base_runner.KubernetesBaseRunner):
+    """Deploys the xDS test server to Kubernetes; returns an XdsTestServer."""
+
+    def __init__(self,
+                 k8s_namespace,
+                 *,
+                 deployment_name,
+                 image_name,
+                 gcp_service_account,
+                 service_account_name=None,
+                 service_name=None,
+                 neg_name=None,
+                 td_bootstrap_image=None,
+                 network='default',
+                 deployment_template='server.deployment.yaml',
+                 service_account_template='service-account.yaml',
+                 service_template='server.service.yaml',
+                 reuse_service=False,
+                 reuse_namespace=False,
+                 namespace_template=None,
+                 debug_use_port_forwarding=False):
+        super().__init__(k8s_namespace, namespace_template, reuse_namespace)
+
+        # Settings
+        self.deployment_name = deployment_name
+        self.image_name = image_name
+        self.gcp_service_account = gcp_service_account
+        self.service_account_name = service_account_name or deployment_name
+        self.service_name = service_name or deployment_name
+        # xDS bootstrap generator
+        self.td_bootstrap_image = td_bootstrap_image
+        # This only works in k8s >= 1.18.10-gke.600
+        # https://cloud.google.com/kubernetes-engine/docs/how-to/standalone-neg#naming_negs
+        self.neg_name = neg_name or (f'{self.k8s_namespace.name}-'
+                                     f'{self.service_name}')
+        self.network = network
+        self.deployment_template = deployment_template
+        self.service_account_template = service_account_template
+        self.service_template = service_template
+        self.reuse_service = reuse_service
+        self.debug_use_port_forwarding = debug_use_port_forwarding
+
+        # Mutable state
+        self.deployment: Optional[k8s.V1Deployment] = None
+        self.service_account: Optional[k8s.V1ServiceAccount] = None
+        self.service: Optional[k8s.V1Service] = None
+        self.port_forwarder = None
+
+    def run(self, *,
+            test_port=8080, maintenance_port=None,
+            secure_mode=False, server_id=None,
+            replica_count=1) -> XdsTestServer:
+        """Deploy the test server and return an XdsTestServer handle.
+
+        Creates (in order): namespace, service (+NEG), service account,
+        deployment; then waits for replicas, pods, and the NEG.
+        """
+        # todo(sergiitk): multiple replicas
+        if replica_count != 1:
+            raise NotImplementedError("Multiple replicas not yet supported")
+
+        # Implementation detail: in secure mode, maintenance ("backchannel")
+        # port must be different from the test port so communication with
+        # maintenance services can be reached independently from the security
+        # configuration under test.
+        if maintenance_port is None:
+            maintenance_port = test_port if not secure_mode else test_port + 1
+        if secure_mode and maintenance_port == test_port:
+            raise ValueError('port and maintenance_port must be different '
+                             'when running test server in secure mode')
+        # To avoid bugs with comparing wrong types.
+        if not (isinstance(test_port, int) and
+                isinstance(maintenance_port, int)):
+            raise TypeError('Port numbers must be integer')
+
+        # Create namespace.
+        super().run()
+
+        # Reuse existing if requested, create a new deployment when missing.
+        # Useful for debugging to avoid the NEG losing its relation to the
+        # deleted service.
+        if self.reuse_service:
+            self.service = self._reuse_service(self.service_name)
+        if not self.service:
+            self.service = self._create_service(
+                self.service_template,
+                service_name=self.service_name,
+                namespace_name=self.k8s_namespace.name,
+                deployment_name=self.deployment_name,
+                neg_name=self.neg_name,
+                test_port=test_port)
+        self._wait_service_neg(self.service_name, test_port)
+
+        # Create service account
+        self.service_account = self._create_service_account(
+            self.service_account_template,
+            service_account_name=self.service_account_name,
+            namespace_name=self.k8s_namespace.name,
+            gcp_service_account=self.gcp_service_account)
+
+        # Always create a new deployment
+        self.deployment = self._create_deployment(
+            self.deployment_template,
+            deployment_name=self.deployment_name,
+            image_name=self.image_name,
+            namespace_name=self.k8s_namespace.name,
+            service_account_name=self.service_account_name,
+            td_bootstrap_image=self.td_bootstrap_image,
+            network_name=self.network,
+            replica_count=replica_count,
+            test_port=test_port,
+            maintenance_port=maintenance_port,
+            server_id=server_id,
+            secure_mode=secure_mode)
+
+        self._wait_deployment_with_available_replicas(
+            self.deployment_name, replica_count, timeout_sec=120)
+
+        # Wait for pods running
+        pods = self.k8s_namespace.list_deployment_pods(self.deployment)
+        for pod in pods:
+            self._wait_pod_started(pod.metadata.name)
+
+        # todo(sergiitk): This is why multiple replicas not yet supported
+        pod = pods[0]
+        pod_ip = pod.status.pod_ip
+        rpc_host = None
+        # Experimental, for local debugging.
+        if self.debug_use_port_forwarding:
+            logger.info('Enabling port forwarding from %s:%s',
+                        pod_ip, maintenance_port)
+            self.port_forwarder = self.k8s_namespace.port_forward_pod(
+                pod, remote_port=maintenance_port)
+            rpc_host = self.k8s_namespace.PORT_FORWARD_LOCAL_ADDRESS
+
+        return XdsTestServer(
+            ip=pod_ip,
+            rpc_port=test_port,
+            maintenance_port=maintenance_port,
+            secure_mode=secure_mode,
+            server_id=server_id,
+            rpc_host=rpc_host)
+
+    def cleanup(self, *, force=False, force_namespace=False):
+        """Tear down in reverse order; reused services and the namespace
+        are preserved unless forced."""
+        if self.port_forwarder:
+            self.k8s_namespace.port_forward_stop(self.port_forwarder)
+            self.port_forwarder = None
+        if self.deployment or force:
+            self._delete_deployment(self.deployment_name)
+            self.deployment = None
+        if (self.service and not self.reuse_service) or force:
+            self._delete_service(self.service_name)
+            self.service = None
+        if self.service_account or force:
+            self._delete_service_account(self.service_account_name)
+            self.service_account = None
+        super().cleanup(force=(force_namespace and force))

+ 51 - 0
tools/run_tests/xds_test_driver/framework/xds_flags.py

@@ -0,0 +1,51 @@
+#  Copyright 2020 gRPC authors.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+from absl import flags
+# NOTE(review): googleapiclient.discovery appears unused in this module —
+# confirm against the rest of the change before removing.
+import googleapiclient.discovery
+
+# GCP
+PROJECT = flags.DEFINE_string(
+    "project", default=None, help="GCP Project ID. Required")
+NAMESPACE = flags.DEFINE_string(
+    "namespace", default=None,
+    help="Isolate GCP resources using given namespace / name prefix. Required")
+NETWORK = flags.DEFINE_string(
+    "network", default="default", help="GCP Network ID")
+
+# Test server
+SERVER_NAME = flags.DEFINE_string(
+    "server_name", default="psm-grpc-server",
+    help="Server deployment and service name")
+SERVER_PORT = flags.DEFINE_integer(
+    "server_port", default=8080,
+    help="Server test port")
+SERVER_XDS_HOST = flags.DEFINE_string(
+    "server_xds_host", default='xds-test-server',
+    help="Test server xDS hostname")
+SERVER_XDS_PORT = flags.DEFINE_integer(
+    "server_xds_port", default=8000, help="Test server xDS port")
+
+# Test client
+CLIENT_NAME = flags.DEFINE_string(
+    "client_name", default="psm-grpc-client",
+    help="Client deployment and service name")
+CLIENT_PORT = flags.DEFINE_integer(
+    "client_port", default=8079,
+    help="Client test port")
+
+
+# Flags without a usable default that every run must provide.
+flags.mark_flags_as_required([
+    "project",
+    "namespace",
+])

+ 43 - 0
tools/run_tests/xds_test_driver/framework/xds_k8s_flags.py

@@ -0,0 +1,43 @@
+#  Copyright 2020 gRPC authors.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
from absl import flags

# Kubernetes / GKE
KUBE_CONTEXT = flags.DEFINE_string(
    "kube_context", default=None, help="Kubectl context to use")
GCP_SERVICE_ACCOUNT = flags.DEFINE_string(
    "gcp_service_account", default=None,
    help="GCP Service account for GKE workloads to impersonate")
TD_BOOTSTRAP_IMAGE = flags.DEFINE_string(
    "td_bootstrap_image", default=None,
    help="Traffic Director gRPC Bootstrap Docker image")

# Test app
SERVER_IMAGE = flags.DEFINE_string(
    "server_image", default=None,
    help="Server Docker image name")
CLIENT_IMAGE = flags.DEFINE_string(
    "client_image", default=None,
    help="Client Docker image name")
CLIENT_PORT_FORWARDING = flags.DEFINE_bool(
    "client_debug_use_port_forwarding", default=False,
    help="Development only: use kubectl port-forward to connect to test client")

# These flags have no sane universal defaults; fail fast when missing.
flags.mark_flags_as_required([
    "gcp_service_account",
    "kube_context",
    "td_bootstrap_image",
    "server_image",
    "client_image",
])

+ 395 - 0
tools/run_tests/xds_test_driver/framework/xds_k8s_testcase.py

@@ -0,0 +1,395 @@
+#  Copyright 2020 gRPC authors.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+import enum
+import hashlib
+import logging
+from typing import Tuple
+
+from absl import flags
+from absl.testing import absltest
+
+from framework import xds_flags
+from framework import xds_k8s_flags
+from framework.infrastructure import k8s
+from framework.infrastructure import gcp
+from framework.infrastructure import traffic_director
+from framework.rpc import grpc_channelz
+from framework.test_app import client_app
+from framework.test_app import server_app
+
+logger = logging.getLogger(__name__)
+flags.adopt_module_key_flags(xds_flags)
+flags.adopt_module_key_flags(xds_k8s_flags)
+
+# Type aliases
+XdsTestServer = server_app.XdsTestServer
+XdsTestClient = client_app.XdsTestClient
+
+
class XdsKubernetesTestCase(absltest.TestCase):
    """Base class for xDS test client/server Kubernetes interop tests.

    Caches command-line flag values once per test class, creates shared
    Kubernetes and GCP API managers, and provides common assertions over the
    client-reported load balancer stats. Child classes must initialize
    ``self.td``, ``self.server_runner`` and ``self.client_runner`` in their
    ``setUp()``.
    """
    k8s_api_manager: k8s.KubernetesApiManager
    gcp_api_manager: gcp.api.GcpApiManager

    @classmethod
    def setUpClass(cls):
        """Reads flag values shared by all tests of the class."""
        # GCP
        cls.project: str = xds_flags.PROJECT.value
        cls.network: str = xds_flags.NETWORK.value
        cls.gcp_service_account: str = xds_k8s_flags.GCP_SERVICE_ACCOUNT.value
        cls.td_bootstrap_image = xds_k8s_flags.TD_BOOTSTRAP_IMAGE.value

        # Base namespace
        # todo(sergiitk): generate for each test
        cls.namespace: str = xds_flags.NAMESPACE.value

        # Test server
        cls.server_image = xds_k8s_flags.SERVER_IMAGE.value
        cls.server_name = xds_flags.SERVER_NAME.value
        cls.server_port = xds_flags.SERVER_PORT.value
        # Fix: read the xDS hostname from its own flag. Previously this reused
        # SERVER_NAME, leaving the --server_xds_host flag without effect.
        cls.server_xds_host = xds_flags.SERVER_XDS_HOST.value
        cls.server_xds_port = xds_flags.SERVER_XDS_PORT.value

        # Test client
        cls.client_image = xds_k8s_flags.CLIENT_IMAGE.value
        cls.client_name = xds_flags.CLIENT_NAME.value
        cls.client_port = xds_flags.CLIENT_PORT.value
        cls.client_port_forwarding = xds_k8s_flags.CLIENT_PORT_FORWARDING.value

        # Resource managers
        cls.k8s_api_manager = k8s.KubernetesApiManager(
            xds_k8s_flags.KUBE_CONTEXT.value)
        cls.gcp_api_manager = gcp.api.GcpApiManager()

    def setUp(self):
        # todo(sergiitk): generate for each test
        self.server_namespace = self.namespace
        self.client_namespace = self.namespace

        # Init this in child class
        self.server_runner = None
        self.client_runner = None
        self.td = None
        # todo(sergiitk): generate namespace with run id

    @classmethod
    def tearDownClass(cls):
        """Closes the shared API managers created in setUpClass()."""
        cls.k8s_api_manager.close()
        cls.gcp_api_manager.close()

    def tearDown(self):
        """Cleans up TD, client and server resources created by the test."""
        logger.debug('######## tearDown(): resource cleanup initiated ########')
        self.td.cleanup()
        self.client_runner.cleanup()
        self.server_runner.cleanup()

    def setupTrafficDirectorGrpc(self):
        """Configures Traffic Director for plain gRPC (no security)."""
        self.td.setup_for_grpc(self.server_xds_host, self.server_xds_port)

    def setupServerBackends(self):
        """Adds the test server's NEG backends to the TD backend service."""
        # Load Backends
        neg_name, neg_zones = self.server_runner.k8s_namespace.get_service_neg(
            self.server_runner.service_name, self.server_port)

        # Add backends to the Backend Service
        self.td.backend_service_add_neg_backends(neg_name, neg_zones)

    def assertSuccessfulRpcs(
        self,
        test_client: XdsTestClient,
        num_rpcs: int = 100
    ):
        """Asserts that all backends received RPCs and none of them failed."""
        # Run the test
        lb_stats = test_client.get_load_balancer_stats(num_rpcs=num_rpcs)
        # Check the results
        self.assertAllBackendsReceivedRpcs(lb_stats)
        self.assertFailedRpcsAtMost(lb_stats, 0)

    def assertAllBackendsReceivedRpcs(self, lb_stats):
        """Asserts each backend in lb_stats received at least one RPC."""
        # todo(sergiitk): assert backends length
        logger.info(lb_stats.rpcs_by_peer)
        for backend, rpcs_count in lb_stats.rpcs_by_peer.items():
            self.assertGreater(
                int(rpcs_count), 0,
                # Fix: was a plain string, so {backend} never interpolated.
                msg=f'Backend {backend} did not receive a single RPC')

    def assertFailedRpcsAtMost(self, lb_stats, limit):
        """Asserts the number of failed RPCs does not exceed the limit."""
        failed = int(lb_stats.num_failures)
        self.assertLessEqual(
            failed, limit,
            msg=f'Unexpected number of RPC failures {failed} > {limit}')
+
+
class RegularXdsKubernetesTestCase(XdsKubernetesTestCase):
    """Regular (non-security) xDS Kubernetes interop test case."""

    def setUp(self):
        """Creates the TD manager and the test server/client runners."""
        super().setUp()

        # Traffic Director Configuration
        self.td = traffic_director.TrafficDirectorManager(
            self.gcp_api_manager,
            project=self.project,
            resource_prefix=self.namespace,
            network=self.network)

        # Test Server Runner
        self.server_runner = server_app.KubernetesServerRunner(
            k8s.KubernetesNamespace(self.k8s_api_manager,
                                    self.server_namespace),
            deployment_name=self.server_name,
            image_name=self.server_image,
            gcp_service_account=self.gcp_service_account,
            network=self.network,
            td_bootstrap_image=self.td_bootstrap_image)

        # Test Client Runner
        self.client_runner = client_app.KubernetesClientRunner(
            k8s.KubernetesNamespace(self.k8s_api_manager,
                                    self.client_namespace),
            deployment_name=self.client_name,
            image_name=self.client_image,
            gcp_service_account=self.gcp_service_account,
            network=self.network,
            td_bootstrap_image=self.td_bootstrap_image,
            debug_use_port_forwarding=self.client_port_forwarding,
            stats_port=self.client_port,
            reuse_namespace=self.server_namespace == self.client_namespace)

    def startTestServer(self, replica_count=1, **kwargs) -> XdsTestServer:
        """Deploys the test server and records its xDS host:port."""
        test_server = self.server_runner.run(
            replica_count=replica_count,
            test_port=self.server_port,
            **kwargs)
        test_server.set_xds_address(self.server_xds_host, self.server_xds_port)
        return test_server

    def startTestClient(self,
                        test_server: XdsTestServer,
                        **kwargs) -> XdsTestClient:
        """Deploys the test client and waits for a healthy server channel."""
        test_client = self.client_runner.run(server_target=test_server.xds_uri,
                                             **kwargs)
        # Fix: typo "fot" -> "for" in the log message.
        logger.debug('Waiting for the client to establish healthy channel with '
                     'the server')
        test_client.wait_for_active_server_channel()
        return test_client
+
+
class SecurityXdsKubernetesTestCase(XdsKubernetesTestCase):
    """xDS Kubernetes interop test case with PSM security enabled.

    Uses the secure variants of the Traffic Director resources and the
    ``*-secure.deployment.yaml`` templates, and provides assertions over the
    channelz-reported socket security of the client-server connection.
    """

    class SecurityMode(enum.Enum):
        """Expected security configuration of the client-server connection."""
        MTLS = enum.auto()
        TLS = enum.auto()
        PLAINTEXT = enum.auto()

    def setUp(self):
        """Creates the secure TD manager and the secure app runners."""
        super().setUp()

        # Traffic Director Configuration
        self.td = traffic_director.TrafficDirectorSecureManager(
            self.gcp_api_manager,
            project=self.project,
            resource_prefix=self.namespace,
            network=self.network)

        # Test Server Runner
        self.server_runner = server_app.KubernetesServerRunner(
            k8s.KubernetesNamespace(self.k8s_api_manager,
                                    self.server_namespace),
            deployment_name=self.server_name,
            image_name=self.server_image,
            gcp_service_account=self.gcp_service_account,
            network=self.network,
            td_bootstrap_image=self.td_bootstrap_image,
            deployment_template='server-secure.deployment.yaml',
            debug_use_port_forwarding=self.client_port_forwarding)

        # Test Client Runner
        self.client_runner = client_app.KubernetesClientRunner(
            k8s.KubernetesNamespace(self.k8s_api_manager,
                                    self.client_namespace),
            deployment_name=self.client_name,
            image_name=self.client_image,
            gcp_service_account=self.gcp_service_account,
            network=self.network,
            td_bootstrap_image=self.td_bootstrap_image,
            deployment_template='client-secure.deployment.yaml',
            stats_port=self.client_port,
            reuse_namespace=self.server_namespace == self.client_namespace,
            debug_use_port_forwarding=self.client_port_forwarding)

    def startSecureTestServer(self, replica_count=1, **kwargs) -> XdsTestServer:
        """Deploys the test server in secure mode and returns its handle."""
        test_server = self.server_runner.run(
            replica_count=replica_count,
            test_port=self.server_port,
            # In secure mode maintenance RPCs are served on a separate port.
            maintenance_port=8081,
            secure_mode=True,
            **kwargs)
        test_server.set_xds_address(self.server_xds_host, self.server_xds_port)
        return test_server

    def setupSecurityPolicies(self, *,
                              server_tls, server_mtls,
                              client_tls, client_mtls):
        """Creates TD client and server security resources per given modes."""
        self.td.setup_client_security(self.server_namespace, self.server_name,
                                      tls=client_tls, mtls=client_mtls)
        self.td.setup_server_security(self.server_port,
                                      tls=server_tls, mtls=server_mtls)

    def startSecureTestClient(
        self,
        test_server: XdsTestServer,
        **kwargs
    ) -> XdsTestClient:
        """Deploys the test client in secure mode; waits for healthy channel."""
        test_client = self.client_runner.run(
            server_target=test_server.xds_uri,
            secure_mode=True,
            **kwargs)
        # Fix: typo "fot" -> "for" in the log message.
        logger.debug('Waiting for the client to establish healthy channel with '
                     'the server')
        test_client.wait_for_active_server_channel()
        return test_client

    def assertTestAppSecurity(self,
                              mode: SecurityMode,
                              test_client: XdsTestClient,
                              test_server: XdsTestServer):
        """Asserts the client-server connection matches the security mode."""
        client_socket, server_socket = self.getConnectedSockets(test_client,
                                                                test_server)
        server_security: grpc_channelz.Security = server_socket.security
        client_security: grpc_channelz.Security = client_socket.security
        logger.info('Server certs: %s', self.debug_sock_certs(server_security))
        logger.info('Client certs: %s', self.debug_sock_certs(client_security))

        if mode is self.SecurityMode.MTLS:
            self.assertSecurityMtls(client_security, server_security)
        elif mode is self.SecurityMode.TLS:
            self.assertSecurityTls(client_security, server_security)
        elif mode is self.SecurityMode.PLAINTEXT:
            self.assertSecurityPlaintext(client_security, server_security)
        else:
            # Fix: include the offending value in the error message; the
            # f-string previously had no placeholder.
            raise TypeError(f'Incorrect security mode: {mode}')

    def assertSecurityMtls(self,
                           client_security: grpc_channelz.Security,
                           server_security: grpc_channelz.Security):
        """Asserts mTLS: both peers present certs and both cert pairs match."""
        self.assertEqual(client_security.WhichOneof('model'), 'tls',
                         msg='(mTLS) Client socket security model must be TLS')
        self.assertEqual(server_security.WhichOneof('model'), 'tls',
                         msg='(mTLS) Server socket security model must be TLS')
        server_tls, client_tls = server_security.tls, client_security.tls

        # Confirm regular TLS: server local cert == client remote cert
        self.assertNotEmpty(
            server_tls.local_certificate,
            msg="(mTLS) Server local certificate is missing")
        self.assertNotEmpty(
            client_tls.remote_certificate,
            msg="(mTLS) Client remote certificate is missing")
        self.assertEqual(
            server_tls.local_certificate, client_tls.remote_certificate,
            msg="(mTLS) Server local certificate must match client's "
                "remote certificate")

        # mTLS: server remote cert == client local cert
        self.assertNotEmpty(
            server_tls.remote_certificate,
            msg="(mTLS) Server remote certificate is missing")
        self.assertNotEmpty(
            client_tls.local_certificate,
            msg="(mTLS) Client local certificate is missing")
        self.assertEqual(
            server_tls.remote_certificate, client_tls.local_certificate,
            msg="(mTLS) Server remote certificate must match client's "
                "local certificate")

        # Success
        logger.info('mTLS security mode confirmed!')

    def assertSecurityTls(self,
                          client_security: grpc_channelz.Security,
                          server_security: grpc_channelz.Security):
        """Asserts TLS-only: server cert pair matches, no client certs."""
        self.assertEqual(client_security.WhichOneof('model'), 'tls',
                         msg='(TLS) Client socket security model must be TLS')
        self.assertEqual(server_security.WhichOneof('model'), 'tls',
                         msg='(TLS) Server socket security model must be TLS')
        server_tls, client_tls = server_security.tls, client_security.tls

        # Regular TLS: server local cert == client remote cert
        self.assertNotEmpty(
            server_tls.local_certificate,
            msg="(TLS) Server local certificate is missing")
        self.assertNotEmpty(
            client_tls.remote_certificate,
            msg="(TLS) Client remote certificate is missing")
        self.assertEqual(
            server_tls.local_certificate, client_tls.remote_certificate,
            msg="(TLS) Server local certificate must match client "
                "remote certificate")

        # mTLS must not be used
        self.assertEmpty(
            server_tls.remote_certificate,
            msg="(TLS) Server remote certificate must be empty in TLS mode. "
                "Is server security incorrectly configured for mTLS?")
        self.assertEmpty(
            client_tls.local_certificate,
            msg="(TLS) Client local certificate must be empty in TLS mode. "
                "Is client security incorrectly configured for mTLS?")

        # Success
        logger.info('TLS security mode confirmed!')

    def assertSecurityPlaintext(self, client_security, server_security):
        """Asserts plaintext: no certificates present on either socket."""
        server_tls, client_tls = server_security.tls, client_security.tls
        # Not TLS
        # Fix: the client-side check previously inspected local_certificate
        # twice and never verified the remote certificate.
        self.assertEmpty(
            server_tls.local_certificate,
            msg="(Plaintext) Server local certificate must be empty.")
        self.assertEmpty(
            client_tls.remote_certificate,
            msg="(Plaintext) Client remote certificate must be empty.")

        # Not mTLS
        self.assertEmpty(
            server_tls.remote_certificate,
            msg="(Plaintext) Server remote certificate must be empty.")
        self.assertEmpty(
            client_tls.local_certificate,
            msg="(Plaintext) Client local certificate must be empty.")

        # Success
        logger.info('Plaintext security mode confirmed!')

    @staticmethod
    def getConnectedSockets(
        test_client: XdsTestClient,
        test_server: XdsTestServer
    ) -> Tuple[grpc_channelz.Socket, grpc_channelz.Socket]:
        """Returns the (client, server) channelz sockets of the connection."""
        client_sock = test_client.get_client_socket_with_test_server()
        server_sock = test_server.get_server_socket_matching_client(client_sock)
        return client_sock, server_sock

    @classmethod
    def debug_sock_certs(cls, security: grpc_channelz.Security):
        """Returns a printable summary of a socket's security certificates."""
        if security.WhichOneof('model') == 'other':
            return f'other: <{security.other.name}={security.other.value}>'

        return (f'local: <{cls.debug_cert(security.tls.local_certificate)}>, '
                f'remote: <{cls.debug_cert(security.tls.remote_certificate)}>')

    @staticmethod
    def debug_cert(cert):
        """Returns a short printable digest (sha1, length) of a certificate."""
        if not cert:
            return 'missing'
        sha1 = hashlib.sha1(cert)
        return f'sha1={sha1.hexdigest()}, len={len(cert)}'

+ 79 - 0
tools/run_tests/xds_test_driver/kubernetes-manifests/client-secure.deployment.yaml

@@ -0,0 +1,79 @@
# Deployment template for the xDS test client with PSM security enabled.
# ${...} placeholders are substituted by the test framework before applying.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ${deployment_name}
  namespace: ${namespace_name}
  labels:
    app: ${deployment_name}
    owner: xds-k8s-interop-test
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ${deployment_name}
  template:
    metadata:
      labels:
        app: ${deployment_name}
        owner: xds-k8s-interop-test
    spec:
      serviceAccountName: ${service_account_name}
      containers:
      - name: ${deployment_name}
        image: ${image_name}
        imagePullPolicy: Always
        args:
          - "--server=${server_target}"
          - "--stats_port=${stats_port}"
          - "--secure_mode=${secure_mode}"
          - "--qps=${qps}"
          - "--rpc=${rpc}"
          - "--print_response=${print_response}"
        ports:
          - containerPort: ${stats_port}
        env:
          # Path to the bootstrap file produced by the grpc-td-init container.
          - name: GRPC_XDS_BOOTSTRAP
            value: "/tmp/grpc-xds/td-grpc-bootstrap.json"
          - name: GRPC_XDS_EXPERIMENTAL_SECURITY_SUPPORT
            value: "true"
          - name: GRPC_XDS_CERT_INSTANCE_OVERRIDE
            value: "true"
        volumeMounts:
          - mountPath: /tmp/grpc-xds/
            name: grpc-td-conf
            readOnly: true
          # Workload certificates delivered by the SPIFFE CSI driver volume.
          - mountPath: /var/run/gke-spiffe/certs
            name: gke-spiffe-certs-volume
            readOnly: true
        resources:
          limits:
            cpu: 800m
            memory: 512Mi
          requests:
            cpu: 100m
            memory: 512Mi
      initContainers:
        # Generates the Traffic Director gRPC bootstrap file consumed by the
        # client via GRPC_XDS_BOOTSTRAP, with PSM security enabled.
        - name: grpc-td-init
          image: ${td_bootstrap_image}
          imagePullPolicy: Always
          args:
            - "--output=/tmp/bootstrap/td-grpc-bootstrap.json"
            - "--vpc-network-name=${network_name}"
            - "--include-psm-security-experimental"
          resources:
            limits:
              cpu: 100m
              memory: 100Mi
            requests:
              cpu: 10m
              memory: 100Mi
          volumeMounts:
            - mountPath: /tmp/bootstrap/
              name: grpc-td-conf
      volumes:
        # In-memory volume shared between the init and client containers.
        - name: grpc-td-conf
          emptyDir:
            medium: Memory
        - name: gke-spiffe-certs-volume
          csi:
            driver: certs.spiffe.gke.io

+ 67 - 0
tools/run_tests/xds_test_driver/kubernetes-manifests/client.deployment.yaml

@@ -0,0 +1,67 @@
# Deployment template for the plain (non-secure) xDS test client.
# ${...} placeholders are substituted by the test framework before applying.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ${deployment_name}
  namespace: ${namespace_name}
  labels:
    app: ${deployment_name}
    owner: xds-k8s-interop-test
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ${deployment_name}
  template:
    metadata:
      labels:
        app: ${deployment_name}
        owner: xds-k8s-interop-test
    spec:
      serviceAccountName: ${service_account_name}
      containers:
      - name: ${deployment_name}
        image: ${image_name}
        imagePullPolicy: Always
        args:
          - "--server=${server_target}"
          - "--stats_port=${stats_port}"
          - "--qps=${qps}"
          - "--rpc=${rpc}"
          - "--print_response=${print_response}"
        ports:
          - containerPort: ${stats_port}
        env:
          # Path to the bootstrap file produced by the grpc-td-init container.
          - name: GRPC_XDS_BOOTSTRAP
            value: "/tmp/grpc-xds/td-grpc-bootstrap.json"
        volumeMounts:
          - mountPath: /tmp/grpc-xds/
            name: grpc-td-conf
            readOnly: true
        resources:
          limits:
            cpu: 800m
            memory: 512Mi
          requests:
            cpu: 100m
            memory: 512Mi
      initContainers:
        # Generates the Traffic Director gRPC bootstrap file consumed by the
        # client via GRPC_XDS_BOOTSTRAP.
        - name: grpc-td-init
          image: ${td_bootstrap_image}
          imagePullPolicy: Always
          args:
            - "--output=/tmp/bootstrap/td-grpc-bootstrap.json"
            - "--vpc-network-name=${network_name}"
          resources:
            limits:
              cpu: 100m
              memory: 100Mi
            requests:
              cpu: 10m
              memory: 100Mi
          volumeMounts:
            - mountPath: /tmp/bootstrap/
              name: grpc-td-conf
      volumes:
        # In-memory volume shared between the init and client containers.
        - name: grpc-td-conf
          emptyDir:
            medium: Memory

+ 7 - 0
tools/run_tests/xds_test_driver/kubernetes-manifests/namespace.yaml

@@ -0,0 +1,7 @@
# Namespace template isolating all Kubernetes resources of a test run.
# ${namespace_name} is substituted by the test framework before applying.
apiVersion: v1
kind: Namespace
metadata:
  name: ${namespace_name}
  labels:
    name: ${namespace_name}
    owner: xds-k8s-interop-test

+ 78 - 0
tools/run_tests/xds_test_driver/kubernetes-manifests/server-secure.deployment.yaml

@@ -0,0 +1,78 @@
# Deployment template for the xDS test server with PSM security enabled.
# ${...} placeholders are substituted by the test framework before applying.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ${deployment_name}
  namespace: ${namespace_name}
  labels:
    app: ${deployment_name}
    owner: xds-k8s-interop-test
spec:
  replicas: ${replica_count}
  selector:
    matchLabels:
      app: ${deployment_name}
  template:
    metadata:
      labels:
        app: ${deployment_name}
        owner: xds-k8s-interop-test
    spec:
      serviceAccountName: ${service_account_name}
      containers:
      - name: ${deployment_name}
        image: ${image_name}
        imagePullPolicy: Always
        args:
          - "--port=${test_port}"
          - "--maintenance_port=${maintenance_port}"
          - "--secure_mode=${secure_mode}"
        ports:
          - containerPort: ${test_port}
          - containerPort: ${maintenance_port}
        env:
          # Path to the bootstrap file produced by the grpc-td-init container.
          - name: GRPC_XDS_BOOTSTRAP
            value: "/tmp/grpc-xds/td-grpc-bootstrap.json"
          - name: GRPC_XDS_EXPERIMENTAL_SECURITY_SUPPORT
            value: "true"
          - name: GRPC_XDS_CERT_INSTANCE_OVERRIDE
            value: "true"
        volumeMounts:
          - mountPath: /tmp/grpc-xds/
            name: grpc-td-conf
            readOnly: true
          # Workload certificates delivered by the SPIFFE CSI driver volume.
          - mountPath: /var/run/gke-spiffe/certs
            name: gke-spiffe-certs-volume
            readOnly: true
        resources:
          limits:
            cpu: 800m
            memory: 512Mi
          requests:
            cpu: 100m
            memory: 512Mi
      initContainers:
        # Generates the Traffic Director gRPC bootstrap file consumed by the
        # server via GRPC_XDS_BOOTSTRAP, with PSM security enabled.
        - name: grpc-td-init
          image: ${td_bootstrap_image}
          imagePullPolicy: Always
          args:
            - "--output=/tmp/bootstrap/td-grpc-bootstrap.json"
            - "--vpc-network-name=${network_name}"
            - "--include-psm-security-experimental"
            - "--node-metadata-experimental=version=production"
          resources:
            limits:
              cpu: 100m
              memory: 100Mi
            requests:
              cpu: 10m
              memory: 100Mi
          volumeMounts:
            - mountPath: /tmp/bootstrap/
              name: grpc-td-conf
      volumes:
        # In-memory volume shared between the init and server containers.
        - name: grpc-td-conf
          emptyDir:
            medium: Memory
        - name: gke-spiffe-certs-volume
          csi:
            driver: certs.spiffe.gke.io

+ 34 - 0
tools/run_tests/xds_test_driver/kubernetes-manifests/server.deployment.yaml

@@ -0,0 +1,34 @@
# Deployment template for the plain (non-secure) xDS test server.
# ${...} placeholders are substituted by the test framework before applying.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ${deployment_name}
  namespace: ${namespace_name}
  labels:
    app: ${deployment_name}
    owner: xds-k8s-interop-test
spec:
  replicas: ${replica_count}
  selector:
    matchLabels:
      app: ${deployment_name}
  template:
    metadata:
      labels:
        app: ${deployment_name}
        # Consistency fix: every other manifest in this driver labels the
        # created resources with the owner; the selector only matches on
        # "app", so adding the label is backward-compatible.
        owner: xds-k8s-interop-test
    spec:
      serviceAccountName: ${service_account_name}
      containers:
      - name: ${deployment_name}
        image: ${image_name}
        imagePullPolicy: Always
        args:
          - "--port=${test_port}"
        ports:
          - containerPort: ${test_port}
        resources:
          limits:
            cpu: 800m
            memory: 512Mi
          requests:
            cpu: 100m
            memory: 512Mi

+ 17 - 0
tools/run_tests/xds_test_driver/kubernetes-manifests/server.service.yaml

@@ -0,0 +1,17 @@
# Service template exposing the xDS test server inside the cluster.
# ${...} placeholders are substituted by the test framework before applying.
apiVersion: v1
kind: Service
metadata:
  name: ${service_name}
  namespace: ${namespace_name}
  labels:
    owner: xds-k8s-interop-test
  annotations:
    # Exposes the test port as a standalone Network Endpoint Group named
    # ${neg_name}; the test driver adds this NEG to the TD backend service.
    cloud.google.com/neg: '{"exposed_ports": {"${test_port}":{"name":"${neg_name}"}}}'
spec:
  type: ClusterIP
  selector:
    app: ${deployment_name}
  ports:
  - port: ${test_port}
    protocol: TCP
    targetPort: ${test_port}

+ 9 - 0
tools/run_tests/xds_test_driver/kubernetes-manifests/service-account.yaml

@@ -0,0 +1,9 @@
# ServiceAccount template for the test app pods.
# ${...} placeholders are substituted by the test framework before applying.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: ${service_account_name}
  namespace: ${namespace_name}
  labels:
    owner: xds-k8s-interop-test
  annotations:
    # Lets GKE workloads using this KSA impersonate the GCP service account
    # (cf. the --gcp_service_account flag help text).
    iam.gke.io/gcp-service-account: ${gcp_service_account}

+ 12 - 0
tools/run_tests/xds_test_driver/requirements.txt

@@ -0,0 +1,12 @@
# Python dependencies of the xDS Kubernetes interop test driver.
Mako~=1.1
PyYAML~=5.3
absl-py~=0.11
# NOTE(review): `dataclasses` is the stdlib backport package — presumably the
# driver targets Python 3.6; confirm, as it is redundant on 3.7+.
dataclasses~=0.8
google-api-python-client~=1.12
grpcio~=1.34
grpcio-tools~=1.34
grpcio-channelz~=1.34
kubernetes~=12.0
retrying~=1.3
tenacity~=6.2
protobuf~=3.14

+ 13 - 0
tools/run_tests/xds_test_driver/tests/__init__.py

@@ -0,0 +1,13 @@
+# Copyright 2020 gRPC authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+ 41 - 0
tools/run_tests/xds_test_driver/tests/baseline_test.py

@@ -0,0 +1,41 @@
+#  Copyright 2020 gRPC authors.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+import logging
+
+from absl import flags
+from absl.testing import absltest
+
+from framework import xds_k8s_testcase
+
+logger = logging.getLogger(__name__)
+flags.adopt_module_key_flags(xds_k8s_testcase)
+
+# Type aliases
+XdsTestServer = xds_k8s_testcase.XdsTestServer
+XdsTestClient = xds_k8s_testcase.XdsTestClient
+
+
class BaselineTest(xds_k8s_testcase.RegularXdsKubernetesTestCase):
    """Baseline ("ping pong") test: plain TD setup, no security."""

    def test_ping_pong(self):
        """Configures TD, runs server and client, asserts RPCs succeed."""
        self.setupTrafficDirectorGrpc()

        test_server: XdsTestServer = self.startTestServer()
        self.setupServerBackends()

        test_client: XdsTestClient = self.startTestClient(test_server)
        self.assertSuccessfulRpcs(test_client)


if __name__ == '__main__':
    absltest.main()

+ 79 - 0
tools/run_tests/xds_test_driver/tests/security_test.py

@@ -0,0 +1,79 @@
+#  Copyright 2020 gRPC authors.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+import logging
+
+from absl import flags
+from absl.testing import absltest
+
+from framework import xds_k8s_testcase
+
+logger = logging.getLogger(__name__)
+flags.adopt_module_key_flags(xds_k8s_testcase)
+SKIP_REASON = 'Work in progress'
+
+# Type aliases
+XdsTestServer = xds_k8s_testcase.XdsTestServer
+XdsTestClient = xds_k8s_testcase.XdsTestClient
+SecurityMode = xds_k8s_testcase.SecurityXdsKubernetesTestCase.SecurityMode
+
+
+class SecurityTest(xds_k8s_testcase.SecurityXdsKubernetesTestCase):
+    def test_mtls(self):
+        self.setupTrafficDirectorGrpc()
+        self.setupSecurityPolicies(server_tls=True, server_mtls=True,
+                                   client_tls=True, client_mtls=True)
+
+        test_server: XdsTestServer = self.startSecureTestServer()
+        self.setupServerBackends()
+        test_client: XdsTestClient = self.startSecureTestClient(test_server)
+
+        self.assertTestAppSecurity(SecurityMode.MTLS, test_client, test_server)
+        self.assertSuccessfulRpcs(test_client)
+
+    def test_tls(self):
+        self.setupTrafficDirectorGrpc()
+        self.setupSecurityPolicies(server_tls=True, server_mtls=False,
+                                   client_tls=True, client_mtls=False)
+
+        test_server: XdsTestServer = self.startSecureTestServer()
+        self.setupServerBackends()
+        test_client: XdsTestClient = self.startSecureTestClient(test_server)
+
+        self.assertTestAppSecurity(SecurityMode.TLS, test_client, test_server)
+        self.assertSuccessfulRpcs(test_client)
+
+    def test_plaintext_fallback(self):
+        self.setupTrafficDirectorGrpc()
+        self.setupSecurityPolicies(server_tls=False, server_mtls=False,
+                                   client_tls=False, client_mtls=False)
+
+        test_server: XdsTestServer = self.startSecureTestServer()
+        self.setupServerBackends()
+        test_client: XdsTestClient = self.startSecureTestClient(test_server)
+
+        self.assertTestAppSecurity(
+            SecurityMode.PLAINTEXT, test_client, test_server)
+        self.assertSuccessfulRpcs(test_client)
+
+    @absltest.skip(SKIP_REASON)
+    def test_mtls_error(self):
+        pass
+
+    @absltest.skip(SKIP_REASON)
+    def test_server_authz_error(self):
+        pass
+
+
+if __name__ == '__main__':
+    absltest.main()