#!/usr/bin/env python2.7

from __future__ import print_function

import argparse
import json
import uuid

import httplib2
from apiclient import discovery
from apiclient.errors import HttpError
from oauth2client.client import GoogleCredentials

# 30 days in milliseconds
_EXPIRATION_MS = 30 * 24 * 60 * 60 * 1000
NUM_RETRIES = 3

 
def create_big_query():
    """Authenticates with Cloud Platform and gets a BigQuery service object."""
    creds = GoogleCredentials.get_application_default()
    return discovery.build(
        'bigquery', 'v2', credentials=creds, cache_discovery=False)

 
def create_dataset(big_query, project_id, dataset_id):
    """Creates a dataset; returns True on success or if it already exists."""
    is_success = True
    body = {
        'datasetReference': {
            'projectId': project_id,
            'datasetId': dataset_id
        }
    }
    try:
        dataset_req = big_query.datasets().insert(
            projectId=project_id, body=body)
        dataset_req.execute(num_retries=NUM_RETRIES)
    except HttpError as http_error:
        if http_error.resp.status == 409:
            print('Warning: The dataset %s already exists' % dataset_id)
        else:
            # Note: for more debugging info, print "http_error.content"
            print('Error in creating dataset: %s. Err: %s' % (dataset_id,
                                                              http_error))
            is_success = False
    return is_success

 
def create_table(big_query, project_id, dataset_id, table_id, table_schema,
                 description):
    """Creates a table.

    table_schema is a list of (column name, column type, description) tuples.
    """
    fields = [{
        'name': field_name,
        'type': field_type,
        'description': field_description
    } for (field_name, field_type, field_description) in table_schema]
    return create_table2(big_query, project_id, dataset_id, table_id, fields,
                         description)
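
# Example (hypothetical project/dataset/table names; the schema format is the
# (column name, column type, description) tuple list described above):
#
#   big_query = create_big_query()
#   table_schema = [
#       ('test_name', 'STRING', 'Name of the test'),
#       ('duration_ms', 'INTEGER', 'Wall time in milliseconds'),
#   ]
#   create_table(big_query, 'my-project', 'my_dataset', 'my_table',
#                table_schema, 'Example results table')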
 
def create_partitioned_table(big_query,
                             project_id,
                             dataset_id,
                             table_id,
                             table_schema,
                             description,
                             partition_type='DAY',
                             expiration_ms=_EXPIRATION_MS):
    """Creates a partitioned table.

    By default, a date-partitioned table is created and each partition expires
    30 days after it was last modified.
    """
    fields = [{
        'name': field_name,
        'type': field_type,
        'description': field_description
    } for (field_name, field_type, field_description) in table_schema]
    return create_table2(big_query, project_id, dataset_id, table_id, fields,
                         description, partition_type, expiration_ms)
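
# Example (hypothetical names): same schema as above, but day-partitioned with
# each partition kept for 7 days instead of the 30-day default:
#
#   create_partitioned_table(big_query, 'my-project', 'my_dataset',
#                            'my_partitioned_table', table_schema,
#                            'Example partitioned table',
#                            partition_type='DAY',
#                            expiration_ms=7 * 24 * 60 * 60 * 1000)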
 
def create_table2(big_query,
                  project_id,
                  dataset_id,
                  table_id,
                  fields_schema,
                  description,
                  partition_type=None,
                  expiration_ms=None):
    """Creates a table from an already-expanded list of field dicts."""
    is_success = True
    body = {
        'description': description,
        'schema': {
            'fields': fields_schema
        },
        'tableReference': {
            'datasetId': dataset_id,
            'projectId': project_id,
            'tableId': table_id
        }
    }
    if partition_type and expiration_ms:
        body['timePartitioning'] = {
            'type': partition_type,
            'expirationMs': expiration_ms
        }
    try:
        table_req = big_query.tables().insert(
            projectId=project_id, datasetId=dataset_id, body=body)
        res = table_req.execute(num_retries=NUM_RETRIES)
        print('Successfully created %s "%s"' % (res['kind'], res['id']))
    except HttpError as http_error:
        if http_error.resp.status == 409:
            print('Warning: Table %s already exists' % table_id)
        else:
            print('Error in creating table: %s. Err: %s' % (table_id,
                                                            http_error))
            is_success = False
    return is_success

 
def patch_table(big_query, project_id, dataset_id, table_id, fields_schema):
    """Patches the schema of an existing table."""
    is_success = True
    body = {
        'schema': {
            'fields': fields_schema
        },
        'tableReference': {
            'datasetId': dataset_id,
            'projectId': project_id,
            'tableId': table_id
        }
    }
    try:
        table_req = big_query.tables().patch(
            projectId=project_id,
            datasetId=dataset_id,
            tableId=table_id,
            body=body)
        res = table_req.execute(num_retries=NUM_RETRIES)
        print('Successfully patched %s "%s"' % (res['kind'], res['id']))
    except HttpError as http_error:
        print('Error in patching table: %s. Err: %s' % (table_id, http_error))
        is_success = False
    return is_success

 
def insert_rows(big_query, project_id, dataset_id, table_id, rows_list):
    """Streams rows (as built by make_row) into an existing table."""
    is_success = True
    body = {'rows': rows_list}
    try:
        insert_req = big_query.tabledata().insertAll(
            projectId=project_id,
            datasetId=dataset_id,
            tableId=table_id,
            body=body)
        res = insert_req.execute(num_retries=NUM_RETRIES)
        if res.get('insertErrors', None):
            print('Error inserting rows! Response: %s' % res)
            is_success = False
    except HttpError as http_error:
        print('Error inserting rows to the table %s. Err: %s' % (table_id,
                                                                 http_error))
        is_success = False
    return is_success
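
# Example (hypothetical values): each row needs a unique insert id so that
# retried inserts can be deduplicated by BigQuery; a uuid4 string works well:
#
#   rows = [make_row(str(uuid.uuid4()),
#                    {'test_name': 'smoke_test', 'duration_ms': 1234})]
#   insert_rows(big_query, 'my-project', 'my_dataset', 'my_table', rows)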
 
def sync_query_job(big_query, project_id, query, timeout=5000):
    """Runs a synchronous query and returns the raw response, or None on error."""
    query_data = {'query': query, 'timeoutMs': timeout}
    query_job = None
    try:
        query_job = big_query.jobs().query(
            projectId=project_id,
            body=query_data).execute(num_retries=NUM_RETRIES)
    except HttpError as http_error:
        print('Query execute job failed with error: %s' % http_error)
        print(http_error.content)
    return query_job
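
# Example (hypothetical query): the result is the raw jobs().query() response,
# where each cell is wrapped in BigQuery's 'f'/'v' structure:
#
#   result = sync_query_job(big_query, 'my-project',
#                           'SELECT test_name FROM my_dataset.my_table')
#   if result and 'rows' in result:
#       for row in result['rows']:
#           print(row['f'][0]['v'])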
 
def make_row(unique_row_id, row_values_dict):
    """Builds a single row for insert_rows.

    row_values_dict is a dictionary mapping column names to column values.
    """
    return {'insertId': unique_row_id, 'json': row_values_dict}
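
# Minimal end-to-end sketch (runs only when executed directly). It assumes
# application-default credentials are configured and that the hypothetical
# project/dataset/table names below are replaced with real ones.
if __name__ == '__main__':
    bq = create_big_query()
    create_dataset(bq, 'my-project', 'my_dataset')
    create_table(bq, 'my-project', 'my_dataset', 'my_table',
                 [('test_name', 'STRING', 'Name of the test'),
                  ('duration_ms', 'INTEGER', 'Wall time in milliseconds')],
                 'Example results table')
    insert_rows(bq, 'my-project', 'my_dataset', 'my_table',
                [make_row(str(uuid.uuid4()),
                          {'test_name': 'smoke_test', 'duration_ms': 1234})])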
 
 