# blob: 2659c03e63198996d22a295fa008053173b59042 [file] [log] [blame]
"""
Copyright 2014 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Utilities for accessing Google Cloud Storage.

TODO(epoger): move this into tools/utils for broader use?
"""
# System-level imports
import os
import posixpath
import sys
# google-api-python-client is a third-party package; tell the user plainly when
# it is not installed instead of failing with a bare ImportError traceback.
try:
    from apiclient.discovery import build as build_service
except ImportError:
    # NOTE(review): the message stops at 'can be found at' with nothing after
    # it -- the installation URL appears to have been lost and should be
    # restored.  Also note that build_service stays undefined after this
    # handler, so any later call that needs it will raise NameError.
    print('Missing google-api-python-client. Please install it; directions '
          'can be found at')
# Local imports
import url_utils
def download_file(source_bucket, source_path, dest_path,
                  create_subdirs_if_needed=False):
    """Downloads a single file from Google Cloud Storage to local disk.

    Args:
      source_bucket: GCS bucket to download the file from
      source_path: full path (Posix-style) within that bucket
      dest_path: full path (local-OS-style) on local disk to copy the file to
      create_subdirs_if_needed: boolean; whether to create subdirectories as
          needed to create dest_path
    """
    # NOTE(review): the signature was cut off after 'dest_path,'; the default
    # of False for create_subdirs_if_needed is an assumption -- confirm
    # against callers.

    # NOTE(review): the base component was an empty string, which makes the
    # join produce 'bucket/path' rather than a URL.  The public GCS request
    # endpoint is used here -- confirm this is the intended host and scheme.
    gcs_endpoint = 'https://storage.googleapis.com'

    # posixpath.join() discards everything before a component that starts with
    # '/', so a leading slash on source_path would silently drop the endpoint
    # and the bucket.  Strip it first.
    object_path = source_path.lstrip('/')
    source_http_url = posixpath.join(gcs_endpoint, source_bucket, object_path)

    # NOTE(review): the call was unterminated after 'dest_path=dest_path,';
    # forwarding create_subdirs_if_needed under the same keyword name is
    # presumed from the docstring -- verify against url_utils.copy_contents.
    url_utils.copy_contents(source_url=source_http_url, dest_path=dest_path,
                            create_subdirs_if_needed=create_subdirs_if_needed)
def list_bucket_contents(bucket, subdir=None):
    """Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.

    Uses the objects().list() method of the 'storage' v1 API service.

    Args:
      bucket: name of the Google Storage bucket
      subdir: directory within the bucket to list, or None for root directory

    Returns:
      (dirs, files) tuple of lists.  Every entry is a name relative to subdir;
      directory names have their trailing slash removed.
    """
    # The GCS command relies on the subdir name (if any) ending with a slash.
    if subdir and not subdir.endswith('/'):
        subdir += '/'
    subdir_length = len(subdir) if subdir else 0

    storage = build_service('storage', 'v1')
    # prefix restricts the listing to subdir; delimiter='/' makes the service
    # report deeper levels as 'prefixes' instead of individual objects.
    # NOTE(review): a single execute() reads only one page of results; very
    # large directories may need pagination -- confirm whether that matters.
    command = storage.objects().list(
        bucket=bucket, delimiter='/', fields='items(name),prefixes',
        prefix=subdir)
    results = command.execute()

    # The GCS command returned two subdicts:
    # prefixes: the full path of every directory within subdir, with
    #           trailing '/'
    # items: property dict for each file object within subdir
    #        (including 'name', which is full path of the object)

    # Drop the leading subdir and the trailing slash from each directory path.
    dirs = [dir_fullpath[subdir_length:][:-1]
            for dir_fullpath in results.get('prefixes', [])]

    # Drop the leading subdir from each object name.  (Previously the basename
    # was computed but never stored, so the files list was always empty.)
    files = [file_properties['name'][subdir_length:]
             for file_properties in results.get('items', [])]

    return (dirs, files)