| # -*- coding: utf-8 -*- |
| # Copyright 2011 Google Inc. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| """Implementation of Unix-like rm command for cloud storage providers.""" |
| |
| from __future__ import absolute_import |
| |
| from gslib.cloud_api import NotEmptyException |
| from gslib.cloud_api import ServiceException |
| from gslib.command import Command |
| from gslib.command import GetFailureCount |
| from gslib.command import ResetFailureCount |
| from gslib.command_argument import CommandArgument |
| from gslib.cs_api_map import ApiSelector |
| from gslib.exception import CommandException |
| from gslib.name_expansion import NameExpansionIterator |
| from gslib.storage_url import StorageUrlFromString |
| from gslib.translation_helper import PreconditionsFromHeaders |
| from gslib.util import GetCloudApiInstance |
| from gslib.util import NO_MAX |
| from gslib.util import Retry |
| from gslib.util import StdinIterator |
| |
| |
| _SYNOPSIS = """ |
| gsutil rm [-f] [-r] url... |
| gsutil rm [-f] [-r] -I |
| """ |
| |
| # Full help text for the rm command. _SYNOPSIS is spliced into the SYNOPSIS |
| # section, and RmCommand passes the result to Command.HelpSpec as help_text. |
| # Section headings are wrapped in <B>...</B> tags. |
| _DETAILED_HELP_TEXT = (""" |
| <B>SYNOPSIS</B> |
| """ + _SYNOPSIS + """ |
| |
| |
| <B>DESCRIPTION</B> |
| The gsutil rm command removes objects. |
| For example, the command: |
| |
| gsutil rm gs://bucket/subdir/* |
| |
| will remove all objects in gs://bucket/subdir, but not in any of its |
| sub-directories. In contrast: |
| |
| gsutil rm gs://bucket/subdir/** |
| |
| will remove all objects under gs://bucket/subdir or any of its |
| subdirectories. |
| |
| You can also use the -r option to specify recursive object deletion. Thus, for |
| example, either of the following two commands will remove gs://bucket/subdir |
| and all objects and subdirectories under it: |
| |
| gsutil rm gs://bucket/subdir** |
| gsutil rm -r gs://bucket/subdir |
| |
| The -r option will also delete all object versions in the subdirectory for |
| versioning-enabled buckets, whereas the ** command will only delete the live |
| version of each object in the subdirectory. |
| |
| Running gsutil rm -r on a bucket will delete all versions of all objects in |
| the bucket, and then delete the bucket: |
| |
| gsutil rm -r gs://bucket |
| |
| If you want to delete all objects in the bucket, but not the bucket itself, |
| this command will work: |
| |
| gsutil rm gs://bucket/** |
| |
| If you have a large number of objects to remove you might want to use the |
| gsutil -m option, to perform a parallel (multi-threaded/multi-processing) |
| removes: |
| |
| gsutil -m rm -r gs://my_bucket/subdir |
| |
| You can pass a list of URLs (one per line) to remove on stdin instead of as |
| command line arguments by using the -I option. This allows you to use gsutil |
| in a pipeline to remove objects identified by a program, such as: |
| |
| some_program | gsutil -m rm -I |
| |
| The contents of stdin can name cloud URLs and wildcards of cloud URLs. |
| |
| Note that gsutil rm will refuse to remove files from the local |
| file system. For example this will fail: |
| |
| gsutil rm *.txt |
| |
| WARNING: Object removal cannot be undone. Google Cloud Storage is designed |
| to give developers a high amount of flexibility and control over their data, |
| and Google maintains strict controls over the processing and purging of |
| deleted data. To protect yourself from mistakes, you can configure object |
| versioning on your bucket(s). See 'gsutil help versions' for details. |
| |
| |
| <B>DATA RESTORATION FROM ACCIDENTAL DELETION OR OVERWRITES</B> |
| Google Cloud Storage does not provide support for restoring data lost |
| or overwritten due to customer errors. If you have concerns that your |
| application software (or your users) may at some point erroneously delete or |
| overwrite data, you can protect yourself from that risk by enabling Object |
| Versioning (see "gsutil help versioning"). Doing so increases storage costs, |
| which can be partially mitigated by configuring Lifecycle Management to delete |
| older object versions (see "gsutil help lifecycle"). |
| |
| |
| <B>OPTIONS</B> |
| -f Continues silently (without printing error messages) despite |
| errors when removing multiple objects. If some of the objects |
| could not be removed, gsutil's exit status will be non-zero even |
| if this flag is set. This option is implicitly set when running |
| "gsutil -m rm ...". |
| |
| -I Causes gsutil to read the list of objects to remove from stdin. |
| This allows you to run a program that generates the list of |
| objects to remove. |
| |
| -R, -r Causes bucket or bucket subdirectory contents (all objects and |
| subdirectories that it contains) to be removed recursively. If |
| used with a bucket-only URL (like gs://bucket), after deleting |
| objects and subdirectories gsutil will delete the bucket. The -r |
| flag implies the -a flag and will delete all object versions. |
| |
| -a Delete all versions of an object. |
| """) |
| |
| |
| def _RemoveExceptionHandler(cls, e): |
| """Simple exception handler to allow post-completion status.""" |
| if not cls.continue_on_error: |
| cls.logger.error(str(e)) |
| cls.everything_removed_okay = False |
| |
| |
# pylint: disable=unused-argument
def _RemoveFoldersExceptionHandler(cls, e):
  """Exception handler for folder-object removal that tolerates no matches.

  When removing folders, we don't mind if none exist, so a CommandException
  whose reason contains 'No URLs matched' is swallowed. Any other exception is
  re-raised.

  Args:
    cls: Command instance (unused).
    e: The exception raised while removing a folder object.

  Raises:
    The given exception, unless it is a 'No URLs matched' CommandException.
  """
  # The check must be against CommandException itself:
  # CommandException.__class__ is its metaclass, and no exception instance is
  # an instance of that, so testing against it can never match. The text is
  # read from e.reason, the same attribute RunCommand inspects on
  # CommandException.
  if isinstance(e, CommandException) and 'No URLs matched' in e.reason:
    return
  raise e
| |
| |
| def _RemoveFuncWrapper(cls, name_expansion_result, thread_state=None): |
| cls.RemoveFunc(name_expansion_result, thread_state=thread_state) |
| |
| |
class RmCommand(Command):
  """Implementation of gsutil rm command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'rm',
      command_name_aliases=['del', 'delete', 'remove'],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='afIrR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rm',
      help_name_aliases=['del', 'delete', 'remove'],
      help_type='command_help',
      help_one_line_summary='Remove objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the rm command.

    Parses the sub-options, removes every object matched by the given URLs
    (read from the command line, or from stdin with -I), then, for -r, removes
    matching '_$folder$' objects and finally any buckets that were named.

    Returns:
      0 on completion.

    Raises:
      CommandException: if the arguments are invalid, or if a removal failed
          and -f was not given.
    """
    # self.recursion_requested is initialized in command.py (so it can be
    # checked in parent class for all commands).
    self.continue_on_error = False
    self.read_args_from_stdin = False
    self.all_versions = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
          self.all_versions = True

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
      if self.recursion_requested:
        # With -r the URL list is walked more than once below (bucket
        # detection, name expansion, folder-object cleanup), so snapshot it.
        # NOTE(review): assumes StdinIterator is single-pass, as stdin itself
        # is; otherwise the first walk would leave nothing for the later ones.
        url_strs = list(url_strs)
    else:
      if not self.args:
        raise CommandException('The rm command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    # Buckets named directly with -r are deleted after their contents; the
    # original URL strings are kept to recognize "empty bucket" errors below.
    bucket_urls_to_delete = []
    bucket_strings_to_delete = []
    if self.recursion_requested:
      bucket_fields = ['id']
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsBucket() or url.IsProvider():
          for blr in self.WildcardIterator(url_str).IterBuckets(
              bucket_fields=bucket_fields):
            bucket_urls_to_delete.append(blr.storage_url)
            bucket_strings_to_delete.append(url_str)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    # Used to track if any files failed to be removed.
    self.everything_removed_okay = True

    try:
      # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
      name_expansion_iterator = NameExpansionIterator(
          self.command_name, self.debug, self.logger, self.gsutil_api,
          url_strs, self.recursion_requested, project_id=self.project_id,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations)

      # Perform remove requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                 _RemoveExceptionHandler,
                 fail_on_error=(not self.continue_on_error))

    # Assuming the bucket has versioning enabled, url's that don't map to
    # objects should throw an error even with all_versions, since the prior
    # round of deletes only sends objects to a history table.
    # This assumption that rm -a is only called for versioned buckets should be
    # corrected, but the fix is non-trivial.
    except CommandException as e:
      # Don't raise if there are buckets to delete -- it's valid to say:
      #   gsutil rm -r gs://some_bucket
      # if the bucket is empty.
      if not bucket_urls_to_delete and not self.continue_on_error:
        raise
      # Reset the failure count if we failed due to an empty bucket that we're
      # going to delete.
      msg = 'No URLs matched: '
      if msg in str(e):
        parts = str(e).split(msg)
        if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
          ResetFailureCount()
    except ServiceException:
      # Service errors are only tolerated when -f was given.
      if not self.continue_on_error:
        raise

    if not self.everything_removed_okay and not self.continue_on_error:
      raise CommandException('Some files could not be removed.')

    # If this was a gsutil rm -r command covering any bucket subdirs,
    # remove any dir_$folder$ objects (which are created by various web UI
    # tools to simulate folders).
    if self.recursion_requested:
      had_previous_failures = GetFailureCount() > 0
      folder_object_wildcards = []
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsObject():
          folder_object_wildcards.append('%s**_$folder$' % url_str)
      if folder_object_wildcards:
        self.continue_on_error = True
        try:
          name_expansion_iterator = NameExpansionIterator(
              self.command_name, self.debug,
              self.logger, self.gsutil_api,
              folder_object_wildcards, self.recursion_requested,
              project_id=self.project_id,
              all_versions=self.all_versions)
          # When we're removing folder objects, always continue on error
          self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                     _RemoveFoldersExceptionHandler,
                     fail_on_error=False)
        except CommandException as e:
          # Ignore exception from name expansion due to an absent folder file.
          if not e.reason.startswith('No URLs matched:'):
            raise
        # Failures from the folder-object pass alone should not count.
        if not had_previous_failures:
          ResetFailureCount()

    # Now that all data has been deleted, delete any bucket URLs.
    for url in bucket_urls_to_delete:
      self.logger.info('Removing %s...', url)

      # Retried on NotEmptyException. The helper is invoked immediately, so
      # it always sees the current loop value of url.
      @Retry(NotEmptyException, tries=3, timeout_secs=1)
      def BucketDeleteWithRetry():
        self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)

      BucketDeleteWithRetry()

    return 0

  def RemoveFunc(self, name_expansion_result, thread_state=None):
    """Deletes the single object named by a name expansion result.

    Args:
      name_expansion_result: Result whose expanded_storage_url identifies the
          object (bucket, object name, generation, scheme) to delete.
      thread_state: Passed to GetCloudApiInstance to obtain the API instance
          used for the delete.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info('Removing %s...', exp_src_url)
    gsutil_api.DeleteObject(
        exp_src_url.bucket_name, exp_src_url.object_name,
        preconditions=self.preconditions, generation=exp_src_url.generation,
        provider=exp_src_url.scheme)
| |