| /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| |
| #include "tensorflow/core/lib/strings/str_util.h" |
| |
| #include <ctype.h> |
| #include <algorithm> |
| #include <cstring> |
| #include <vector> |
| #include "absl/strings/ascii.h" |
| #include "absl/strings/escaping.h" |
| #include "absl/strings/match.h" |
| #include "absl/strings/strip.h" |
| #include "tensorflow/core/lib/strings/stringprintf.h" |
| #include "tensorflow/core/platform/logging.h" |
| |
| namespace tensorflow { |
| namespace str_util { |
| |
| string CEscape(StringPiece src) { return absl::CEscape(src); } |
| |
| bool CUnescape(StringPiece source, string* dest, string* error) { |
| return absl::CUnescape(source, dest, error); |
| } |
| |
| void StripTrailingWhitespace(string* s) { |
| absl::StripTrailingAsciiWhitespace(s); |
| } |
| |
| // Return lower-cased version of s. |
| string Lowercase(StringPiece s) { return absl::AsciiStrToLower(s); } |
| |
| // Return upper-cased version of s. |
| string Uppercase(StringPiece s) { return absl::AsciiStrToUpper(s); } |
| |
| string ArgDefCase(StringPiece s) { |
| const size_t n = s.size(); |
| |
| // Compute the size of resulting string. |
| // Number of extra underscores we will need to add. |
| size_t extra_us = 0; |
| // Number of non-alpha chars in the beginning to skip. |
| size_t to_skip = 0; |
| for (size_t i = 0; i < n; ++i) { |
| // If we are skipping and current letter is non-alpha, skip it as well |
| if (i == to_skip && !isalpha(s[i])) { |
| ++to_skip; |
| continue; |
| } |
| |
| // If we are here, we are not skipping any more. |
| // If this letter is upper case, not the very first char in the |
| // resulting string, and previous letter isn't replaced with an underscore, |
| // we will need to insert an underscore. |
| if (isupper(s[i]) && i != to_skip && i > 0 && isalnum(s[i - 1])) { |
| ++extra_us; |
| } |
| } |
| |
| // Initialize result with all '_'s. There is no string |
| // constructor that does not initialize memory. |
| string result(n + extra_us - to_skip, '_'); |
| // i - index into s |
| // j - index into result |
| for (size_t i = to_skip, j = 0; i < n; ++i, ++j) { |
| DCHECK_LT(j, result.size()); |
| char c = s[i]; |
| // If c is not alphanumeric, we don't need to do anything |
| // since there is already an underscore in its place. |
| if (isalnum(c)) { |
| if (isupper(c)) { |
| // If current char is upper case, we might need to insert an |
| // underscore. |
| if (i != to_skip) { |
| DCHECK_GT(j, 0); |
| if (result[j - 1] != '_') ++j; |
| } |
| result[j] = tolower(c); |
| } else { |
| result[j] = c; |
| } |
| } |
| } |
| |
| return result; |
| } |
| |
| void TitlecaseString(string* s, StringPiece delimiters) { |
| bool upper = true; |
| for (string::iterator ss = s->begin(); ss != s->end(); ++ss) { |
| if (upper) { |
| *ss = toupper(*ss); |
| } |
| upper = (delimiters.find(*ss) != StringPiece::npos); |
| } |
| } |
| |
| string StringReplace(StringPiece s, StringPiece oldsub, StringPiece newsub, |
| bool replace_all) { |
| // TODO(jlebar): We could avoid having to shift data around in the string if |
| // we had a StringPiece::find() overload that searched for a StringPiece. |
| string res(s); |
| size_t pos = 0; |
| while ((pos = res.find(oldsub.data(), pos, oldsub.size())) != string::npos) { |
| res.replace(pos, oldsub.size(), newsub.data(), newsub.size()); |
| pos += newsub.size(); |
| if (oldsub.empty()) { |
| pos++; // Match at the beginning of the text and after every byte |
| } |
| if (!replace_all) { |
| break; |
| } |
| } |
| return res; |
| } |
| |
| size_t RemoveLeadingWhitespace(StringPiece* text) { |
| absl::string_view new_text = absl::StripLeadingAsciiWhitespace(*text); |
| size_t count = text->size() - new_text.size(); |
| *text = new_text; |
| return count; |
| } |
| |
| size_t RemoveTrailingWhitespace(StringPiece* text) { |
| absl::string_view new_text = absl::StripTrailingAsciiWhitespace(*text); |
| size_t count = text->size() - new_text.size(); |
| *text = new_text; |
| return count; |
| } |
| |
| size_t RemoveWhitespaceContext(StringPiece* text) { |
| absl::string_view new_text = absl::StripAsciiWhitespace(*text); |
| size_t count = text->size() - new_text.size(); |
| *text = new_text; |
| return count; |
| } |
| |
| bool ConsumePrefix(StringPiece* s, StringPiece expected) { |
| return absl::ConsumePrefix(s, expected); |
| } |
| |
| bool ConsumeSuffix(StringPiece* s, StringPiece expected) { |
| return absl::ConsumeSuffix(s, expected); |
| } |
| |
| bool ConsumeLeadingDigits(StringPiece* s, uint64* val) { |
| const char* p = s->data(); |
| const char* limit = p + s->size(); |
| uint64 v = 0; |
| while (p < limit) { |
| const char c = *p; |
| if (c < '0' || c > '9') break; |
| uint64 new_v = (v * 10) + (c - '0'); |
| if (new_v / 8 < v) { |
| // Overflow occurred |
| return false; |
| } |
| v = new_v; |
| p++; |
| } |
| if (p > s->data()) { |
| // Consume some digits |
| s->remove_prefix(p - s->data()); |
| *val = v; |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| bool ConsumeNonWhitespace(StringPiece* s, StringPiece* val) { |
| const char* p = s->data(); |
| const char* limit = p + s->size(); |
| while (p < limit) { |
| const char c = *p; |
| if (isspace(c)) break; |
| p++; |
| } |
| const size_t n = p - s->data(); |
| if (n > 0) { |
| *val = StringPiece(s->data(), n); |
| s->remove_prefix(n); |
| return true; |
| } else { |
| *val = StringPiece(); |
| return false; |
| } |
| } |
| |
// Returns the number of characters in `str` that precede the first '\0',
// examining at most `string_max_len` characters. If no '\0' is found within
// that window, `string_max_len` is returned.
size_t Strnlen(const char* str, const size_t string_max_len) {
  const char* cursor = str;
  // The remaining-count test comes first so that no character beyond the
  // window is ever read.
  for (size_t remaining = string_max_len; remaining > 0 && *cursor != '\0';
       --remaining) {
    ++cursor;
  }
  return static_cast<size_t>(cursor - str);
}
| |
| bool StrContains(StringPiece haystack, StringPiece needle) { |
| return absl::StrContains(haystack, needle); |
| } |
| |
| bool StartsWith(StringPiece text, StringPiece prefix) { |
| return absl::StartsWith(text, prefix); |
| } |
| |
| bool EndsWith(StringPiece text, StringPiece suffix) { |
| return absl::EndsWith(text, suffix); |
| } |
| |
| } // namespace str_util |
| } // namespace tensorflow |