// blob: 64a58ed7e646234ab5135bef3363c7f097b9357f [file] [log] [blame]
// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Authors: George Yakovlev
//          Philippe Liard
#include "phonenumbers/regexp_adapter_re2.h"
#include <cstddef>
#include <string>
#include <re2/re2.h>
#include <re2/stringpiece.h>
#include "phonenumbers/base/basictypes.h"
#include "phonenumbers/base/logging.h"
#include "phonenumbers/stringutil.h"
namespace i18n {
namespace phonenumbers {
using re2::StringPiece;
// Implementation of RegExpInput abstract class.
//
// Owns a private copy of the UTF-8 text and exposes it to RE2 through a
// StringPiece view. The view is handed out by Data() as a mutable pointer so
// that RE2's consuming calls can move it forward; ToString() always reflects
// whatever the view currently covers.
class RE2RegExpInput : public RegExpInput {
 public:
  // Copies utf8_input into string_ and points the view at that copy.
  // string_ is declared before utf8_input_, so it is fully constructed by the
  // time the view is initialised from it.
  explicit RE2RegExpInput(const string& utf8_input)
      : string_(utf8_input),
        utf8_input_(string_) {}

  // Returns a copy of the text currently covered by the view.
  //
  // The string is built from data()/size() rather than through a
  // StringPiece-specific helper so that this compiles regardless of which
  // string-view type re2::StringPiece happens to be.
  virtual string ToString() const {
    if (utf8_input_.empty()) {
      return string();
    }
    return string(utf8_input_.data(), utf8_input_.size());
  }

  // Returns a pointer to the view itself (not a copy), so modifications made
  // through it are visible to later ToString() calls.
  StringPiece* Data() {
    return &utf8_input_;
  }

 private:
  // Not copyable: a member-wise copy would duplicate utf8_input_ while it
  // still points into the *source* object's string_, leaving the copy with a
  // dangling view once the source is destroyed. Declared but intentionally
  // left undefined.
  RE2RegExpInput(const RE2RegExpInput&);
  void operator=(const RE2RegExpInput&);

  // string_ holds the string referenced by utf8_input_ as StringPiece doesn't
  // copy the string passed in.
  const string string_;
  StringPiece utf8_input_;
};
namespace {
// Calls regex_function(input, regexp, ...) with as many capture arguments as
// the caller actually supplied.
//
// The arity is chosen from the last non-NULL output pointer: if out3 is set
// all three pointers are forwarded, otherwise if out2 is set the first two are
// forwarded, otherwise if out1 is set only it is forwarded, and if none is set
// the function is called with no capture arguments at all.
//
// input is taken by const reference so that passing a string does not copy
// the whole text just to forward it; pointer inputs are forwarded unchanged.
//
// Returns whatever regex_function returns, converted to bool.
template <typename Function, typename Input>
bool DispatchRE2Call(Function regex_function,
                     const Input& input,
                     const RE2& regexp,
                     string* out1,
                     string* out2,
                     string* out3) {
  if (out3) {
    return regex_function(input, regexp, out1, out2, out3);
  }
  if (out2) {
    return regex_function(input, regexp, out1, out2);
  }
  if (out1) {
    return regex_function(input, regexp, out1);
  }
  return regex_function(input, regexp);
}
// Converts a replacement string that uses dollar-sign group references into
// one that uses backslash group references.
//
// The input is scanned once, left to right:
//   - an unescaped '$' becomes '\' (so "$1" turns into "\1");
//   - a backslash that escapes a '$' is dropped and the '$' is kept literally
//     (so "\$" turns into "$");
//   - a backslash that escapes any other character is copied through together
//     with that character, so an escaped backslash ("\\") survives intact and
//     is never mistaken for an escaped dollar-sign;
//   - every other character, including a lone trailing backslash, is copied
//     unchanged.
//
// Scanning in a single pass (instead of substituting globally and then
// collapsing backslash pairs) keeps backslash pairs that were already present
// in the input from being rewritten into dollar-signs.
string TransformRegularExpressionToRE2Syntax(const string& regex) {
  string re2_regex;
  re2_regex.reserve(regex.size());
  for (string::size_type i = 0; i < regex.size(); ++i) {
    const char current = regex[i];
    if (current == '\\' && i + 1 < regex.size()) {
      // Consume the escaped character together with its backslash so it is
      // not examined again as an unescaped character.
      const char escaped = regex[++i];
      if (escaped != '$') {
        re2_regex += '\\';
      }
      re2_regex += escaped;
    } else if (current == '$') {
      re2_regex += '\\';
    } else {
      re2_regex += current;
    }
  }
  return re2_regex;
}
} // namespace
// Implementation of RegExp abstract class.
class RE2RegExp : public RegExp {
public:
explicit RE2RegExp(const string& utf8_regexp)
: utf8_regexp_(utf8_regexp) {}
virtual bool Consume(RegExpInput* input_string,
bool anchor_at_start,
string* matched_string1,
string* matched_string2,
string* matched_string3) const {
DCHECK(input_string);
StringPiece* utf8_input =
static_cast<RE2RegExpInput*>(input_string)->Data();
if (anchor_at_start) {
return DispatchRE2Call(RE2::Consume, utf8_input, utf8_regexp_,
matched_string1, matched_string2,
matched_string3);
} else {
return DispatchRE2Call(RE2::FindAndConsume, utf8_input, utf8_regexp_,
matched_string1, matched_string2,
matched_string3);
}
}
virtual bool Match(const string& input_string,
bool full_match,
string* matched_string) const {
if (full_match) {
return DispatchRE2Call(RE2::FullMatch, input_string, utf8_regexp_,
matched_string, NULL, NULL);
} else {
return DispatchRE2Call(RE2::PartialMatch, input_string, utf8_regexp_,
matched_string, NULL, NULL);
}
}
virtual bool Replace(string* string_to_process,
bool global,
const string& replacement_string) const {
DCHECK(string_to_process);
const string re2_replacement_string =
TransformRegularExpressionToRE2Syntax(replacement_string);
if (global) {
return RE2::GlobalReplace(string_to_process, utf8_regexp_,
re2_replacement_string);
} else {
return RE2::Replace(string_to_process, utf8_regexp_,
re2_replacement_string);
}
}
private:
RE2 utf8_regexp_;
};
// Creates a heap-allocated RE2RegExpInput holding its own copy of utf8_input
// and returns it through the RegExpInput interface. The object is allocated
// with new and nothing here frees it, so the caller is presumably expected to
// delete it.
RegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const {
  RegExpInput* const created_input = new RE2RegExpInput(utf8_input);
  return created_input;
}
// Creates a heap-allocated RE2RegExp for utf8_regexp and returns it through
// the RegExp interface. The object is allocated with new and nothing here
// frees it, so the caller is presumably expected to delete it.
RegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const {
  RegExp* const created_regexp = new RE2RegExp(utf8_regexp);
  return created_regexp;
}
} // namespace phonenumbers
} // namespace i18n