Upgrade rust/crates/protobuf to 2.20.0
Test: make
Change-Id: Ib611629af667df0d09ceb4668cb9512d946503db
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 2bb6739..97d68b2 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
{
"git": {
- "sha1": "09619bc3a85243f092e68ab9ba1f2c5573d5a85b"
+ "sha1": "1e5368c80fd4272ada7d2ed7c6be783376bb5080"
}
}
diff --git a/Android.bp b/Android.bp
index 90fd1da..7f83ff2 100644
--- a/Android.bp
+++ b/Android.bp
@@ -9,6 +9,7 @@
rust_library {
name: "libprotobuf",
+ // has rustc warnings
host_supported: true,
crate_name: "protobuf",
srcs: [
@@ -27,4 +28,4 @@
}
// dependent_library ["feature_list"]
-// bytes-0.5.6 "default,std"
+// bytes-1.0.0 "default,std"
diff --git a/Cargo.toml b/Cargo.toml
index f4ed1e8..de24a5d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@
[package]
edition = "2018"
name = "protobuf"
-version = "2.18.1"
+version = "2.20.0"
authors = ["Stepan Koltsov <stepan.koltsov@gmail.com>"]
description = "Rust implementation of Google protocol buffers\n"
homepage = "https://github.com/stepancheg/rust-protobuf/"
@@ -27,7 +27,7 @@
doctest = false
bench = false
[dependencies.bytes]
-version = "0.5"
+version = "1.0"
optional = true
[dependencies.serde]
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 3b03c96..0935e0e 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,7 +1,7 @@
[package]
name = "protobuf"
-version = "2.18.1"
+version = "2.20.0"
authors = ["Stepan Koltsov <stepan.koltsov@gmail.com>"]
edition = "2018"
license = "MIT"
@@ -21,7 +21,7 @@
with-serde = ["serde", "serde_derive"]
[dependencies]
-bytes = { version = "0.5", optional = true }
+bytes = { version = "1.0", optional = true }
serde = { version = "1.0", optional = true }
serde_derive = { version = "1.0", optional = true }
diff --git a/METADATA b/METADATA
index e6de1b9..59982b1 100644
--- a/METADATA
+++ b/METADATA
@@ -7,13 +7,13 @@
}
url {
type: ARCHIVE
- value: "https://static.crates.io/crates/protobuf/protobuf-2.18.1.crate"
+ value: "https://static.crates.io/crates/protobuf/protobuf-2.20.0.crate"
}
- version: "2.18.1"
+ version: "2.20.0"
license_type: NOTICE
last_upgrade_date {
- year: 2020
- month: 11
- day: 24
+ year: 2021
+ month: 1
+ day: 7
}
}
diff --git a/out/version.rs b/out/version.rs
index 22c110e..239f02e 100644
--- a/out/version.rs
+++ b/out/version.rs
@@ -1,7 +1,7 @@
/// protobuf crate version
-pub const VERSION: &'static str = "2.18.1";
+pub const VERSION: &'static str = "2.20.0";
/// This symbol is used by codegen
#[doc(hidden)]
-pub const VERSION_IDENT: &'static str = "VERSION_2_18_1";
+pub const VERSION_IDENT: &'static str = "VERSION_2_20_0";
/// This symbol can be referenced to assert that proper version of crate is used
-pub const VERSION_2_18_1: () = ();
+pub const VERSION_2_20_0: () = ();
diff --git a/src/buf_read_iter.rs b/src/buf_read_iter.rs
index 8b4dbf5..cf36280 100644
--- a/src/buf_read_iter.rs
+++ b/src/buf_read_iter.rs
@@ -6,6 +6,8 @@
use std::u64;
#[cfg(feature = "bytes")]
+use bytes::buf::UninitSlice;
+#[cfg(feature = "bytes")]
use bytes::BufMut;
#[cfg(feature = "bytes")]
use bytes::Bytes;
@@ -308,7 +310,7 @@
} else {
let mut r = BytesMut::with_capacity(len);
unsafe {
- let buf = Self::slice_get_mut(&mut r.bytes_mut()[..len]);
+ let buf = Self::uninit_slice_as_mut_slice(&mut r.chunk_mut()[..len]);
self.read_exact(buf)?;
r.advance_mut(len);
}
@@ -317,10 +319,10 @@
}
}
- /// Copy-paste of `MaybeUninit::slice_get_mut`
- #[allow(dead_code)] // only used when bytes feature is on
- unsafe fn slice_get_mut<T>(slice: &mut [MaybeUninit<T>]) -> &mut [T] {
- &mut *(slice as *mut [MaybeUninit<T>] as *mut [T])
+ #[cfg(feature = "bytes")]
+ unsafe fn uninit_slice_as_mut_slice(slice: &mut UninitSlice) -> &mut [u8] {
+ use std::slice;
+ slice::from_raw_parts_mut(slice.as_mut_ptr(), slice.len())
}
/// Returns 0 when EOF or limit reached.
diff --git a/src/compiler_plugin.rs b/src/compiler_plugin.rs
index b23f905..e056071 100644
--- a/src/compiler_plugin.rs
+++ b/src/compiler_plugin.rs
@@ -2,7 +2,6 @@
#![doc(hidden)]
use crate::descriptor::FileDescriptorProto;
-use crate::parse_from_reader;
use crate::plugin::*;
use crate::Message;
use std::io::stdin;
@@ -31,7 +30,7 @@
where
F: Fn(&GenRequest) -> Vec<GenResult>,
{
- let req = parse_from_reader::<CodeGeneratorRequest>(&mut stdin()).unwrap();
+ let req = CodeGeneratorRequest::parse_from_reader(&mut stdin()).unwrap();
let result = gen(&GenRequest {
file_descriptors: &req.get_proto_file(),
files_to_generate: &req.get_file_to_generate(),
diff --git a/src/descriptor.rs b/src/descriptor.rs
index a03970a..01dcd81 100644
--- a/src/descriptor.rs
+++ b/src/descriptor.rs
@@ -1,4 +1,4 @@
-// This file is generated by rust-protobuf 2.17.0-pre. Do not edit
+// This file is generated by rust-protobuf 2.19.0-pre. Do not edit
// @generated
// https://github.com/rust-lang/rust-clippy/issues/702
@@ -9727,7 +9727,7 @@
static file_descriptor_proto_lazy: crate::rt::LazyV2<crate::descriptor::FileDescriptorProto> = crate::rt::LazyV2::INIT;
fn parse_descriptor_proto() -> crate::descriptor::FileDescriptorProto {
- crate::parse_from_bytes(file_descriptor_proto_data).unwrap()
+ crate::Message::parse_from_bytes(file_descriptor_proto_data).unwrap()
}
pub fn file_descriptor_proto() -> &'static crate::descriptor::FileDescriptorProto {
diff --git a/src/lib.rs b/src/lib.rs
index f8d1089..da5a756 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -17,9 +17,12 @@
pub use crate::enums::ProtobufEnum;
pub use crate::error::ProtobufError;
pub use crate::error::ProtobufResult;
+#[allow(deprecated)]
pub use crate::message::parse_from_bytes;
#[cfg(feature = "bytes")]
+#[allow(deprecated)]
pub use crate::message::parse_from_carllerche_bytes;
+#[allow(deprecated)]
pub use crate::message::parse_from_reader;
#[allow(deprecated)]
pub use crate::message::parse_length_delimited_from;
diff --git a/src/message.rs b/src/message.rs
index 2f1a1e7..b22ace2 100644
--- a/src/message.rs
+++ b/src/message.rs
@@ -32,6 +32,17 @@
/// Update this message object with fields read from given stream.
fn merge_from(&mut self, is: &mut CodedInputStream) -> ProtobufResult<()>;
+ /// Parse message from stream.
+ fn parse_from(is: &mut CodedInputStream) -> ProtobufResult<Self>
+ where
+ Self: Sized,
+ {
+ let mut r: Self = Message::new();
+ r.merge_from(is)?;
+ r.check_initialized()?;
+ Ok(r)
+ }
+
/// Write message to the stream.
///
/// Sizes of this messages and nested messages must be cached
@@ -85,6 +96,42 @@
self.merge_from(&mut is)
}
+ /// Parse message from reader.
+ /// Parse stops on EOF or when error encountered.
+ fn parse_from_reader(reader: &mut dyn Read) -> ProtobufResult<Self>
+ where
+ Self: Sized,
+ {
+ let mut is = CodedInputStream::new(reader);
+ let r = Message::parse_from(&mut is)?;
+ is.check_eof()?;
+ Ok(r)
+ }
+
+ /// Parse message from byte array.
+ fn parse_from_bytes(bytes: &[u8]) -> ProtobufResult<Self>
+ where
+ Self: Sized,
+ {
+ let mut is = CodedInputStream::from_bytes(bytes);
+ let r = Message::parse_from(&mut is)?;
+ is.check_eof()?;
+ Ok(r)
+ }
+
+ /// Parse message from `Bytes` object.
+ /// Resulting message may share references to the passed bytes object.
+ #[cfg(feature = "bytes")]
+ fn parse_from_carllerche_bytes(bytes: &Bytes) -> ProtobufResult<Self>
+ where
+ Self: Sized,
+ {
+ let mut is = CodedInputStream::from_carllerche_bytes(bytes);
+ let r = Self::parse_from(&mut is)?;
+ is.check_eof()?;
+ Ok(r)
+ }
+
/// Check if all required fields of this object are initialized.
fn check_initialized(&self) -> ProtobufResult<()> {
if !self.is_initialized() {
@@ -213,31 +260,28 @@
m.as_any().downcast_ref::<M>().unwrap()
}
-/// Parse message from stream.
-pub fn parse_from<M: Message>(is: &mut CodedInputStream) -> ProtobufResult<M> {
- let mut r: M = Message::new();
- r.merge_from(is)?;
- r.check_initialized()?;
- Ok(r)
-}
-
/// Parse message from reader.
/// Parse stops on EOF or when error encountered.
+#[deprecated(since = "2.19", note = "Use Message::parse_from_reader instead")]
pub fn parse_from_reader<M: Message>(reader: &mut dyn Read) -> ProtobufResult<M> {
- reader.with_coded_input_stream(|is| parse_from::<M>(is))
+ M::parse_from_reader(reader)
}
/// Parse message from byte array.
+#[deprecated(since = "2.19", note = "Use Message::parse_from_bytes instead")]
pub fn parse_from_bytes<M: Message>(bytes: &[u8]) -> ProtobufResult<M> {
- bytes.with_coded_input_stream(|is| parse_from::<M>(is))
+ M::parse_from_bytes(bytes)
}
/// Parse message from `Bytes` object.
/// Resulting message may share references to the passed bytes object.
#[cfg(feature = "bytes")]
+#[deprecated(
+ since = "2.19",
+ note = "Use Message::parse_from_carllerche_bytes instead"
+)]
pub fn parse_from_carllerche_bytes<M: Message>(bytes: &Bytes) -> ProtobufResult<M> {
- // Call trait explicitly to avoid accidental construction from `&[u8]`
- WithCodedInputStream::with_coded_input_stream(bytes, |is| parse_from::<M>(is))
+ M::parse_from_carllerche_bytes(bytes)
}
/// Parse length-delimited message from stream.
diff --git a/src/plugin.rs b/src/plugin.rs
index e3ac18f..904b0f1 100644
--- a/src/plugin.rs
+++ b/src/plugin.rs
@@ -1,4 +1,4 @@
-// This file is generated by rust-protobuf 2.17.0-pre. Do not edit
+// This file is generated by rust-protobuf 2.19.0-pre. Do not edit
// @generated
// https://github.com/rust-lang/rust-clippy/issues/702
@@ -971,7 +971,7 @@
static file_descriptor_proto_lazy: crate::rt::LazyV2<crate::descriptor::FileDescriptorProto> = crate::rt::LazyV2::INIT;
fn parse_descriptor_proto() -> crate::descriptor::FileDescriptorProto {
- crate::parse_from_bytes(file_descriptor_proto_data).unwrap()
+ crate::Message::parse_from_bytes(file_descriptor_proto_data).unwrap()
}
pub fn file_descriptor_proto() -> &'static crate::descriptor::FileDescriptorProto {
diff --git a/src/repeated.rs b/src/repeated.rs
index 6ceae53..8068d0c 100644
--- a/src/repeated.rs
+++ b/src/repeated.rs
@@ -369,6 +369,24 @@
}
}
+impl<'a, T> IntoIterator for &'a mut RepeatedField<T> {
+ type Item = &'a mut T;
+ type IntoIter = slice::IterMut<'a, T>;
+
+ fn into_iter(self) -> slice::IterMut<'a, T> {
+ self.iter_mut()
+ }
+}
+
+impl<T> IntoIterator for RepeatedField<T> {
+ type Item = T;
+ type IntoIter = vec::IntoIter<T>;
+
+ fn into_iter(self) -> vec::IntoIter<T> {
+ self.into_vec().into_iter()
+ }
+}
+
impl<T: PartialEq> PartialEq for RepeatedField<T> {
#[inline]
fn eq(&self, other: &RepeatedField<T>) -> bool {
diff --git a/src/rustproto.rs b/src/rustproto.rs
index 9b9a360..cf78d20 100644
--- a/src/rustproto.rs
+++ b/src/rustproto.rs
@@ -1,4 +1,4 @@
-// This file is generated by rust-protobuf 2.17.0-pre. Do not edit
+// This file is generated by rust-protobuf 2.19.0-pre. Do not edit
// @generated
// https://github.com/rust-lang/rust-clippy/issues/702
@@ -185,7 +185,7 @@
static file_descriptor_proto_lazy: crate::rt::LazyV2<crate::descriptor::FileDescriptorProto> = crate::rt::LazyV2::INIT;
fn parse_descriptor_proto() -> crate::descriptor::FileDescriptorProto {
- crate::parse_from_bytes(file_descriptor_proto_data).unwrap()
+ crate::Message::parse_from_bytes(file_descriptor_proto_data).unwrap()
}
pub fn file_descriptor_proto() -> &'static crate::descriptor::FileDescriptorProto {
diff --git a/src/text_format/lexer/float.rs b/src/text_format/lexer/float.rs
new file mode 100644
index 0000000..0d69c09
--- /dev/null
+++ b/src/text_format/lexer/float.rs
@@ -0,0 +1,58 @@
+use std::f64;
+
+#[derive(Debug)]
+pub enum ProtobufFloatParseError {
+ EmptyString,
+ CannotParseFloat,
+}
+
+pub type ProtobufFloatParseResult<T> = Result<T, ProtobufFloatParseError>;
+
+pub const PROTOBUF_NAN: &str = "nan";
+pub const PROTOBUF_INF: &str = "inf";
+
+/// Format float as in protobuf `.proto` files
+pub fn format_protobuf_float(f: f64) -> String {
+ if f.is_nan() {
+ PROTOBUF_NAN.to_owned()
+ } else if f.is_infinite() {
+ if f > 0.0 {
+ format!("{}", PROTOBUF_INF)
+ } else {
+ format!("-{}", PROTOBUF_INF)
+ }
+ } else {
+ // TODO: make sure doesn't lose precision
+ format!("{}", f)
+ }
+}
+
+/// Parse float from `.proto` format
+pub fn parse_protobuf_float(s: &str) -> ProtobufFloatParseResult<f64> {
+ if s.is_empty() {
+ return Err(ProtobufFloatParseError::EmptyString);
+ }
+ if s == PROTOBUF_NAN {
+ return Ok(f64::NAN);
+ }
+ if s == PROTOBUF_INF || s == format!("+{}", PROTOBUF_INF) {
+ return Ok(f64::INFINITY);
+ }
+ if s == format!("-{}", PROTOBUF_INF) {
+ return Ok(f64::NEG_INFINITY);
+ }
+ match s.parse() {
+ Ok(f) => Ok(f),
+ Err(_) => Err(ProtobufFloatParseError::CannotParseFloat),
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn test_format_protobuf_float() {
+ assert_eq!("10", format_protobuf_float(10.0));
+ }
+}
diff --git a/src/text_format/lexer/json_number_lit.rs b/src/text_format/lexer/json_number_lit.rs
new file mode 100644
index 0000000..6394b0e
--- /dev/null
+++ b/src/text_format/lexer/json_number_lit.rs
@@ -0,0 +1,10 @@
+use std::fmt;
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct JsonNumberLit(pub(crate) String);
+
+impl fmt::Display for JsonNumberLit {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Display::fmt(&self.0, f)
+ }
+}
diff --git a/src/text_format/lexer/lexer_impl.rs b/src/text_format/lexer/lexer_impl.rs
new file mode 100644
index 0000000..f824a7a
--- /dev/null
+++ b/src/text_format/lexer/lexer_impl.rs
@@ -0,0 +1,719 @@
+use std::char;
+use std::convert::TryFrom;
+use std::f64;
+use std::fmt;
+use std::num::ParseFloatError;
+use std::num::ParseIntError;
+
+use super::float;
+use super::loc::Loc;
+use super::loc::FIRST_COL;
+use super::str_lit::StrLit;
+use super::str_lit::StrLitDecodeError;
+use super::token::Token;
+use super::token::TokenWithLocation;
+use super::ParserLanguage;
+use crate::text_format::lexer::JsonNumberLit;
+
+#[derive(Debug)]
+pub enum LexerError {
+ IncorrectInput, // TODO: something better than this
+ UnexpectedEof,
+ ExpectChar(char),
+ ParseIntError,
+ ParseFloatError,
+ IncorrectFloatLit, // TODO: how it is different from ParseFloatError?
+ IncorrectJsonEscape,
+ IncorrectJsonNumber,
+ IncorrectUnicodeChar,
+ ExpectHexDigit,
+ ExpectOctDigit,
+ ExpectDecDigit,
+ StrLitDecodeError(StrLitDecodeError),
+ ExpectedIdent,
+}
+
+impl fmt::Display for LexerError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ LexerError::IncorrectInput => write!(f, "Incorrect input"),
+ LexerError::UnexpectedEof => write!(f, "Unexpected EOF"),
+ LexerError::ExpectChar(c) => write!(f, "Expecting char: {}", c),
+ LexerError::ParseIntError => write!(f, "Parse int error"),
+ LexerError::ParseFloatError => write!(f, "Parse float error"),
+ LexerError::IncorrectFloatLit => write!(f, "Incorrect float literal"),
+ LexerError::IncorrectJsonEscape => write!(f, "Incorrect JSON escape"),
+ LexerError::IncorrectJsonNumber => write!(f, "Incorrect JSON number"),
+ LexerError::IncorrectUnicodeChar => write!(f, "Incorrect Unicode char"),
+ LexerError::ExpectHexDigit => write!(f, "Expecting hex digit"),
+ LexerError::ExpectOctDigit => write!(f, "Expecting oct digit"),
+ LexerError::ExpectDecDigit => write!(f, "Expecting dec digit"),
+ LexerError::StrLitDecodeError(e) => write!(f, "{}", e),
+ LexerError::ExpectedIdent => write!(f, "Expecting identifier"),
+ }
+ }
+}
+
+impl std::error::Error for LexerError {}
+
+pub type LexerResult<T> = Result<T, LexerError>;
+
+impl From<StrLitDecodeError> for LexerError {
+ fn from(e: StrLitDecodeError) -> Self {
+ LexerError::StrLitDecodeError(e)
+ }
+}
+
+impl From<ParseIntError> for LexerError {
+ fn from(_: ParseIntError) -> Self {
+ LexerError::ParseIntError
+ }
+}
+
+impl From<ParseFloatError> for LexerError {
+ fn from(_: ParseFloatError) -> Self {
+ LexerError::ParseFloatError
+ }
+}
+
+impl From<float::ProtobufFloatParseError> for LexerError {
+ fn from(_: float::ProtobufFloatParseError) -> Self {
+ LexerError::IncorrectFloatLit
+ }
+}
+
+#[derive(Copy, Clone)]
+pub struct Lexer<'a> {
+ language: ParserLanguage,
+ input: &'a str,
+ pos: usize,
+ pub loc: Loc,
+}
+
+fn is_letter(c: char) -> bool {
+ c.is_alphabetic() || c == '_'
+}
+
+impl<'a> Lexer<'a> {
+ pub fn new(input: &'a str, language: ParserLanguage) -> Lexer<'a> {
+ Lexer {
+ language,
+ input,
+ pos: 0,
+ loc: Loc::start(),
+ }
+ }
+
+ /// No more chars
+ pub fn eof(&self) -> bool {
+ self.pos == self.input.len()
+ }
+
+ /// Remaining chars
+ fn rem_chars(&self) -> &'a str {
+ &self.input[self.pos..]
+ }
+
+ pub fn lookahead_char_is<P: FnOnce(char) -> bool>(&self, p: P) -> bool {
+ self.lookahead_char().map_or(false, p)
+ }
+
+ fn lookahead_char_is_in(&self, alphabet: &str) -> bool {
+ self.lookahead_char_is(|c| alphabet.contains(c))
+ }
+
+ fn next_char_opt(&mut self) -> Option<char> {
+ let rem = self.rem_chars();
+ if rem.is_empty() {
+ None
+ } else {
+ let mut char_indices = rem.char_indices();
+ let (_, c) = char_indices.next().unwrap();
+ let c_len = char_indices.next().map(|(len, _)| len).unwrap_or(rem.len());
+ self.pos += c_len;
+ if c == '\n' {
+ self.loc.line += 1;
+ self.loc.col = FIRST_COL;
+ } else {
+ self.loc.col += 1;
+ }
+ Some(c)
+ }
+ }
+
+ fn next_char(&mut self) -> LexerResult<char> {
+ self.next_char_opt().ok_or(LexerError::UnexpectedEof)
+ }
+
+ /// Skip whitespaces
+ fn skip_whitespaces(&mut self) {
+ self.take_while(|c| c.is_whitespace());
+ }
+
+ fn skip_c_comment(&mut self) -> LexerResult<()> {
+ if self.skip_if_lookahead_is_str("/*") {
+ let end = "*/";
+ match self.rem_chars().find(end) {
+ None => Err(LexerError::UnexpectedEof),
+ Some(len) => {
+ let new_pos = self.pos + len + end.len();
+ self.skip_to_pos(new_pos);
+ Ok(())
+ }
+ }
+ } else {
+ Ok(())
+ }
+ }
+
+ fn skip_cpp_comment(&mut self) {
+ if self.skip_if_lookahead_is_str("//") {
+ loop {
+ match self.next_char_opt() {
+ Some('\n') | None => break,
+ _ => {}
+ }
+ }
+ }
+ }
+
+ fn skip_sh_comment(&mut self) {
+ if self.skip_if_lookahead_is_str("#") {
+ loop {
+ match self.next_char_opt() {
+ Some('\n') | None => break,
+ _ => {}
+ }
+ }
+ }
+ }
+
+ fn skip_comment(&mut self) -> LexerResult<()> {
+ match self.language {
+ ParserLanguage::Proto => {
+ self.skip_c_comment()?;
+ self.skip_cpp_comment();
+ }
+ ParserLanguage::TextFormat => {
+ self.skip_sh_comment();
+ }
+ ParserLanguage::Json => {}
+ }
+ Ok(())
+ }
+
+ pub fn skip_ws(&mut self) -> LexerResult<()> {
+ loop {
+ let pos = self.pos;
+ self.skip_whitespaces();
+ self.skip_comment()?;
+ if pos == self.pos {
+ // Did not advance
+ return Ok(());
+ }
+ }
+ }
+
+ pub fn take_while<F>(&mut self, f: F) -> &'a str
+ where
+ F: Fn(char) -> bool,
+ {
+ let start = self.pos;
+ while self.lookahead_char().map(&f) == Some(true) {
+ self.next_char_opt().unwrap();
+ }
+ let end = self.pos;
+ &self.input[start..end]
+ }
+
+ fn lookahead_char(&self) -> Option<char> {
+ self.clone().next_char_opt()
+ }
+
+ fn lookahead_is_str(&self, s: &str) -> bool {
+ self.rem_chars().starts_with(s)
+ }
+
+ fn skip_if_lookahead_is_str(&mut self, s: &str) -> bool {
+ if self.lookahead_is_str(s) {
+ let new_pos = self.pos + s.len();
+ self.skip_to_pos(new_pos);
+ true
+ } else {
+ false
+ }
+ }
+
+ fn next_char_if<P>(&mut self, p: P) -> Option<char>
+ where
+ P: FnOnce(char) -> bool,
+ {
+ let mut clone = self.clone();
+ match clone.next_char_opt() {
+ Some(c) if p(c) => {
+ *self = clone;
+ Some(c)
+ }
+ _ => None,
+ }
+ }
+
+ pub fn next_char_if_eq(&mut self, expect: char) -> bool {
+ self.next_char_if(|c| c == expect) != None
+ }
+
+ fn next_char_if_in(&mut self, alphabet: &str) -> Option<char> {
+ for c in alphabet.chars() {
+ if self.next_char_if_eq(c) {
+ return Some(c);
+ }
+ }
+ None
+ }
+
+ fn next_char_expect_eq(&mut self, expect: char) -> LexerResult<()> {
+ if self.next_char_if_eq(expect) {
+ Ok(())
+ } else {
+ Err(LexerError::ExpectChar(expect))
+ }
+ }
+
+ fn next_char_expect<P>(&mut self, expect: P, err: LexerError) -> LexerResult<char>
+ where
+ P: FnOnce(char) -> bool,
+ {
+ self.next_char_if(expect).ok_or(err)
+ }
+
+ // str functions
+
+ /// properly update line and column
+ fn skip_to_pos(&mut self, new_pos: usize) -> &'a str {
+ assert!(new_pos >= self.pos);
+ assert!(new_pos <= self.input.len());
+ let pos = self.pos;
+ while self.pos != new_pos {
+ self.next_char_opt().unwrap();
+ }
+ &self.input[pos..new_pos]
+ }
+
+ // Protobuf grammar
+
+ // char functions
+
+ // letter = "A" … "Z" | "a" … "z"
+ // https://github.com/google/protobuf/issues/4565
+ fn next_letter_opt(&mut self) -> Option<char> {
+ self.next_char_if(is_letter)
+ }
+
+ // capitalLetter = "A" … "Z"
+ fn _next_capital_letter_opt(&mut self) -> Option<char> {
+ self.next_char_if(|c| c >= 'A' && c <= 'Z')
+ }
+
+ fn next_ident_part(&mut self) -> Option<char> {
+ self.next_char_if(|c| c.is_ascii_alphanumeric() || c == '_')
+ }
+
+ // Identifiers
+
+ // ident = letter { letter | decimalDigit | "_" }
+ fn next_ident_opt(&mut self) -> LexerResult<Option<String>> {
+ if let Some(c) = self.next_letter_opt() {
+ let mut ident = String::new();
+ ident.push(c);
+ while let Some(c) = self.next_ident_part() {
+ ident.push(c);
+ }
+ Ok(Some(ident))
+ } else {
+ Ok(None)
+ }
+ }
+
+ // Integer literals
+
+ // hexLit = "0" ( "x" | "X" ) hexDigit { hexDigit }
+ fn next_hex_lit_opt(&mut self) -> LexerResult<Option<u64>> {
+ Ok(
+ if self.skip_if_lookahead_is_str("0x") || self.skip_if_lookahead_is_str("0X") {
+ let s = self.take_while(|c| c.is_ascii_hexdigit());
+ Some(u64::from_str_radix(s, 16)? as u64)
+ } else {
+ None
+ },
+ )
+ }
+
+ // decimalLit = ( "1" … "9" ) { decimalDigit }
+ // octalLit = "0" { octalDigit }
+ fn next_decimal_octal_lit_opt(&mut self) -> LexerResult<Option<u64>> {
+ // do not advance on number parse error
+ let mut clone = self.clone();
+
+ let pos = clone.pos;
+
+ Ok(if clone.next_char_if(|c| c.is_ascii_digit()) != None {
+ clone.take_while(|c| c.is_ascii_digit());
+ let value = clone.input[pos..clone.pos].parse()?;
+ *self = clone;
+ Some(value)
+ } else {
+ None
+ })
+ }
+
+ // hexDigit = "0" … "9" | "A" … "F" | "a" … "f"
+ fn next_hex_digit(&mut self) -> LexerResult<u32> {
+ let mut clone = self.clone();
+ let r = match clone.next_char()? {
+ c if c >= '0' && c <= '9' => c as u32 - b'0' as u32,
+ c if c >= 'A' && c <= 'F' => c as u32 - b'A' as u32 + 10,
+ c if c >= 'a' && c <= 'f' => c as u32 - b'a' as u32 + 10,
+ _ => return Err(LexerError::ExpectHexDigit),
+ };
+ *self = clone;
+ Ok(r)
+ }
+
+ // octalDigit = "0" … "7"
+ fn next_octal_digit(&mut self) -> LexerResult<u32> {
+ self.next_char_expect(|c| c >= '0' && c <= '7', LexerError::ExpectOctDigit)
+ .map(|c| c as u32 - '0' as u32)
+ }
+
+ // decimalDigit = "0" … "9"
+ fn next_decimal_digit(&mut self) -> LexerResult<u32> {
+ self.next_char_expect(|c| c >= '0' && c <= '9', LexerError::ExpectDecDigit)
+ .map(|c| c as u32 - '0' as u32)
+ }
+
+ // decimals = decimalDigit { decimalDigit }
+ fn next_decimal_digits(&mut self) -> LexerResult<()> {
+ self.next_decimal_digit()?;
+ self.take_while(|c| c >= '0' && c <= '9');
+ Ok(())
+ }
+
+ // intLit = decimalLit | octalLit | hexLit
+ pub fn next_int_lit_opt(&mut self) -> LexerResult<Option<u64>> {
+ assert_ne!(ParserLanguage::Json, self.language);
+
+ self.skip_ws()?;
+ if let Some(i) = self.next_hex_lit_opt()? {
+ return Ok(Some(i));
+ }
+ if let Some(i) = self.next_decimal_octal_lit_opt()? {
+ return Ok(Some(i));
+ }
+ Ok(None)
+ }
+
+ // Floating-point literals
+
+ // exponent = ( "e" | "E" ) [ "+" | "-" ] decimals
+ fn next_exponent_opt(&mut self) -> LexerResult<Option<()>> {
+ if self.next_char_if_in("eE") != None {
+ self.next_char_if_in("+-");
+ self.next_decimal_digits()?;
+ Ok(Some(()))
+ } else {
+ Ok(None)
+ }
+ }
+
+ // floatLit = ( decimals "." [ decimals ] [ exponent ] | decimals exponent | "."decimals [ exponent ] ) | "inf" | "nan"
+ fn next_float_lit(&mut self) -> LexerResult<()> {
+ assert_ne!(ParserLanguage::Json, self.language);
+
+ // "inf" and "nan" are handled as part of ident
+ if self.next_char_if_eq('.') {
+ self.next_decimal_digits()?;
+ self.next_exponent_opt()?;
+ } else {
+ self.next_decimal_digits()?;
+ if self.next_char_if_eq('.') {
+ self.next_decimal_digits()?;
+ self.next_exponent_opt()?;
+ } else {
+ if self.next_exponent_opt()? == None {
+ return Err(LexerError::IncorrectFloatLit);
+ }
+ }
+ }
+ Ok(())
+ }
+
+ // String literals
+
+ // charValue = hexEscape | octEscape | charEscape | /[^\0\n\\]/
+ // hexEscape = '\' ( "x" | "X" ) hexDigit hexDigit
+ // https://github.com/google/protobuf/issues/4560
+ // octEscape = '\' octalDigit octalDigit octalDigit
+ // charEscape = '\' ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | '\' | "'" | '"' )
+ // quote = "'" | '"'
+ pub fn next_byte_value(&mut self) -> LexerResult<u8> {
+ match self.next_char()? {
+ '\\' => {
+ match self.next_char()? {
+ '\'' => Ok(b'\''),
+ '"' => Ok(b'"'),
+ '\\' => Ok(b'\\'),
+ 'a' => Ok(b'\x07'),
+ 'b' => Ok(b'\x08'),
+ 'f' => Ok(b'\x0c'),
+ 'n' => Ok(b'\n'),
+ 'r' => Ok(b'\r'),
+ 't' => Ok(b'\t'),
+ 'v' => Ok(b'\x0b'),
+ 'x' => {
+ let d1 = self.next_hex_digit()? as u8;
+ let d2 = self.next_hex_digit()? as u8;
+ Ok(((d1 << 4) | d2) as u8)
+ }
+ d if d >= '0' && d <= '7' => {
+ let mut r = d as u8 - b'0';
+ for _ in 0..2 {
+ match self.next_octal_digit() {
+ Err(_) => break,
+ Ok(d) => r = (r << 3) + d as u8,
+ }
+ }
+ Ok(r)
+ }
+ // https://github.com/google/protobuf/issues/4562
+ // TODO: overflow
+ c => Ok(c as u8),
+ }
+ }
+ '\n' | '\0' => Err(LexerError::IncorrectInput),
+ // TODO: check overflow
+ c => Ok(c as u8),
+ }
+ }
+
+ fn char_try_from(i: u32) -> LexerResult<char> {
+ char::try_from(i).map_err(|_| LexerError::IncorrectUnicodeChar)
+ }
+
+ pub fn next_json_char_value(&mut self) -> LexerResult<char> {
+ match self.next_char()? {
+ '\\' => match self.next_char()? {
+ '"' => Ok('"'),
+ '\'' => Ok('\''),
+ '\\' => Ok('\\'),
+ '/' => Ok('/'),
+ 'b' => Ok('\x08'),
+ 'f' => Ok('\x0c'),
+ 'n' => Ok('\n'),
+ 'r' => Ok('\r'),
+ 't' => Ok('\t'),
+ 'u' => {
+ let mut v = 0;
+ for _ in 0..4 {
+ let digit = self.next_hex_digit()?;
+ v = v * 16 + digit;
+ }
+ Self::char_try_from(v)
+ }
+ _ => Err(LexerError::IncorrectJsonEscape),
+ },
+ c => Ok(c),
+ }
+ }
+
+ // https://github.com/google/protobuf/issues/4564
+ // strLit = ( "'" { charValue } "'" ) | ( '"' { charValue } '"' )
+ fn next_str_lit_raw(&mut self) -> LexerResult<String> {
+ let mut raw = String::new();
+
+ let mut first = true;
+ loop {
+ if !first {
+ self.skip_ws()?;
+ }
+
+ let start = self.pos;
+
+ let q = match self.next_char_if_in("'\"") {
+ Some(q) => q,
+ None if !first => break,
+ None => return Err(LexerError::IncorrectInput),
+ };
+ first = false;
+ while self.lookahead_char() != Some(q) {
+ self.next_byte_value()?;
+ }
+ self.next_char_expect_eq(q)?;
+
+ raw.push_str(&self.input[start + 1..self.pos - 1]);
+ }
+ Ok(raw)
+ }
+
+ fn next_str_lit_raw_opt(&mut self) -> LexerResult<Option<String>> {
+ if self.lookahead_char_is_in("'\"") {
+ Ok(Some(self.next_str_lit_raw()?))
+ } else {
+ Ok(None)
+ }
+ }
+
+ /// Parse next token as JSON number
+ fn next_json_number_opt(&mut self) -> LexerResult<Option<JsonNumberLit>> {
+ assert_eq!(ParserLanguage::Json, self.language);
+
+ fn is_digit(c: char) -> bool {
+ c >= '0' && c <= '9'
+ }
+
+ fn is_digit_1_9(c: char) -> bool {
+ c >= '1' && c <= '9'
+ }
+
+ if !self.lookahead_char_is_in("-0123456789") {
+ return Ok(None);
+ }
+
+ let mut s = String::new();
+ if self.next_char_if_eq('-') {
+ s.push('-');
+ }
+
+ if self.next_char_if_eq('0') {
+ s.push('0');
+ } else {
+ s.push(self.next_char_expect(is_digit_1_9, LexerError::IncorrectJsonNumber)?);
+ while let Some(c) = self.next_char_if(is_digit) {
+ s.push(c);
+ }
+ }
+
+ if self.next_char_if_eq('.') {
+ s.push('.');
+ s.push(self.next_char_expect(is_digit, LexerError::IncorrectJsonNumber)?);
+ while let Some(c) = self.next_char_if(is_digit) {
+ s.push(c);
+ }
+ }
+
+ if let Some(c) = self.next_char_if_in("eE") {
+ s.push(c);
+ if let Some(c) = self.next_char_if_in("+-") {
+ s.push(c);
+ }
+ s.push(self.next_char_expect(is_digit, LexerError::IncorrectJsonNumber)?);
+ while let Some(c) = self.next_char_if(is_digit) {
+ s.push(c);
+ }
+ }
+
+ Ok(Some(JsonNumberLit(s)))
+ }
+
+ fn next_token_inner(&mut self) -> LexerResult<Token> {
+ if self.language == ParserLanguage::Json {
+ if let Some(v) = self.next_json_number_opt()? {
+ return Ok(Token::JsonNumber(v));
+ }
+ }
+
+ if let Some(ident) = self.next_ident_opt()? {
+ let token = if self.language != ParserLanguage::Json && ident == float::PROTOBUF_NAN {
+ Token::FloatLit(f64::NAN)
+ } else if self.language != ParserLanguage::Json && ident == float::PROTOBUF_INF {
+ Token::FloatLit(f64::INFINITY)
+ } else {
+ Token::Ident(ident.to_owned())
+ };
+ return Ok(token);
+ }
+
+ if self.language != ParserLanguage::Json {
+ let mut clone = self.clone();
+ let pos = clone.pos;
+ if let Ok(_) = clone.next_float_lit() {
+ let f = float::parse_protobuf_float(&self.input[pos..clone.pos])?;
+ *self = clone;
+ return Ok(Token::FloatLit(f));
+ }
+
+ if let Some(lit) = self.next_int_lit_opt()? {
+ return Ok(Token::IntLit(lit));
+ }
+ }
+
+ if let Some(escaped) = self.next_str_lit_raw_opt()? {
+ return Ok(Token::StrLit(StrLit { escaped }));
+ }
+
+ // This branch must be after str lit
+ if let Some(c) = self.next_char_if(|c| c.is_ascii_punctuation()) {
+ return Ok(Token::Symbol(c));
+ }
+
+ if let Some(ident) = self.next_ident_opt()? {
+ return Ok(Token::Ident(ident));
+ }
+
+ Err(LexerError::IncorrectInput)
+ }
+
+ pub fn next_token(&mut self) -> LexerResult<Option<TokenWithLocation>> {
+ self.skip_ws()?;
+ let loc = self.loc;
+
+ Ok(if self.eof() {
+ None
+ } else {
+ let token = self.next_token_inner()?;
+ // Skip whitespace here to update location
+ // to the beginning of the next token
+ self.skip_ws()?;
+ Some(TokenWithLocation { token, loc })
+ })
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ fn lex<P, R>(input: &str, parse_what: P) -> R
+ where
+ P: FnOnce(&mut Lexer) -> LexerResult<R>,
+ {
+ let mut lexer = Lexer::new(input, ParserLanguage::Proto);
+ let r = parse_what(&mut lexer).expect(&format!("lexer failed at {}", lexer.loc));
+ assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
+ r
+ }
+
+ fn lex_opt<P, R>(input: &str, parse_what: P) -> R
+ where
+ P: FnOnce(&mut Lexer) -> LexerResult<Option<R>>,
+ {
+ let mut lexer = Lexer::new(input, ParserLanguage::Proto);
+ let o = parse_what(&mut lexer).expect(&format!("lexer failed at {}", lexer.loc));
+ let r = o.expect(&format!("lexer returned none at {}", lexer.loc));
+ assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
+ r
+ }
+
+ #[test]
+ fn test_lexer_int_lit() {
+ let msg = r#"10"#;
+ let mess = lex_opt(msg, |p| p.next_int_lit_opt());
+ assert_eq!(10, mess);
+ }
+
+ #[test]
+ fn test_lexer_float_lit() {
+ let msg = r#"12.3"#;
+ let mess = lex(msg, |p| p.next_token_inner());
+ assert_eq!(Token::FloatLit(12.3), mess);
+ }
+}
diff --git a/src/text_format/lexer/loc.rs b/src/text_format/lexer/loc.rs
new file mode 100644
index 0000000..ea3fc1a
--- /dev/null
+++ b/src/text_format/lexer/loc.rs
@@ -0,0 +1,28 @@
+use std::fmt;
+
+pub const FIRST_LINE: u32 = 1;
+pub const FIRST_COL: u32 = 1;
+
+/// Location in file
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
+pub struct Loc {
+ /// 1-based
+ pub line: u32,
+ /// 1-based
+ pub col: u32,
+}
+
+impl fmt::Display for Loc {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}:{}", self.line, self.col)
+ }
+}
+
+impl Loc {
+ pub fn start() -> Loc {
+ Loc {
+ line: FIRST_LINE,
+ col: FIRST_COL,
+ }
+ }
+}
diff --git a/src/text_format/lexer/mod.rs b/src/text_format/lexer/mod.rs
new file mode 100644
index 0000000..a7ed884
--- /dev/null
+++ b/src/text_format/lexer/mod.rs
@@ -0,0 +1,21 @@
+//! Implementation of lexer for both protobuf parser and for text format parser.
+
+pub mod float;
+mod json_number_lit;
+mod lexer_impl;
+mod loc;
+mod num_lit;
+mod parser_language;
+mod str_lit;
+mod token;
+
+pub use self::json_number_lit::JsonNumberLit;
+pub use self::lexer_impl::Lexer;
+pub use self::lexer_impl::LexerError;
+pub use self::loc::Loc;
+pub use self::num_lit::NumLit;
+pub use self::parser_language::ParserLanguage;
+pub use self::str_lit::StrLit;
+pub use self::str_lit::StrLitDecodeError;
+pub use self::token::Token;
+pub use self::token::TokenWithLocation;
diff --git a/src/text_format/lexer/num_lit.rs b/src/text_format/lexer/num_lit.rs
new file mode 100644
index 0000000..cc64cc4
--- /dev/null
+++ b/src/text_format/lexer/num_lit.rs
@@ -0,0 +1,5 @@
+#[derive(Copy, Clone)]
+pub enum NumLit {
+ U64(u64),
+ F64(f64),
+}
diff --git a/src/text_format/lexer/parser_language.rs b/src/text_format/lexer/parser_language.rs
new file mode 100644
index 0000000..e356571
--- /dev/null
+++ b/src/text_format/lexer/parser_language.rs
@@ -0,0 +1,10 @@
+/// We use the same lexer/tokenizer for all parsers for simplicity
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum ParserLanguage {
+ // `.proto` files
+ Proto,
+ // Protobuf text format
+ TextFormat,
+ // JSON
+ Json,
+}
diff --git a/src/text_format/lexer/str_lit.rs b/src/text_format/lexer/str_lit.rs
new file mode 100644
index 0000000..caa98f1
--- /dev/null
+++ b/src/text_format/lexer/str_lit.rs
@@ -0,0 +1,85 @@
+use super::lexer_impl::Lexer;
+use super::lexer_impl::LexerError;
+use crate::text_format::lexer::ParserLanguage;
+use std::fmt;
+use std::string::FromUtf8Error;
+
+#[derive(Debug)]
+pub enum StrLitDecodeError {
+ FromUtf8Error(FromUtf8Error),
+ // TODO: be more specific
+ OtherError,
+}
+
+impl fmt::Display for StrLitDecodeError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ StrLitDecodeError::FromUtf8Error(e) => write!(f, "{}", e),
+ StrLitDecodeError::OtherError => write!(f, "String literal decode error"),
+ }
+ }
+}
+
+impl std::error::Error for StrLitDecodeError {}
+
+impl From<LexerError> for StrLitDecodeError {
+ fn from(_: LexerError) -> Self {
+ StrLitDecodeError::OtherError
+ }
+}
+
+impl From<FromUtf8Error> for StrLitDecodeError {
+ fn from(e: FromUtf8Error) -> Self {
+ StrLitDecodeError::FromUtf8Error(e)
+ }
+}
+
+pub type StrLitDecodeResult<T> = Result<T, StrLitDecodeError>;
+
+/// String literal, both `string` and `bytes`.
+#[derive(Clone, Eq, PartialEq, Debug)]
+pub struct StrLit {
+ pub escaped: String,
+}
+
+impl StrLit {
+ /// May fail if not valid UTF8
+ pub fn decode_utf8(&self) -> StrLitDecodeResult<String> {
+ let mut lexer = Lexer::new(&self.escaped, ParserLanguage::Json);
+ let mut r = Vec::new();
+ while !lexer.eof() {
+ r.push(lexer.next_byte_value()?);
+ }
+ Ok(String::from_utf8(r)?)
+ }
+
+ pub fn decode_bytes(&self) -> StrLitDecodeResult<Vec<u8>> {
+ let mut lexer = Lexer::new(&self.escaped, ParserLanguage::Json);
+ let mut r = Vec::new();
+ while !lexer.eof() {
+ r.push(lexer.next_byte_value()?);
+ }
+ Ok(r)
+ }
+
+ pub fn quoted(&self) -> String {
+ format!("\"{}\"", self.escaped)
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use crate::text_format::lexer::StrLit;
+
+ #[test]
+ fn decode_utf8() {
+ assert_eq!(
+ "\u{1234}".to_owned(),
+ StrLit {
+ escaped: "\\341\\210\\264".to_owned()
+ }
+ .decode_utf8()
+ .unwrap()
+ )
+ }
+}
diff --git a/src/text_format/lexer/token.rs b/src/text_format/lexer/token.rs
new file mode 100644
index 0000000..55d931a
--- /dev/null
+++ b/src/text_format/lexer/token.rs
@@ -0,0 +1,47 @@
+use super::lexer_impl::LexerError;
+use super::lexer_impl::LexerResult;
+use super::loc::Loc;
+use super::num_lit::NumLit;
+use super::str_lit::StrLit;
+use crate::text_format::lexer::JsonNumberLit;
+
+#[derive(Clone, Debug, PartialEq)]
+pub enum Token {
+ Ident(String),
+ Symbol(char),
+ // Protobuf tokenizer has separate tokens for int and float.
+ // Tokens do not include sign.
+ IntLit(u64),
+ FloatLit(f64),
+ JsonNumber(JsonNumberLit),
+ // including quotes
+ StrLit(StrLit),
+}
+
+impl Token {
+ /// Back to original
+ pub fn format(&self) -> String {
+ match self {
+ &Token::Ident(ref s) => s.clone(),
+ &Token::Symbol(c) => c.to_string(),
+ &Token::IntLit(ref i) => i.to_string(),
+ &Token::StrLit(ref s) => s.quoted(),
+ &Token::FloatLit(ref f) => f.to_string(),
+ &Token::JsonNumber(ref f) => f.to_string(),
+ }
+ }
+
+ pub fn to_num_lit(&self) -> LexerResult<NumLit> {
+ match self {
+ &Token::IntLit(i) => Ok(NumLit::U64(i)),
+ &Token::FloatLit(f) => Ok(NumLit::F64(f)),
+ _ => Err(LexerError::IncorrectInput),
+ }
+ }
+}
+
+#[derive(Clone)]
+pub struct TokenWithLocation {
+ pub token: Token,
+ pub loc: Loc,
+}
diff --git a/src/text_format.rs b/src/text_format/mod.rs
similarity index 87%
rename from src/text_format.rs
rename to src/text_format/mod.rs
index 3cbe13d..7af03e4 100644
--- a/src/text_format.rs
+++ b/src/text_format/mod.rs
@@ -26,37 +26,20 @@
use std::fmt;
use std::fmt::Write;
-fn quote_bytes_to(bytes: &[u8], buf: &mut String) {
- for &c in bytes {
- match c {
- b'\n' => buf.push_str(r"\n"),
- b'\r' => buf.push_str(r"\r"),
- b'\t' => buf.push_str(r"\t"),
- b'"' => buf.push_str("\\\""),
- b'\\' => buf.push_str(r"\\"),
- b'\x20'..=b'\x7e' => buf.push(c as char),
- _ => {
- buf.push('\\');
- buf.push((b'0' + (c >> 6)) as char);
- buf.push((b'0' + ((c >> 3) & 7)) as char);
- buf.push((b'0' + (c & 7)) as char);
- }
- }
- }
-}
+mod print;
-fn quote_escape_bytes_to(bytes: &[u8], buf: &mut String) {
- buf.push('"');
- quote_bytes_to(bytes, buf);
- buf.push('"');
-}
-
+// Used by the text format parser and by the pure-rust codegen parser,
+// which is why this is a public but hidden module.
+// https://github.com/rust-lang/rust/issues/44663
#[doc(hidden)]
-pub fn quote_escape_bytes(bytes: &[u8]) -> String {
- let mut r = String::new();
- quote_escape_bytes_to(bytes, &mut r);
- r
-}
+pub mod lexer;
+
+use self::print::print_str_to;
+#[doc(hidden)]
+pub use self::print::quote_bytes_to;
+#[doc(hidden)]
+pub use self::print::quote_escape_bytes;
+use crate::text_format::print::quote_escape_bytes_to;
#[doc(hidden)]
pub fn unescape_string(string: &str) -> Vec<u8> {
@@ -127,11 +110,6 @@
}
}
-fn print_str_to(s: &str, buf: &mut String) {
- // TODO: keep printable Unicode
- quote_escape_bytes_to(s.as_bytes(), buf);
-}
-
fn do_indent(buf: &mut String, pretty: bool, indent: usize) {
if pretty && indent > 0 {
for _ in 0..indent {
@@ -305,7 +283,7 @@
fn test_print_to_bytes() {
assert_eq!("ab", escape(b"ab"));
assert_eq!("a\\\\023", escape(b"a\\023"));
- assert_eq!("a\\r\\n\\t '\\\"\\\\", escape(b"a\r\n\t '\"\\"));
+ assert_eq!("a\\r\\n\\t \\'\\\"\\\\", escape(b"a\r\n\t '\"\\"));
assert_eq!("\\344\\275\\240\\345\\245\\275", escape("你好".as_bytes()));
}
diff --git a/src/text_format/print.rs b/src/text_format/print.rs
new file mode 100644
index 0000000..397e8cd
--- /dev/null
+++ b/src/text_format/print.rs
@@ -0,0 +1,38 @@
+#[doc(hidden)]
+pub fn quote_bytes_to(bytes: &[u8], buf: &mut String) {
+ for &c in bytes {
+ match c {
+ b'\n' => buf.push_str(r"\n"),
+ b'\r' => buf.push_str(r"\r"),
+ b'\t' => buf.push_str(r"\t"),
+ b'\'' => buf.push_str("\\\'"),
+ b'"' => buf.push_str("\\\""),
+ b'\\' => buf.push_str(r"\\"),
+ b'\x20'..=b'\x7e' => buf.push(c as char),
+ _ => {
+ buf.push('\\');
+ buf.push((b'0' + (c >> 6)) as char);
+ buf.push((b'0' + ((c >> 3) & 7)) as char);
+ buf.push((b'0' + (c & 7)) as char);
+ }
+ }
+ }
+}
+
+pub(crate) fn quote_escape_bytes_to(bytes: &[u8], buf: &mut String) {
+ buf.push('"');
+ quote_bytes_to(bytes, buf);
+ buf.push('"');
+}
+
+#[doc(hidden)]
+pub fn quote_escape_bytes(bytes: &[u8]) -> String {
+ let mut r = String::new();
+ quote_escape_bytes_to(bytes, &mut r);
+ r
+}
+
+pub(crate) fn print_str_to(s: &str, buf: &mut String) {
+ // TODO: keep printable Unicode
+ quote_escape_bytes_to(s.as_bytes(), buf);
+}
diff --git a/src/types.rs b/src/types.rs
index 2308dee..bcfdd8f 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -11,7 +11,6 @@
use crate::enums::ProtobufEnum;
use crate::error::ProtobufResult;
use crate::message::Message;
-use crate::parse_from_bytes;
use crate::reflect::ProtobufValue;
use crate::rt;
use crate::stream::CodedInputStream;
@@ -661,7 +660,7 @@
.iter()
.rev()
.next()
- .map(|bytes| parse_from_bytes(bytes).expect("cannot parse message"))
+ .map(|bytes| M::parse_from_bytes(bytes).expect("cannot parse message"))
}
fn compute_size(value: &M) -> u32 {
diff --git a/src/unknown.rs b/src/unknown.rs
index 514379c..241fb3c 100644
--- a/src/unknown.rs
+++ b/src/unknown.rs
@@ -243,6 +243,13 @@
self.find_field(&number).add_value(value);
}
+ /// Remove unknown field by number
+ pub fn remove(&mut self, field_number: u32) {
+ if let Some(fields) = &mut self.fields {
+ fields.remove(&field_number);
+ }
+ }
+
/// Iterate over all unknowns
pub fn iter<'s>(&'s self) -> UnknownFieldsIter<'s> {
UnknownFieldsIter {
diff --git a/src/well_known_types/any.rs b/src/well_known_types/any.rs
index a2a2c1e..58c62a3 100644
--- a/src/well_known_types/any.rs
+++ b/src/well_known_types/any.rs
@@ -1,4 +1,4 @@
-// This file is generated by rust-protobuf 2.17.0-pre. Do not edit
+// This file is generated by rust-protobuf 2.19.0-pre. Do not edit
// @generated
// https://github.com/rust-lang/rust-clippy/issues/702
@@ -349,7 +349,7 @@
static file_descriptor_proto_lazy: crate::rt::LazyV2<crate::descriptor::FileDescriptorProto> = crate::rt::LazyV2::INIT;
fn parse_descriptor_proto() -> crate::descriptor::FileDescriptorProto {
- crate::parse_from_bytes(file_descriptor_proto_data).unwrap()
+ crate::Message::parse_from_bytes(file_descriptor_proto_data).unwrap()
}
pub fn file_descriptor_proto() -> &'static crate::descriptor::FileDescriptorProto {
diff --git a/src/well_known_types/api.rs b/src/well_known_types/api.rs
index 188f4b3..3bdd79b 100644
--- a/src/well_known_types/api.rs
+++ b/src/well_known_types/api.rs
@@ -1,4 +1,4 @@
-// This file is generated by rust-protobuf 2.17.0-pre. Do not edit
+// This file is generated by rust-protobuf 2.19.0-pre. Do not edit
// @generated
// https://github.com/rust-lang/rust-clippy/issues/702
@@ -1269,7 +1269,7 @@
static file_descriptor_proto_lazy: crate::rt::LazyV2<crate::descriptor::FileDescriptorProto> = crate::rt::LazyV2::INIT;
fn parse_descriptor_proto() -> crate::descriptor::FileDescriptorProto {
- crate::parse_from_bytes(file_descriptor_proto_data).unwrap()
+ crate::Message::parse_from_bytes(file_descriptor_proto_data).unwrap()
}
pub fn file_descriptor_proto() -> &'static crate::descriptor::FileDescriptorProto {
diff --git a/src/well_known_types/duration.rs b/src/well_known_types/duration.rs
index 623c19f..4c126db 100644
--- a/src/well_known_types/duration.rs
+++ b/src/well_known_types/duration.rs
@@ -1,4 +1,4 @@
-// This file is generated by rust-protobuf 2.17.0-pre. Do not edit
+// This file is generated by rust-protobuf 2.19.0-pre. Do not edit
// @generated
// https://github.com/rust-lang/rust-clippy/issues/702
@@ -313,7 +313,7 @@
static file_descriptor_proto_lazy: crate::rt::LazyV2<crate::descriptor::FileDescriptorProto> = crate::rt::LazyV2::INIT;
fn parse_descriptor_proto() -> crate::descriptor::FileDescriptorProto {
- crate::parse_from_bytes(file_descriptor_proto_data).unwrap()
+ crate::Message::parse_from_bytes(file_descriptor_proto_data).unwrap()
}
pub fn file_descriptor_proto() -> &'static crate::descriptor::FileDescriptorProto {
diff --git a/src/well_known_types/empty.rs b/src/well_known_types/empty.rs
index 022f303..a62ac28 100644
--- a/src/well_known_types/empty.rs
+++ b/src/well_known_types/empty.rs
@@ -1,4 +1,4 @@
-// This file is generated by rust-protobuf 2.17.0-pre. Do not edit
+// This file is generated by rust-protobuf 2.19.0-pre. Do not edit
// @generated
// https://github.com/rust-lang/rust-clippy/issues/702
@@ -198,7 +198,7 @@
static file_descriptor_proto_lazy: crate::rt::LazyV2<crate::descriptor::FileDescriptorProto> = crate::rt::LazyV2::INIT;
fn parse_descriptor_proto() -> crate::descriptor::FileDescriptorProto {
- crate::parse_from_bytes(file_descriptor_proto_data).unwrap()
+ crate::Message::parse_from_bytes(file_descriptor_proto_data).unwrap()
}
pub fn file_descriptor_proto() -> &'static crate::descriptor::FileDescriptorProto {
diff --git a/src/well_known_types/field_mask.rs b/src/well_known_types/field_mask.rs
index 37c03f4..c9db6c0 100644
--- a/src/well_known_types/field_mask.rs
+++ b/src/well_known_types/field_mask.rs
@@ -1,4 +1,4 @@
-// This file is generated by rust-protobuf 2.17.0-pre. Do not edit
+// This file is generated by rust-protobuf 2.19.0-pre. Do not edit
// @generated
// https://github.com/rust-lang/rust-clippy/issues/702
@@ -369,7 +369,7 @@
static file_descriptor_proto_lazy: crate::rt::LazyV2<crate::descriptor::FileDescriptorProto> = crate::rt::LazyV2::INIT;
fn parse_descriptor_proto() -> crate::descriptor::FileDescriptorProto {
- crate::parse_from_bytes(file_descriptor_proto_data).unwrap()
+ crate::Message::parse_from_bytes(file_descriptor_proto_data).unwrap()
}
pub fn file_descriptor_proto() -> &'static crate::descriptor::FileDescriptorProto {
diff --git a/src/well_known_types/source_context.rs b/src/well_known_types/source_context.rs
index 9583d89..f22391d 100644
--- a/src/well_known_types/source_context.rs
+++ b/src/well_known_types/source_context.rs
@@ -1,4 +1,4 @@
-// This file is generated by rust-protobuf 2.17.0-pre. Do not edit
+// This file is generated by rust-protobuf 2.19.0-pre. Do not edit
// @generated
// https://github.com/rust-lang/rust-clippy/issues/702
@@ -239,7 +239,7 @@
static file_descriptor_proto_lazy: crate::rt::LazyV2<crate::descriptor::FileDescriptorProto> = crate::rt::LazyV2::INIT;
fn parse_descriptor_proto() -> crate::descriptor::FileDescriptorProto {
- crate::parse_from_bytes(file_descriptor_proto_data).unwrap()
+ crate::Message::parse_from_bytes(file_descriptor_proto_data).unwrap()
}
pub fn file_descriptor_proto() -> &'static crate::descriptor::FileDescriptorProto {
diff --git a/src/well_known_types/struct_pb.rs b/src/well_known_types/struct_pb.rs
index 0045a09..70db8f3 100644
--- a/src/well_known_types/struct_pb.rs
+++ b/src/well_known_types/struct_pb.rs
@@ -1,4 +1,4 @@
-// This file is generated by rust-protobuf 2.17.0-pre. Do not edit
+// This file is generated by rust-protobuf 2.19.0-pre. Do not edit
// @generated
// https://github.com/rust-lang/rust-clippy/issues/702
@@ -999,7 +999,7 @@
static file_descriptor_proto_lazy: crate::rt::LazyV2<crate::descriptor::FileDescriptorProto> = crate::rt::LazyV2::INIT;
fn parse_descriptor_proto() -> crate::descriptor::FileDescriptorProto {
- crate::parse_from_bytes(file_descriptor_proto_data).unwrap()
+ crate::Message::parse_from_bytes(file_descriptor_proto_data).unwrap()
}
pub fn file_descriptor_proto() -> &'static crate::descriptor::FileDescriptorProto {
diff --git a/src/well_known_types/timestamp.rs b/src/well_known_types/timestamp.rs
index ef76de4..9c72377 100644
--- a/src/well_known_types/timestamp.rs
+++ b/src/well_known_types/timestamp.rs
@@ -1,4 +1,4 @@
-// This file is generated by rust-protobuf 2.17.0-pre. Do not edit
+// This file is generated by rust-protobuf 2.19.0-pre. Do not edit
// @generated
// https://github.com/rust-lang/rust-clippy/issues/702
@@ -316,7 +316,7 @@
static file_descriptor_proto_lazy: crate::rt::LazyV2<crate::descriptor::FileDescriptorProto> = crate::rt::LazyV2::INIT;
fn parse_descriptor_proto() -> crate::descriptor::FileDescriptorProto {
- crate::parse_from_bytes(file_descriptor_proto_data).unwrap()
+ crate::Message::parse_from_bytes(file_descriptor_proto_data).unwrap()
}
pub fn file_descriptor_proto() -> &'static crate::descriptor::FileDescriptorProto {
diff --git a/src/well_known_types/type_pb.rs b/src/well_known_types/type_pb.rs
index 3150134..556457e 100644
--- a/src/well_known_types/type_pb.rs
+++ b/src/well_known_types/type_pb.rs
@@ -1,4 +1,4 @@
-// This file is generated by rust-protobuf 2.17.0-pre. Do not edit
+// This file is generated by rust-protobuf 2.19.0-pre. Do not edit
// @generated
// https://github.com/rust-lang/rust-clippy/issues/702
@@ -2213,7 +2213,7 @@
static file_descriptor_proto_lazy: crate::rt::LazyV2<crate::descriptor::FileDescriptorProto> = crate::rt::LazyV2::INIT;
fn parse_descriptor_proto() -> crate::descriptor::FileDescriptorProto {
- crate::parse_from_bytes(file_descriptor_proto_data).unwrap()
+ crate::Message::parse_from_bytes(file_descriptor_proto_data).unwrap()
}
pub fn file_descriptor_proto() -> &'static crate::descriptor::FileDescriptorProto {
diff --git a/src/well_known_types/wrappers.rs b/src/well_known_types/wrappers.rs
index 0ec90b2..65e77de 100644
--- a/src/well_known_types/wrappers.rs
+++ b/src/well_known_types/wrappers.rs
@@ -1,4 +1,4 @@
-// This file is generated by rust-protobuf 2.17.0-pre. Do not edit
+// This file is generated by rust-protobuf 2.19.0-pre. Do not edit
// @generated
// https://github.com/rust-lang/rust-clippy/issues/702
@@ -1546,7 +1546,7 @@
static file_descriptor_proto_lazy: crate::rt::LazyV2<crate::descriptor::FileDescriptorProto> = crate::rt::LazyV2::INIT;
fn parse_descriptor_proto() -> crate::descriptor::FileDescriptorProto {
- crate::parse_from_bytes(file_descriptor_proto_data).unwrap()
+ crate::Message::parse_from_bytes(file_descriptor_proto_data).unwrap()
}
pub fn file_descriptor_proto() -> &'static crate::descriptor::FileDescriptorProto {
diff --git a/src/well_known_types_util/any.rs b/src/well_known_types_util/any.rs
index 31ddd82..7441e17 100644
--- a/src/well_known_types_util/any.rs
+++ b/src/well_known_types_util/any.rs
@@ -1,4 +1,3 @@
-use crate::parse_from_bytes;
use crate::reflect::MessageDescriptor;
use crate::well_known_types::Any;
use crate::Message;
@@ -90,7 +89,7 @@
if !self.is::<M>() {
return Ok(None);
}
- Ok(Some(parse_from_bytes(&self.value)?))
+ Ok(Some(M::parse_from_bytes(&self.value)?))
}
/// Extract a message from this `Any`.