/* blob: 0106d02db71d13e9585f9aa274e15cba0b8d72b6 [file] [log] [blame] */
/*
* upb - a minimalist implementation of protocol buffers.
*
* Copyright (c) 2009 Joshua Haberman. See LICENSE for details.
*/
#include <inttypes.h>
#include <stdlib.h>
#include "upb_msg.h"
#include "descriptor.h"
#include "upb_mm.h"
#include "upb_parse.h"
#include "upb_serialize.h"
#include "upb_text.h"
/* Parsing. ******************************************************************/
/* One level of submessage nesting on the message parser's stack. */
struct upb_msgparser_frame {
  /* Message that values parsed at this nesting level are stored into. */
  struct upb_msg *msg;
};
/* State for parsing a stream into a tree of upb_msg objects.  It wraps a
 * callback parser and tracks which (sub)message is currently being filled. */
struct upb_msgparser {
  /* Underlying callback parser; created in upb_msgparser_new(). */
  struct upb_cbparser *s;
  /* Not read or assigned by any function in this file section. */
  bool merge;
  /* Recorded by upb_msgparser_reset(); str_cb currently always copies. */
  bool byref;
  /* Frame stack; upb_msgparser_reset() puts the root message in stack[0]. */
  struct upb_msgparser_frame stack[UPB_MAX_NESTING];
  /* Frame for the (sub)message currently being parsed. */
  struct upb_msgparser_frame *top;
};
/* Helper function that returns a pointer to where the next value for field "f"
* should be stored, taking into account whether f is an array that may need to
* be allocated or resized. */
static union upb_value_ptr get_value_ptr(struct upb_msg *msg,
struct upb_fielddef *f)
{
union upb_value_ptr p = upb_msg_getptr(msg, f);
if(upb_isarray(f)) {
if(!upb_msg_isset(msg, f)) {
if(!*p.arr || !upb_mmhead_only(&((*p.arr)->mmhead))) {
if(*p.arr)
upb_array_unref(*p.arr);
*p.arr = upb_array_new(f);
}
upb_array_truncate(*p.arr);
upb_msg_set(msg, f);
}
p = upb_array_append(*p.arr);
}
return p;
}
/* Callbacks for the stream parser. */
static bool value_cb(void *udata, struct upb_msgdef *msgdef,
struct upb_fielddef *f, union upb_value val)
{
(void)msgdef;
struct upb_msgparser *mp = udata;
struct upb_msg *msg = mp->top->msg;
union upb_value_ptr p = get_value_ptr(msg, f);
upb_msg_set(msg, f);
upb_value_write(p, val, f->type);
return true;
}
static bool str_cb(void *udata, struct upb_msgdef *msgdef,
struct upb_fielddef *f, uint8_t *str, size_t avail_len,
size_t total_len)
{
(void)msgdef;
struct upb_msgparser *mp = udata;
struct upb_msg *msg = mp->top->msg;
union upb_value_ptr p = get_value_ptr(msg, f);
upb_msg_set(msg, f);
if(avail_len != total_len) abort(); /* TODO: support streaming. */
//bool byref = avail_len == total_len && mp->byref;
if(!*p.str || !upb_mmhead_only(&((*p.str)->mmhead))) {
if(*p.str)
upb_string_unref(*p.str);
*p.str = upb_string_new();
}
//if(byref) {
// upb_strdrop(*p.str);
// (*p.str)->ptr = (char*)str;
// (*p.str)->byte_len = avail_len;
//} else {
upb_string_resize(*p.str, total_len);
memcpy((*p.str)->ptr, str, avail_len);
(*p.str)->byte_len = avail_len;
//}
return true;
}
static void start_cb(void *udata, struct upb_fielddef *f)
{
struct upb_msgparser *mp = udata;
struct upb_msg *oldmsg = mp->top->msg;
union upb_value_ptr p = get_value_ptr(oldmsg, f);
if(upb_isarray(f) || !upb_msg_isset(oldmsg, f)) {
if(!*p.msg || !upb_mmhead_only(&((*p.msg)->mmhead))) {
if(*p.msg)
upb_msg_unref(*p.msg);
*p.msg = upb_msg_new(upb_downcast_msgdef(f->def));
}
upb_msg_clear(*p.msg);
upb_msg_set(oldmsg, f);
}
mp->top++;
mp->top->msg = *p.msg;
}
/* Stream-parser callback invoked when a submessage (or group) ends: pops the
 * frame pushed by start_cb so that the parent message becomes current again. */
static void end_cb(void *udata)
{
  struct upb_msgparser *parser = udata;
  parser->top -= 1;
}
/* Externally-visible functions for the msg parser. */
/* Allocates a message parser for messages described by "def".
 *
 * The returned parser must be pointed at a destination message with
 * upb_msgparser_reset() before parsing, and released with
 * upb_msgparser_free().  Returns NULL if the parser cannot be allocated. */
struct upb_msgparser *upb_msgparser_new(struct upb_msgdef *def)
{
  struct upb_msgparser *mp = malloc(sizeof *mp);
  if(!mp) return NULL;

  mp->s = upb_cbparser_new(def, value_cb, str_cb, start_cb, end_cb);
  /* Give every field a defined value; upb_msgparser_reset() never assigns
   * "merge", so without this it would stay indeterminate. */
  mp->merge = false;
  mp->byref = false;
  mp->top = mp->stack;
  mp->top->msg = NULL;
  return mp;
}
/* Prepares parser "s" to parse into "msg".
 *
 * Resets the underlying callback parser (registering "s" as the callback
 * user data), records the byref flag, and makes "msg" the root frame of the
 * frame stack. */
void upb_msgparser_reset(struct upb_msgparser *s, struct upb_msg *msg, bool byref)
{
  upb_cbparser_reset(s->s, s);

  /* Root frame: stack[0] holds the top-level destination message. */
  s->stack[0].msg = msg;
  s->top = &s->stack[0];
  s->byref = byref;
}
/* Releases parser "s" together with the callback parser it owns.  "s" must
 * not be NULL and must not be used afterwards. */
void upb_msgparser_free(struct upb_msgparser *s)
{
  struct upb_cbparser *inner = s->s;
  upb_cbparser_free(inner);
  free(s);
}
/* Convenience wrapper: clears "msg" and parses the "len" bytes at "buf" into
 * it using a temporary parser (byref disabled).  The outcome is reported
 * through "status"; the value returned by upb_msgparser_parse() is
 * discarded. */
void upb_msg_parsestr(struct upb_msg *msg, void *buf, size_t len,
                      struct upb_status *status)
{
  struct upb_msgparser *parser = upb_msgparser_new(msg->def);

  upb_msgparser_reset(parser, msg, false);
  upb_msg_clear(msg);
  (void)upb_msgparser_parse(parser, buf, len, status);

  upb_msgparser_free(parser);
}
/* Feeds "len" bytes at "data" to the parser.  This is a thin wrapper: the
 * return value and "status" are exactly those of upb_cbparser_parse(). */
size_t upb_msgparser_parse(struct upb_msgparser *s, void *data, size_t len,
                           struct upb_status *status)
{
  size_t result = upb_cbparser_parse(s->s, data, len, status);
  return result;
}
/* Serialization. ************************************************************/
/* We store the message sizes linearly in post-order (size of parent after sizes
* of children) for a right-to-left traversal of the message tree. Iterating
* over this in reverse gives us a pre-order (size of parent before sizes of
* children) left-to-right traversal, which is what we want for serializing. */
/* Growable list of message sizes filled in by get_msgsize(). */
struct upb_msgsizes {
  int len;        /* Number of entries of "sizes" currently in use. */
  int size;       /* Number of entries "sizes" has room for. */
  size_t *sizes;  /* Heap-allocated entries; NULL until first allocated. */
};
/* Defined below; forward-declared here because get_msgsize and get_valuesize
 * are mutually recursive. */
static size_t get_msgsize(struct upb_msgsizes *sizes, struct upb_msg *m);
/* Returns a size of a value as it will be serialized. Does *not* include
* the size of the tag -- that is already accounted for. */
static size_t get_valuesize(struct upb_msgsizes *sizes, union upb_value_ptr p,
struct upb_fielddef *f)
{
switch(f->type) {
default: assert(false); return 0; /* Internal corruption. */
case UPB_TYPENUM(MESSAGE): {
size_t submsg_size = get_msgsize(sizes, *p.msg);
return upb_get_INT32_size(submsg_size) + submsg_size;
}
case UPB_TYPENUM(GROUP): {
size_t endgrp_tag_size = upb_get_tag_size(f->number);
return endgrp_tag_size + get_msgsize(sizes, *p.msg);
}
#define CASE(type, member) \
case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type: \
return upb_get_ ## type ## _size(*p.member);
CASE(DOUBLE, _double)
CASE(FLOAT, _float)
CASE(INT32, int32)
CASE(INT64, int64)
CASE(UINT32, uint32)
CASE(UINT64, uint64)
CASE(SINT32, int32)
CASE(SINT64, int64)
CASE(FIXED32, uint32)
CASE(FIXED64, uint64)
CASE(SFIXED32, int32)
CASE(SFIXED64, int64)
CASE(BOOL, _bool)
CASE(ENUM, int32)
#undef CASE
}
}
/* Computes the serialized size of message "m" (excluding any tag or length
 * prefix of "m" itself) and returns it.
 *
 * This is mostly a pure recursive function, but it also appends the result of
 * every level of the recursion to "sizes", because that intermediate
 * information is needed again later.  Children are appended before their
 * parent (post-order).
 *
 * The "sizes" array is grown on demand; the process is aborted if it cannot
 * be grown. */
static size_t get_msgsize(struct upb_msgsizes *sizes, struct upb_msg *m)
{
  enum { INITIAL_CAPACITY = 16 };
  size_t size = 0;
  /* We iterate over fields and arrays in reverse order. */
  for(int32_t i = m->def->num_fields - 1; i >= 0; i--) {
    struct upb_fielddef *f = &m->def->fields[i];
    if(!upb_msg_isset(m, f)) continue;
    union upb_value_ptr p = upb_msg_getptr(m, f);
    if(upb_isarray(f)) {
      for(int32_t j = (*p.arr)->len - 1; j >= 0; j--) {
        union upb_value_ptr elem = upb_array_getelementptr(*p.arr, j);
        /* TODO: for packed arrays tag size goes outside the loop. */
        size += upb_get_tag_size(f->number);
        size += get_valuesize(sizes, elem, f);
      }
    } else {
      size += upb_get_tag_size(f->number);
      size += get_valuesize(sizes, p, f);
    }
  }
  /* Grow the 'sizes' array if it is full.  upb_msgsizes_init() starts with a
   * capacity of zero, so plain doubling would never allocate anything; start
   * from a small fixed capacity instead. */
  assert(sizes->len <= sizes->size);
  if(sizes->len == sizes->size) {
    int new_size = sizes->size > 0 ? sizes->size * 2 : INITIAL_CAPACITY;
    size_t *new_sizes = realloc(sizes->sizes, new_size * sizeof(size_t));
    if(!new_sizes) abort();  /* Out of memory; no way to report it here. */
    sizes->sizes = new_sizes;
    sizes->size = new_size;
  }
  /* Add our size (already added our children, so post-order). */
  sizes->sizes[sizes->len++] = size;
  return size;
}
/* Computes the sizes of "m" and all of its set submessages and appends them
 * to "sizes".  The total computed by get_msgsize() is not returned here; it
 * is stored as the last entry of "sizes". */
void upb_msgsizes_read(struct upb_msgsizes *sizes, struct upb_msg *m)
{
  (void)get_msgsize(sizes, m);
}
/* Initializes "sizes" to an empty list: no storage allocated, no entries. */
void upb_msgsizes_init(struct upb_msgsizes *sizes)
{
  sizes->sizes = NULL;
  sizes->size = 0;
  sizes->len = 0;
}
/* Releases the storage owned by "sizes" (the struct itself is owned by the
 * caller).  Afterwards "sizes" is in the same empty state that
 * upb_msgsizes_init() produces, so calling this twice or reusing the struct
 * does not touch freed memory. */
void upb_msgsizes_free(struct upb_msgsizes *sizes)
{
  free(sizes->sizes);
  sizes->sizes = NULL;
  sizes->size = 0;
  sizes->len = 0;
}
/* Returns the most recently recorded size, i.e. the last entry of "sizes".
 * After upb_msgsizes_read() this is the size of the top-level message,
 * because get_msgsize() records a parent after its children.
 *
 * Calling this on an empty list is a usage error: it trips the assert in
 * debug builds and returns 0 otherwise, instead of reading before the start
 * of the array. */
size_t upb_msgsizes_totalsize(struct upb_msgsizes *sizes)
{
  assert(sizes->len > 0);
  if(sizes->len <= 0) return 0;
  return sizes->sizes[sizes->len - 1];
}
/* State for incremental serialization: a stack of per-message iteration
 * frames.  In this file it is only consumed by the upb_msg_serialize draft
 * that is compiled out with #if 0; the alloc/free/init functions below are
 * still stubs and never populate it. */
struct upb_msg_serialize_state {
struct {
/* Index into m->fields (that is how the disabled serializer draft uses it). */
int field_iter;
/* Presumably the index within a repeated field -- its only use is commented
 * out in the draft; confirm when the serializer is implemented. */
int elem_iter;
/* Definition of the message this frame refers to. */
struct upb_msgdef *m;
/* The message instance for this frame. */
void *msg;
} stack[UPB_MAX_NESTING], *top, *limit;
};
/* Placeholder for allocating resources needed by serialization state "s".
 * Currently does nothing. */
void upb_msg_serialize_alloc(struct upb_msg_serialize_state *s)
{
  (void)s;  /* Intentionally unused. */
}
/* Placeholder for releasing resources held by serialization state "s".
 * Currently does nothing. */
void upb_msg_serialize_free(struct upb_msg_serialize_state *s)
{
  (void)s;  /* Intentionally unused. */
}
/* Placeholder for pointing serialization state "s" at message "m" and its
 * precomputed "sizes".  Currently does nothing; all arguments are ignored. */
void upb_msg_serialize_init(struct upb_msg_serialize_state *s, struct upb_msg *m,
                            struct upb_msgsizes *sizes)
{
  (void)sizes;
  (void)m;
  (void)s;
}
#if 0
/* Everything up to the matching #endif is an unfinished draft of the
 * incremental serializer.  It is compiled out and kept only as a sketch. */

/* Writes the tag for field "f" at "buf" (bounded by "end") and returns what
 * upb_put_UINT32() returns.  Draft: it currently encodes only f->type. */
static uint8_t *serialize_tag(uint8_t *buf, uint8_t *end,
struct upb_fielddef *f,
struct upb_status *status)
{
/* TODO: need to have the field number also. */
return upb_put_UINT32(buf, end, f->type, status);
}
/* Serializes the next set of bytes into _buf (which has size len) and returns
* the number of bytes written (buf - start).
*
* Design intent carried over from the original note: report UPB_STATUS_OK when
* serialization is complete and UPB_STATUS_NEED_MORE_DATA when more of the
* message is left to be serialized -- presumably through *status; this draft
* does not set it itself. The byte count is expected to equal len unless
* serialization finished.
*
* NOTE(review): the loop below never advances "i", the per-type branches are
* empty, and "msg" is only referenced from commented-out code. */
size_t upb_msg_serialize(struct upb_msg_serialize_state *s,
void *_buf, size_t len, struct upb_status *status)
{
uint8_t *buf = _buf;
uint8_t *end = buf + len;
uint8_t *const start = buf;
int i = s->top->field_iter;
//int j = s->top->elem_iter;
void *msg = s->top->msg;
struct upb_msgdef *m = s->top->m;
while(buf < end) {
struct upb_fielddef *f = &m->fields[i];
//union upb_value_ptr p = upb_msg_getptr(msg, f);
buf = serialize_tag(buf, end, f, status);
/* Per-type payload serialization is not written yet. */
if(f->type == UPB_TYPENUM(MESSAGE)) {
} else if(f->type == UPB_TYPENUM(GROUP)) {
} else if(upb_isstring(f)) {
} else {
//upb_serialize_value(buf, end, f->type, p, status);
}
}
return buf - start;
}
#endif
/* Comparison. ***************************************************************/
/* Returns true if the two values pointed to by p1 and p2, both interpreted
 * according to "type", are equal.
 *
 * Numeric and bool values are compared with ==, so for FLOAT/DOUBLE a NaN
 * never compares equal, not even to itself.  STRING/BYTES values are compared
 * with upb_streql().  Any other type (including MESSAGE and GROUP) yields
 * false; submessages are compared by upb_msg_eql() instead. */
bool upb_value_eql(union upb_value_ptr p1, union upb_value_ptr p2,
                   upb_field_type_t type)
{
#define CMP(type) return *p1.type == *p2.type;
  switch(type) {
    case UPB_TYPENUM(DOUBLE):
      CMP(_double)
    case UPB_TYPENUM(FLOAT):
      CMP(_float)
    case UPB_TYPENUM(INT64):
    case UPB_TYPENUM(SFIXED64):
    case UPB_TYPENUM(SINT64):
      CMP(int64)
    case UPB_TYPENUM(UINT64):
    case UPB_TYPENUM(FIXED64):
      CMP(uint64)
    case UPB_TYPENUM(INT32):
    case UPB_TYPENUM(SFIXED32):
    case UPB_TYPENUM(SINT32):
      CMP(int32)
    case UPB_TYPENUM(UINT32):
    case UPB_TYPENUM(FIXED32):
    case UPB_TYPENUM(ENUM):
      CMP(uint32)
    case UPB_TYPENUM(BOOL):
      CMP(_bool)
    case UPB_TYPENUM(STRING):
    case UPB_TYPENUM(BYTES):
      return upb_streql(*p1.str, *p2.str);
    default: return false;
  }
  /* Keep the helper macro local to this function, as is done for CASE in
   * get_valuesize(). */
#undef CMP
}
/* Returns true if arrays arr1 and arr2, both holding values of field "f",
 * are equal.
 *
 * Arrays of different length are never equal.  Beyond that:
 *   - submessage arrays: elements are compared with upb_msg_eql() only when
 *     "recursive" is set; otherwise equal length is sufficient.
 *   - string arrays: elements are compared with upb_streql().
 *   - everything else: the element storage is compared bytewise. */
bool upb_array_eql(struct upb_array *arr1, struct upb_array *arr2,
                   struct upb_fielddef *f, bool recursive)
{
  if(arr1->len != arr2->len) return false;

  if(upb_issubmsg(f)) {
    if(recursive) {
      uint32_t idx = 0;
      while(idx < arr1->len) {
        if(!upb_msg_eql(arr1->elements.msg[idx], arr2->elements.msg[idx],
                        recursive)) {
          return false;
        }
        idx++;
      }
    }
    return true;
  }

  if(upb_isstring(f)) {
    uint32_t idx = 0;
    while(idx < arr1->len) {
      if(!upb_streql(arr1->elements.str[idx], arr2->elements.str[idx])) {
        return false;
      }
      idx++;
    }
    return true;
  }

  /* For primitive types we can compare the memory directly. */
  return memcmp(arr1->elements._void, arr2->elements._void,
                arr1->len * upb_type_info[f->type].size) == 0;
}
/* Returns true if msg1 and msg2 are equal.
 *
 * Both messages must share the same message definition and have exactly the
 * same fields set; every set field must then compare equal.  Submessages
 * (direct or inside arrays) are only compared when "recursive" is set.
 *
 * TODO: is requiring identical set-flags wrong?  Should an absent default be
 * considered equal to an explicitly set default? */
bool upb_msg_eql(struct upb_msg *msg1, struct upb_msg *msg2, bool recursive)
{
  if(msg1->def != msg2->def) return false;
  struct upb_msgdef *m = msg1->def;

  /* Cheap rejection: compare the leading set_flags_bytes of both messages'
   * data in one go. */
  if(memcmp(msg1->data, msg2->data, msg1->def->set_flags_bytes) != 0)
    return false;

  /* Possible optimization: create a mask of the bytes in the messages that
   * contain only primitive values (not strings, arrays, submessages, or
   * padding) and memcmp the masked messages. */
  for(uint32_t i = 0; i < m->num_fields; i++) {
    struct upb_fielddef *f = &m->fields[i];
    bool set1 = upb_msg_isset(msg1, f);
    bool set2 = upb_msg_isset(msg2, f);
    if(set1 != set2) return false;
    if(!set1) continue;  /* Unset in both: nothing to compare. */

    union upb_value_ptr p1 = upb_msg_getptr(msg1, f);
    union upb_value_ptr p2 = upb_msg_getptr(msg2, f);

    if(upb_isarray(f)) {
      if(!upb_array_eql(*p1.arr, *p2.arr, f, recursive)) return false;
      continue;
    }
    if(upb_issubmsg(f)) {
      if(recursive && !upb_msg_eql(*p1.msg, *p2.msg, recursive))
        return false;
      continue;
    }
    if(!upb_value_eql(p1, p2, f->type)) return false;
  }
  return true;
}