Some cleanup and reformatting, fixed the benchmarks.
diff --git a/Makefile b/Makefile
index 0aaae32..110a263 100644
--- a/Makefile
+++ b/Makefile
@@ -34,7 +34,7 @@
 LIBUPB=src/libupb.a
 LIBUPB_PIC=src/libupb_pic.a
 LIBUPB_SHARED=src/libupb.so
-ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC) $(LIBUPB_SHARED) tests/test_table tests/tests tools/upbc
+ALL=deps $(OBJ) $(LIBUPB) $(LIBUPB_PIC)  tests/test_table tests/tests tools/upbc
 all: $(ALL)
 clean:
 	rm -rf $(call rwildcard,,*.o) $(call rwildcard,,*.lo) $(ALL) benchmark/google_messages.proto.pb benchmark/google_messages.pb.* benchmarks/b.* benchmarks/*.pb*
@@ -42,8 +42,9 @@
 	cd lang_ext/python && python setup.py clean --all
 
 # The core library (src/libupb.a)
-SRC=src/upb_parse.c src/upb_table.c src/upb_msg.c src/upb_mm.c src/upb_enum.c src/upb_context.c \
-    src/upb_string.c src/upb_text.c src/upb_serialize.c descriptor/descriptor.c
+SRC=src/upb.c src/upb_parse.c src/upb_table.c src/upb_msg.c src/upb_mm.c \
+    src/upb_enum.c src/upb_context.c src/upb_string.c src/upb_text.c \
+    src/upb_serialize.c descriptor/descriptor.c
 STATICOBJ=$(patsubst %.c,%.o,$(SRC))
 SHAREDOBJ=$(patsubst %.c,%.lo,$(SRC))
 # building shared objects is like building static ones, except -fPIC is added.
@@ -91,6 +92,7 @@
 benchmarks: $(BENCHMARKS)
 benchmark:
 	@rm -f benchmarks/results
+	@rm -rf benchmarks/*.dSYM
 	@for test in benchmarks/b.* ; do ./$$test ; done
 
 benchmarks/google_messages.proto.pb: benchmarks/google_messages.proto
diff --git a/benchmarks/parsetostruct.upb_table.c b/benchmarks/parsetostruct.upb_table.c
index 751c982..9daa8e0 100644
--- a/benchmarks/parsetostruct.upb_table.c
+++ b/benchmarks/parsetostruct.upb_table.c
@@ -3,45 +3,47 @@
 
 #include "upb_context.h"
 #include "upb_msg.h"
+#include "upb_mm.h"
 
-static struct upb_context c;
-static struct upb_string str;
-static struct upb_msg_parse_state s;
-static struct upb_msg *m;
-static void *data[NUM_MESSAGES];
+static struct upb_context *c;
+static struct upb_string *str;
+static struct upb_msgdef *def;
+static struct upb_msg *msgs[NUM_MESSAGES];
 
 static bool initialize()
 {
   /* Initialize upb state, parse descriptor. */
-  upb_context_init(&c);
-  struct upb_string fds;
-  if(!upb_strreadfile(MESSAGE_DESCRIPTOR_FILE, &fds)) {
+  c = upb_context_new();
+  struct upb_string *fds = upb_strreadfile(MESSAGE_DESCRIPTOR_FILE);
+  if(!fds) {
     fprintf(stderr, "Couldn't read " MESSAGE_DESCRIPTOR_FILE ".\n");
     return false;
   }
-  if(!upb_context_parsefds(&c, &fds)) {
+  if(!upb_context_parsefds(c, fds)) {
     fprintf(stderr, "Error importing " MESSAGE_DESCRIPTOR_FILE ".\n");
     return false;
   }
-  upb_strfree(fds);
+  upb_string_unref(fds);
 
   char class_name[] = MESSAGE_NAME;
   struct upb_string proto_name;
   proto_name.ptr = class_name;
   proto_name.byte_len = sizeof(class_name)-1;
-  struct upb_symtab_entry *e = upb_context_lookup(&c, &proto_name);
-  if(!e || e->type != UPB_SYM_MESSAGE) {
+  struct upb_symtab_entry e;
+  upb_status_t success = upb_context_lookup(c, &proto_name, &e);
+  if(!success || e.type != UPB_SYM_MESSAGE) {
     fprintf(stderr, "Error finding symbol '" UPB_STRFMT "'.\n",
-            UPB_STRARG(proto_name));
+            UPB_STRARG(&proto_name));
     return false;
   }
 
-  m = e->ref.msg;
-  for(int i = 0; i < 32; i++)
-    data[i] = upb_msgdata_new(m);
+  def = e.ref.msg;
+  for(int i = 0; i < NUM_MESSAGES; i++)
+    msgs[i] = upb_msg_new(def);
 
   /* Read the message data itself. */
-  if(!upb_strreadfile(MESSAGE_FILE, &str)) {
+  str = upb_strreadfile(MESSAGE_FILE);
+  if(!str) {
     fprintf(stderr, "Error reading " MESSAGE_FILE "\n");
     return false;
   }
@@ -51,19 +53,18 @@
 static void cleanup()
 {
-  for(int i = 0; i < 32; i++)
-    upb_msgdata_free(data[i], m, true);
-  upb_strfree(str);
-  upb_context_free(&c);
+  for(int i = 0; i < NUM_MESSAGES; i++)
+    upb_msg_unref(msgs[i]);
+  upb_string_unref(str);
+  upb_context_unref(c);
 }
 
 static size_t run(int i)
 {
-  size_t read;
-  upb_msg_parse_reset(&s, data[i%NUM_MESSAGES], m, false, BYREF);
-  upb_status_t status = upb_msg_parse(&s, str.ptr, str.byte_len, &read);
-  if(status != UPB_STATUS_OK && read != str.byte_len) {
-    fprintf(stderr, "Error. :(  error=%d, read=%zu\n", status, read);
+  upb_status_t status;
+  status = upb_msg_parsestr(msgs[i%NUM_MESSAGES], str->ptr, str->byte_len);
+  if(status != UPB_STATUS_OK) {
+    fprintf(stderr, "Error. :(  error=%d\n", status);
     return 0;
   }
-  return read;
+  return str->byte_len;
 }
diff --git a/src/upb.c b/src/upb.c
new file mode 100644
index 0000000..e82a8e4
--- /dev/null
+++ b/src/upb.c
@@ -0,0 +1,47 @@
+/*
+ * upb - a minimalist implementation of protocol buffers.
+ *
+ * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
+ *
+ */
+
+#include <stddef.h>
+
+#include "upb.h"
+
+// Alignment of type t, computed as the offset of a t member that follows a
+// single char.  Deliberately not named "alignof", which <stdalign.h> defines
+// as a macro in C11 and which is a keyword in C23 and C++11.
+#define UPB_ALIGNOF(t) offsetof(struct { char c; t x; }, x)
+
+// Expands to one designated initializer of upb_type_info, placed at the index
+// given by the descriptor.proto type constant for proto_type.  The values are,
+// in order: alignment, size, wire type, and the C type spelled as a string.
+#define TYPE_INFO(proto_type, wire_type, ctype) \
+    [GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## proto_type] = \
+    {UPB_ALIGNOF(ctype), sizeof(ctype), wire_type, #ctype},
+
+// Info about every field type, indexed by type number.
+struct upb_type_info upb_type_info[] = {
+  TYPE_INFO(DOUBLE,   UPB_WIRE_TYPE_64BIT,       double)
+  TYPE_INFO(FLOAT,    UPB_WIRE_TYPE_32BIT,       float)
+  TYPE_INFO(INT64,    UPB_WIRE_TYPE_VARINT,      int64_t)
+  TYPE_INFO(UINT64,   UPB_WIRE_TYPE_VARINT,      uint64_t)
+  TYPE_INFO(INT32,    UPB_WIRE_TYPE_VARINT,      int32_t)
+  TYPE_INFO(FIXED64,  UPB_WIRE_TYPE_64BIT,       uint64_t)
+  TYPE_INFO(FIXED32,  UPB_WIRE_TYPE_32BIT,       uint32_t)
+  TYPE_INFO(BOOL,     UPB_WIRE_TYPE_VARINT,      bool)
+  TYPE_INFO(MESSAGE,  UPB_WIRE_TYPE_DELIMITED,   void*)
+  TYPE_INFO(GROUP,    UPB_WIRE_TYPE_START_GROUP, void*)
+  TYPE_INFO(UINT32,   UPB_WIRE_TYPE_VARINT,      uint32_t)
+  TYPE_INFO(ENUM,     UPB_WIRE_TYPE_VARINT,      uint32_t)
+  TYPE_INFO(SFIXED32, UPB_WIRE_TYPE_32BIT,       int32_t)
+  TYPE_INFO(SFIXED64, UPB_WIRE_TYPE_64BIT,       int64_t)
+  TYPE_INFO(SINT32,   UPB_WIRE_TYPE_VARINT,      int32_t)
+  TYPE_INFO(SINT64,   UPB_WIRE_TYPE_VARINT,      int64_t)
+  TYPE_INFO(STRING,   UPB_WIRE_TYPE_DELIMITED,   struct upb_string*)
+  TYPE_INFO(BYTES,    UPB_WIRE_TYPE_DELIMITED,   struct upb_string*)
+};
+
+#undef TYPE_INFO
+#undef UPB_ALIGNOF
diff --git a/src/upb.h b/src/upb.h
index af026f5..1112fe1 100644
--- a/src/upb.h
+++ b/src/upb.h
@@ -1,6 +1,6 @@
 /*
  * upb - a minimalist implementation of protocol buffers.
-
+ *
  * Copyright (c) 2009 Joshua Haberman.  See LICENSE for details.
  *
  * This file contains shared definitions that are widely used across upb.
@@ -11,14 +11,14 @@
 
 #include <stdbool.h>
 #include <stdint.h>
-#include <stdio.h>  /* for size_t. */
+#include <stdio.h>  // only for size_t.
 #include "descriptor_const.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-/* inline if possible, emit standalone code if required. */
+// inline if possible, emit standalone code if required.
 #ifndef INLINE
 #define INLINE static inline
 #endif
@@ -26,21 +26,22 @@
 #define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
 #define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
 
-/* The maximum that any submessages can be nested.  Matches proto2's limit. */
+// The maximum that any submessages can be nested.  Matches proto2's limit.
 #define UPB_MAX_NESTING 64
 
-/* The maximum number of fields that any one .proto type can have. */
+// The maximum number of fields that any one .proto type can have.
 #define UPB_MAX_FIELDS (1<<16)
 
-/* Nested type names are separated by periods. */
+// Nested type names are separated by periods.
 #define UPB_SYMBOL_SEPARATOR '.'
 #define UPB_SYMBOL_MAX_LENGTH 256
 
 #define UPB_INDEX(base, i, m) (void*)((char*)(base) + ((i)*(m)))
 
+
 /* Fundamental types and type constants. **************************************/
 
-/* A list of types as they are encoded on-the-wire. */
+// A list of types as they are encoded on-the-wire.
 enum upb_wire_type {
   UPB_WIRE_TYPE_VARINT      = 0,
   UPB_WIRE_TYPE_64BIT       = 1,
@@ -49,26 +50,27 @@
   UPB_WIRE_TYPE_END_GROUP   = 4,
   UPB_WIRE_TYPE_32BIT       = 5
 };
+
 typedef uint8_t upb_wire_type_t;
 
-/* Value type as defined in a .proto file.  eg. string, int32, etc.
- *
- * The values of this are defined by google_protobuf_FieldDescriptorProto_Type
- * (from descriptor.proto).  Note that descriptor.proto reserves "0" for
- * errors, and we use it to represent exceptional circumstances. */
+// Value type as defined in a .proto file.  eg. string, int32, etc.  The
+// integers that represent this are defined by descriptor.proto.  Note that
+// descriptor.proto reserves "0" for errors, and we use it to represent
+// exceptional circumstances.
 typedef uint8_t upb_field_type_t;
 
+// For referencing the type constants tersely.
+#define UPB_TYPENUM(type) GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## type
+
 INLINE bool upb_issubmsgtype(upb_field_type_t type) {
-  return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP  ||
-         type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE;
+  return type == UPB_TYPENUM(GROUP) || type == UPB_TYPENUM(MESSAGE);
 }
 
 INLINE bool upb_isstringtype(upb_field_type_t type) {
-  return type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING  ||
-         type == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES;
+  return type == UPB_TYPENUM(STRING) || type == UPB_TYPENUM(BYTES);
 }
 
-/* Information about a given value type (upb_field_type_t). */
+// Info for a given field type.
 struct upb_type_info {
   uint8_t align;
   uint8_t size;
@@ -76,87 +78,88 @@
   char *ctype;
 };
 
-/* Contains information for all .proto types.  Indexed by upb_field_type_t. */
+// A static array of info about all of the field types, indexed by type number.
 extern struct upb_type_info upb_type_info[];
 
-/* The number of a field, eg. "optional string foo = 3". */
+// The number of a field, eg. "optional string foo = 3".
 typedef int32_t upb_field_number_t;
 
-/* Label (optional, repeated, required) as defined in a .proto file.  The values
- * of this are defined by google.protobuf.FieldDescriptorProto.Label (from
- * descriptor.proto). */
+// Label (optional, repeated, required) as defined in a .proto file.  The
+// values of this are defined by google.protobuf.FieldDescriptorProto.Label
+// (from descriptor.proto).
 typedef uint8_t  upb_label_t;
 
-/* A value as it is encoded on-the-wire, except delimited, which is handled
- * separately. */
+// A scalar (non-string) wire value.  Used only for parsing unknown fields.
 union upb_wire_value {
   uint64_t varint;
   uint64_t _64bit;
   uint32_t _32bit;
 };
 
-/* A tag occurs before each value on-the-wire. */
+// A tag occurs before each value on-the-wire.
 struct upb_tag {
   upb_field_number_t field_number;
   upb_wire_type_t wire_type;
 };
 
+
 /* Polymorphic values of .proto types *****************************************/
 
 struct upb_string;
 struct upb_array;
 struct upb_msg;
 
-/* A single .proto value.  The owner must have an out-of-band way of knowing
- * the type, so that it knows which union member to use. */
+// A single .proto value.  The owner must have an out-of-band way of knowing
+// the type, so that it knows which union member to use.
 union upb_value {
-  double   _double;
-  float    _float;
-  int32_t  int32;
-  int64_t  int64;
+  double _double;
+  float _float;
+  int32_t int32;
+  int64_t int64;
   uint32_t uint32;
   uint64_t uint64;
-  bool     _bool;
+  bool _bool;
   struct upb_string *str;
   struct upb_array *arr;
   struct upb_msg *msg;
 };
 
-/* A pointer to a .proto value.  The owner must have an out-of-band way of
- * knowing the type, so it knows which union member to use. */
+// A pointer to a .proto value.  The owner must have an out-of-band way of
+// knowing the type, so it knows which union member to use.
 union upb_value_ptr {
-  double   *_double;
-  float    *_float;
-  int32_t  *int32;
-  int64_t  *int64;
+  double *_double;
+  float *_float;
+  int32_t *int32;
+  int64_t *int64;
   uint32_t *uint32;
   uint64_t *uint64;
-  bool     *_bool;
+  bool *_bool;
   struct upb_string **str;
   struct upb_array **arr;
   struct upb_msg **msg;
-  void     *_void;
+  void *_void;
 };
 
-/* Unfortunately there is no way to define this so that it can be used as a
- * generic expression, a la:
- *   foo(UPB_VALUE_ADDROF(bar));
- * ...you have to use it as the initializer of a upb_value_ptr:
- *   union upb_value_ptr p = UPB_VALUE_ADDROF(bar);
- *   foo(p);
- */
+// Unfortunately there is no way to define this so that it can be used as a
+// generic expression, a la:
+//   foo(UPB_VALUE_ADDROF(bar));
+// ...you have to use it as the initializer of a upb_value_ptr:
+//   union upb_value_ptr p = UPB_VALUE_ADDROF(bar);
+//   foo(p);
 #define UPB_VALUE_ADDROF(val) {(void*)&val._double}
 
-/* Converts upb_value_ptr -> upb_value by "dereferencing" the pointer.  We need
- * to know the field type to perform this operation, because we need to know
- * how much memory to copy. */
+/**
+ * Converts upb_value_ptr -> upb_value by reading from the pointer.  We need to
+ * know the field type to perform this operation, because we need to know how
+ * much memory to copy.
+ */
 INLINE union upb_value upb_value_read(union upb_value_ptr ptr,
                                       upb_field_type_t ft) {
   union upb_value val;
+
 #define CASE(t, member_name) \
-  case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
-    val.member_name = *ptr.member_name; \
-    break;
+  case UPB_TYPENUM(t): val.member_name = *ptr.member_name; break;
+
   switch(ft) {
     CASE(DOUBLE,   _double)
     CASE(FLOAT,    _float)
@@ -178,19 +181,21 @@
     CASE(GROUP,    msg)
     default: break;
   }
-#undef CASE
   return val;
+
+#undef CASE
 }
 
-/* Converts upb_value_ptr -> upb_value by "dereferencing" the pointer.  We need
- * to know the field type to perform this operation, because we need to know
- * how much memory to copy. */
+/**
+ * Writes a upb_value to a upb_value_ptr location. We need to know the field
+ * type to perform this operation, because we need to know how much memory to
+ * copy.
+ */
 INLINE void upb_value_write(union upb_value_ptr ptr, union upb_value val,
                             upb_field_type_t ft) {
 #define CASE(t, member_name) \
-  case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
-    *ptr.member_name = val.member_name; \
-    break;
+  case UPB_TYPENUM(t): *ptr.member_name = val.member_name; break;
+
   switch(ft) {
     CASE(DOUBLE,   _double)
     CASE(FLOAT,    _float)
@@ -212,17 +217,19 @@
     CASE(GROUP,    msg)
     default: break;
   }
+
 #undef CASE
 }
 
+// All the different definitions that can occur in .proto files.
 union upb_symbol_ref {
   struct upb_msgdef *msg;
   struct upb_enum *_enum;
   struct upb_svc *svc;
 };
 
-/* Status codes used as a return value.  Codes >0 are not fatal and can be
- * resumed. */
+// Status codes used as a return value.  Codes >0 are not fatal and can be
+// resumed.
 typedef enum upb_status {
   UPB_STATUS_OK = 0,
 
diff --git a/src/upb_array.h b/src/upb_array.h
index 732c4aa..b5eb22d 100644
--- a/src/upb_array.h
+++ b/src/upb_array.h
@@ -32,11 +32,12 @@
 struct upb_string;
 
 /* Returns a pointer to an array element.  Does not perform a bounds check! */
-INLINE union upb_value_ptr upb_array_getelementptr(
-    struct upb_array *arr, upb_arraylen_t n, upb_field_type_t type)
+INLINE union upb_value_ptr upb_array_getelementptr(struct upb_array *arr,
+                                                   upb_arraylen_t n)
 {
   union upb_value_ptr ptr;
-  ptr._void = (void*)((char*)arr->elements._void + n*upb_type_info[type].size);
+  ptr._void = UPB_INDEX(arr->elements._void, n,
+                        upb_type_info[arr->fielddef->type].size);
   return ptr;
 }
 
@@ -66,26 +67,31 @@
   return v;
 }
 
-/* Resizes array to be "len" elements long (reallocating if necessary). */
-INLINE bool upb_array_resize(struct upb_array *arr, upb_arraylen_t newlen)
+/* Appends one element to the array and returns a pointer to it.  Storage is
+ * grown when the array is full; newly allocated storage is zero-filled.
+ * Aborts if memory cannot be allocated. */
+INLINE union upb_value_ptr upb_array_append(struct upb_array *arr)
 {
-  size_t type_size = upb_type_info[arr->fielddef->type].size;
-  bool dropped = false;
-  bool ref = arr->size == 0;   /* Ref'ing external memory. */
-  void *data = arr->elements._void;
-  if(arr->size < newlen) {
-    /* Need to resize. */
-    arr->size = UPB_MAX(4, upb_round_up_to_pow2(newlen));
-    arr->elements._void = realloc(ref ? NULL : data, arr->size * type_size);
+  size_t size = upb_type_info[arr->fielddef->type].size;
+  upb_arraylen_t oldlen = arr->len;
+  if(oldlen == arr->size) {
+    arr->size = UPB_MAX(4, upb_round_up_to_pow2(oldlen+1));
+    /* Keep the old pointer until realloc() is known to have succeeded. */
+    void *elements = realloc(arr->elements._void, arr->size * size);
+    if(!elements) abort();
+    arr->elements._void = elements;
+    /* Zero the slots at and beyond the current length. */
+    memset((char*)elements + (oldlen*size), 0, (arr->size - oldlen) * size);
   }
-  if(ref) {
-    /* Need to take referenced data and copy it to memory we own. */
-    memcpy(arr->elements._void, data, UPB_MIN(arr->len, newlen) * type_size);
-    dropped = true;
-  }
-  /* TODO: fill with defaults. */
-  arr->len = newlen;
-  return dropped;
+  arr->len++;
+  return upb_array_getelementptr(arr, oldlen);
+}
+
+/* Sets the length to zero.  The element storage is left allocated and its
+ * contents are not modified. */
+INLINE void upb_array_truncate(struct upb_array *arr)
+{
+  arr->len = 0;
 }
 
 #ifdef __cplusplus
diff --git a/src/upb_atomic.h b/src/upb_atomic.h
index c1a60b9..85ec582 100644
--- a/src/upb_atomic.h
+++ b/src/upb_atomic.h
@@ -29,6 +29,7 @@
 #define INLINE static inline
 #endif
 
+#define UPB_THREAD_UNSAFE
 #ifdef UPB_THREAD_UNSAFE
 
 /* Non-thread-safe implementations. ******************************************/
diff --git a/src/upb_mm.c b/src/upb_mm.c
index 853d572..769db96 100644
--- a/src/upb_mm.c
+++ b/src/upb_mm.c
@@ -27,7 +27,7 @@
     upb_arraylen_t i;
     /* Unref elements. */
     for(i = 0; i < arr->len; i++) {
-      union upb_value_ptr p = upb_array_getelementptr(arr, i, arr->fielddef->type);
+      union upb_value_ptr p = upb_array_getelementptr(arr, i);
       upb_mm_ptrtype type = upb_elem_ptrtype(arr->fielddef);
       union upb_mmptr mmptr = upb_mmptr_read(p, type);
       upb_mm_unref(mmptr, type);
@@ -120,7 +120,7 @@
   struct upb_msg_fielddef *f = arr->fielddef;
   assert(upb_elem_ismm(f));
   assert(i < arr->len);
-  union upb_value_ptr p = upb_array_getelementptr(arr, i, f->type);
+  union upb_value_ptr p = upb_array_getelementptr(arr, i);
   upb_mm_ptrtype type = upb_elem_ptrtype(f);
   union upb_mmptr val = upb_mmptr_read(p, type);
   return find_or_create_ref(arrref, arrref->mm, val, type, refcreated);
diff --git a/src/upb_msg.c b/src/upb_msg.c
index 80602dd..f977527 100644
--- a/src/upb_msg.c
+++ b/src/upb_msg.c
@@ -156,11 +156,16 @@
 {
   union upb_value_ptr p = upb_msg_getptr(msg, f);
   if(upb_isarray(f)) {
-    bool isset = upb_msg_isset(msg, f);
-    size_t len = isset ? (*p.arr)->len : 0;
-    if(!isset) *p.arr = upb_array_new(f);
-    upb_array_resize(*p.arr, len+1);
-    p = upb_array_getelementptr(*p.arr, len, f->type);
+    if(!upb_msg_isset(msg, f)) {
+      if(!*p.arr || !upb_mmhead_only(&((*p.arr)->mmhead))) {
+        if(*p.arr)
+          upb_array_unref(*p.arr);
+        *p.arr = upb_array_new(f);
+      }
+      upb_array_truncate(*p.arr);
+      upb_msg_set(msg, f);
+    }
+    p = upb_array_append(*p.arr);
   }
   return p;
 }
@@ -202,7 +207,11 @@
   upb_msg_set(msg, f);
   if(avail_len != total_len) abort();  /* TODO: support streaming. */
   //bool byref = avail_len == total_len && mp->byref;
-  *p.str = upb_string_new();
+  if(!*p.str || !upb_mmhead_only(&((*p.str)->mmhead))) {
+    if(*p.str)
+      upb_string_unref(*p.str);
+    *p.str = upb_string_new();
+  }
   //if(byref) {
   //  upb_strdrop(*p.str);
   //  (*p.str)->ptr = (char*)str;
@@ -220,16 +229,19 @@
   struct upb_msg_fielddef *f = user_field_desc;
   struct upb_msg *oldmsg = mp->top->msg;
   union upb_value_ptr p = get_value_ptr(oldmsg, f);
-  struct upb_msg **submsg = p.msg;
-  //if(*submsg && upb_mmhead_only(&((*submsg)->mmhead))) {
-  //  /* We can reuse the existing submsg. */
-  //} else {
-    *submsg = upb_msg_new(f->ref.msg);
-  //}
-  upb_msg_clear(*submsg);
-  upb_msg_set(oldmsg, f);
+
+  if(upb_isarray(f) || !upb_msg_isset(oldmsg, f)) {
+    if(!*p.msg || !upb_mmhead_only(&((*p.msg)->mmhead))) {
+      if(*p.msg)
+        upb_msg_unref(*p.msg);
+      *p.msg = upb_msg_new(f->ref.msg);
+    }
+    upb_msg_clear(*p.msg);
+    upb_msg_set(oldmsg, f);
+  }
+
   mp->top++;
-  mp->top->msg = *submsg;
+  mp->top->msg = *p.msg;
 }
 
 static void submsg_end_cb(void *udata)
@@ -248,6 +260,7 @@
   struct upb_msg_parser mp;
   upb_msg_parser_reset(&mp, msg, false);
   size_t read;
+  upb_msg_clear(msg);
   upb_status_t ret = upb_msg_parser_parse(&mp, buf, len, &read);
   return ret;
 }
@@ -337,7 +350,7 @@
     union upb_value_ptr p = upb_msg_getptr(m, f);
     if(upb_isarray(f)) {
       for(int32_t j = (*p.arr)->len - 1; j >= 0; j--) {
-        union upb_value_ptr elem = upb_array_getelementptr((*p.arr), j, f->type);
+        union upb_value_ptr elem = upb_array_getelementptr(*p.arr, j);
         /* TODO: for packed arrays tag size goes outside the loop. */
         size += upb_get_tag_size(fd->number);
         size += get_valuesize(sizes, elem, f, fd);
diff --git a/src/upb_parse.c b/src/upb_parse.c
index 7c1ad66..baaeb99 100644
--- a/src/upb_parse.c
+++ b/src/upb_parse.c
@@ -9,32 +9,10 @@
 #include <stddef.h>
 #include <stdlib.h>
 
-/* May want to move this to upb.c if enough other things warrant it. */
-#define alignof(t) offsetof(struct { char c; t x; }, x)
-#define TYPE_INFO(proto_type, wire_type, ctype) [proto_type] = {alignof(ctype), sizeof(ctype), wire_type, #ctype},
-struct upb_type_info upb_type_info[] = {
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE,   UPB_WIRE_TYPE_64BIT,       double)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT,    UPB_WIRE_TYPE_32BIT,       float)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64,    UPB_WIRE_TYPE_VARINT,      int64_t)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64,   UPB_WIRE_TYPE_VARINT,      uint64_t)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32,    UPB_WIRE_TYPE_VARINT,      int32_t)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64,  UPB_WIRE_TYPE_64BIT,       uint64_t)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32,  UPB_WIRE_TYPE_32BIT,       uint32_t)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL,     UPB_WIRE_TYPE_VARINT,      bool)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE,  UPB_WIRE_TYPE_DELIMITED,   void*)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP,    UPB_WIRE_TYPE_START_GROUP, void*)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32,   UPB_WIRE_TYPE_VARINT,      uint32_t)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM,     UPB_WIRE_TYPE_VARINT,      uint32_t)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32, UPB_WIRE_TYPE_32BIT,       int32_t)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64, UPB_WIRE_TYPE_64BIT,       int64_t)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32,   UPB_WIRE_TYPE_VARINT,      int32_t)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64,   UPB_WIRE_TYPE_VARINT,      int64_t)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING,   UPB_WIRE_TYPE_DELIMITED,   struct upb_string*)
-  TYPE_INFO(GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES,    UPB_WIRE_TYPE_DELIMITED,   struct upb_string*)
-};
-
-/* This is called by the inline version of the function if the varint turns out
- * to be >= 2 bytes. */
+/**
+ * Parses a 64-bit varint that is known to be >= 2 bytes (the inline version
+ * handles 1 and 2 byte varints).
+ */
 upb_status_t upb_get_v_uint64_t_full(uint8_t *buf, uint8_t *end, uint64_t *val,
                                      uint8_t **outbuf)
 {
@@ -42,10 +20,15 @@
   uint8_t last = 0x80;
   *val = 0;
   int bitpos;
+
   for(bitpos = 0; buf < (uint8_t*)end && (last & 0x80); buf++, bitpos += 7)
     *val |= ((uint64_t)((last = *buf) & 0x7F)) << bitpos;
-  if(buf >= end && buf <= maxend && (last & 0x80)) return UPB_STATUS_NEED_MORE_DATA;
-  if(buf > maxend) return UPB_ERROR_UNTERMINATED_VARINT;
+
+  if(buf >= end && buf <= maxend && (last & 0x80))
+    return UPB_STATUS_NEED_MORE_DATA;
+  if(buf > maxend)
+    return UPB_ERROR_UNTERMINATED_VARINT;
+
   *outbuf = buf;
   return UPB_STATUS_OK;
 }
@@ -54,23 +37,37 @@
                                   union upb_wire_value *wv, uint8_t **outbuf)
 {
   switch(wt) {
-    case UPB_WIRE_TYPE_VARINT: return upb_get_v_uint64_t(buf, end, &wv->varint, outbuf);
-    case UPB_WIRE_TYPE_64BIT:  return upb_get_f_uint64_t(buf, end, &wv->_64bit, outbuf);
-    case UPB_WIRE_TYPE_32BIT:  return upb_get_f_uint32_t(buf, end, &wv->_32bit, outbuf);
-    default: return UPB_ERROR_ILLEGAL; /* Doesn't handle delimited, groups. */
+    case UPB_WIRE_TYPE_VARINT:
+      return upb_get_v_uint64_t(buf, end, &wv->varint, outbuf);
+    case UPB_WIRE_TYPE_64BIT:
+      return upb_get_f_uint64_t(buf, end, &wv->_64bit, outbuf);
+    case UPB_WIRE_TYPE_32BIT:
+      return upb_get_f_uint32_t(buf, end, &wv->_32bit, outbuf);
+    default:
+      return UPB_ERROR_ILLEGAL;  // Doesn't handle delimited, groups.
   }
 }
 
+/**
+ * Advances buf past the current wire value (of type wt), saving the result in
+ * outbuf.
+ */
 static upb_status_t skip_wire_value(uint8_t *buf, uint8_t *end, upb_wire_type_t wt,
                                     uint8_t **outbuf)
 {
   switch(wt) {
-    case UPB_WIRE_TYPE_VARINT: return upb_skip_v_uint64_t(buf, end, outbuf);
-    case UPB_WIRE_TYPE_64BIT:  return upb_skip_f_uint64_t(buf, end, outbuf);
-    case UPB_WIRE_TYPE_32BIT:  return upb_skip_f_uint32_t(buf, end, outbuf);
-    case UPB_WIRE_TYPE_START_GROUP: /* TODO: skip to matching end group. */
-    case UPB_WIRE_TYPE_END_GROUP: return UPB_STATUS_OK;
-    default: return UPB_ERROR_ILLEGAL;
+    case UPB_WIRE_TYPE_VARINT:
+      return upb_skip_v_uint64_t(buf, end, outbuf);
+    case UPB_WIRE_TYPE_64BIT:
+      return upb_skip_f_uint64_t(buf, end, outbuf);
+    case UPB_WIRE_TYPE_32BIT:
+      return upb_skip_f_uint32_t(buf, end, outbuf);
+    case UPB_WIRE_TYPE_START_GROUP:
+      // TODO: skip to matching end group.
+    case UPB_WIRE_TYPE_END_GROUP:
+      return UPB_STATUS_OK;
+    default:
+      return UPB_ERROR_ILLEGAL;
   }
 }
 
@@ -78,8 +75,8 @@
                              union upb_value_ptr v, uint8_t **outbuf)
 {
 #define CASE(t, member_name) \
-  case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ ## t: \
-    return upb_get_ ## t(buf, end, v.member_name, outbuf);
+  case UPB_TYPENUM(t): return upb_get_ ## t(buf, end, v.member_name, outbuf);
+
   switch(ft) {
     CASE(DOUBLE,   _double)
     CASE(FLOAT,    _float)
@@ -97,6 +94,7 @@
     CASE(ENUM,     int32)
     default: return UPB_ERROR_ILLEGAL;
   }
+
 #undef CASE
 }
 
@@ -104,55 +102,76 @@
 {
   state->top = state->stack;
   state->limit = &state->stack[UPB_MAX_NESTING];
-  /* The top-level message is not delimited (we can keep receiving data for
-   * it indefinitely), so we treat it like a group. */
-  *state->top = 0;
   state->completed_offset = 0;
   state->udata = udata;
+
+  // The top-level message is not delimited (we can keep receiving data for it
+  // indefinitely), so we treat it like a group.
+  *state->top = 0;
 }
 
-static void *pop_stack_frame(struct upb_stream_parser *s, uint8_t *buf)
-{
-  if(s->submsg_end_cb) s->submsg_end_cb(s->udata);
-  s->top--;
-  return (char*)buf + (*s->top > 0 ? (*s->top - s->completed_offset) : 0);
-}
-
-/* Returns the next end offset. */
-static upb_status_t push_stack_frame(struct upb_stream_parser *s,
-                                     uint8_t *buf, uint32_t len,
-                                     void *user_field_desc, uint8_t **submsg_end)
+/**
+ * Pushes a new stack frame for a submessage with the given len (which will
+ * be zero if the submessage is a group).
+ */
+static upb_status_t push(struct upb_stream_parser *s, uint8_t *start,
+                         uint32_t submsg_len, void *user_field_desc,
+                         uint8_t **submsg_end)
 {
   s->top++;
-  if(s->top > s->limit) return UPB_ERROR_STACK_OVERFLOW;
-  *s->top = s->completed_offset + len;
-  if(s->submsg_start_cb) s->submsg_start_cb(s->udata, user_field_desc);
-  *submsg_end = buf + (*s->top > 0 ? (*s->top - s->completed_offset) : 0);
+  if(s->top >= s->limit)
+    return UPB_ERROR_STACK_OVERFLOW;
+  *s->top = s->completed_offset + submsg_len;
+
+  if(s->submsg_start_cb)
+    s->submsg_start_cb(s->udata, user_field_desc);
+
+  *submsg_end = start + (*s->top > 0 ? (*s->top - s->completed_offset) : 0);
   return UPB_STATUS_OK;
 }
 
+/**
+ * Pops a stack frame, returning a pointer to where the frame that is now on
+ * top ends (or "start" itself if that frame's stored offset is 0: a group).
+ */
+static void *pop(struct upb_stream_parser *s, uint8_t *start)
+{
+  if(s->submsg_end_cb)
+    s->submsg_end_cb(s->udata);
+
+  s->top--;
+
+  if(*s->top > 0)
+    return (char*)start + (*s->top - s->completed_offset);
+  else
+    return (char*)start;  // group.
+}
+
+
 upb_status_t upb_stream_parser_parse(struct upb_stream_parser *s,
                                      void *_buf, size_t len, size_t *read)
 {
   uint8_t *buf = _buf;
   uint8_t *completed = buf;
-  uint8_t *const start = buf;
+  uint8_t *const start = buf;  // ptr equivalent of s->completed_offset
   uint8_t *end = buf + len;
-  uint8_t *submsg_end = buf + (*s->top > 0 ? *s->top : 0);
+  uint8_t *submsg_end = buf + (*s->top > 0 ? *s->top - s->completed_offset : 0);
   upb_status_t status = UPB_STATUS_OK;
 
-  /* Make local copies so optimizer knows they won't change. */
+  // Make local copies so optimizer knows they won't change.
   upb_tag_cb tag_cb = s->tag_cb;
   upb_str_cb str_cb = s->str_cb;
   upb_value_cb value_cb = s->value_cb;
   void *udata = s->udata;
 
-  /* Main loop: parse a tag, then handle the value. */
+#define CHECK(exp) do { if((status = (exp)) != UPB_STATUS_OK) goto err; } while(0)
+
+  // Main loop: parse a tag, then handle the value.
   while(buf < end) {
     struct upb_tag tag;
-    UPB_CHECK(parse_tag(buf, end, &tag, &buf));
+    CHECK(parse_tag(buf, end, &tag, &buf));
     if(tag.wire_type == UPB_WIRE_TYPE_END_GROUP) {
-      submsg_end = pop_stack_frame(s, start);
+      submsg_end = pop(s, start);
       completed = buf;
       continue;
     }
@@ -161,38 +180,39 @@
     upb_field_type_t ft = tag_cb(udata, &tag, &udesc);
     if(tag.wire_type == UPB_WIRE_TYPE_DELIMITED) {
       int32_t delim_len;
-      UPB_CHECK(upb_get_INT32(buf, end, &delim_len, &buf));
+      CHECK(upb_get_INT32(buf, end, &delim_len, &buf));
       uint8_t *delim_end = buf + delim_len;
-      if(ft == GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE) {
-          UPB_CHECK(push_stack_frame(
-              s, start, delim_end - start, udesc, &submsg_end));
+      if(ft == UPB_TYPENUM(MESSAGE)) {
+        CHECK(push(s, start, delim_end - start, udesc, &submsg_end));
       } else {
-        if(upb_isstringtype(ft))
-          str_cb(udata, buf, UPB_MIN(delim_end, end) - buf, delim_end - buf, udesc);
-        //else
-        //  /* Set a marker for packed arrays. */
-        buf = delim_end;  /* Note that this could be greater than end. */
+        if(upb_isstringtype(ft)) {
+          size_t avail_len = UPB_MIN(delim_end, end) - buf;
+          str_cb(udata, buf, avail_len, delim_end - buf, udesc);
+        } // else { TODO: packed arrays }
+        buf = delim_end;  // Could be >end.
       }
-    } else {  /* Scalar (non-delimited) value. */
+    } else {
+      // Scalar (non-delimited) value.
       switch(ft) {
-        case 0:  /* Client elected to skip. */
-          UPB_CHECK(skip_wire_value(buf, end, tag.wire_type, &buf));
+        case 0:  // Client elected to skip.
+          CHECK(skip_wire_value(buf, end, tag.wire_type, &buf));
           break;
-        case GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP:
-          UPB_CHECK(push_stack_frame(s, start, 0, udesc, &submsg_end));
+        case UPB_TYPENUM(GROUP):
+          CHECK(push(s, start, 0, udesc, &submsg_end));
           break;
         default:
-          UPB_CHECK(value_cb(udata, buf, end, udesc, &buf));
+          CHECK(value_cb(udata, buf, end, udesc, &buf));
           break;
       }
     }
 
-    while(buf == submsg_end) submsg_end = pop_stack_frame(s, start);
-    //while(buf < s->packed_end)  /* packed arrays. */
-    //  UPB_CHECK(value_cb(udata, buf, end, udesc, &buf));
+    while(buf == submsg_end)
+      submsg_end = pop(s, start);
+    // while(buf < s->packed_end) { TODO: packed arrays }
     completed = buf;
   }
 
+err:
   *read = (char*)completed - (char*)start;
   s->completed_offset += *read;
   return status;
diff --git a/src/upb_parse.h b/src/upb_parse.h
index 1454dd5..a8f4294 100644
--- a/src/upb_parse.h
+++ b/src/upb_parse.h
@@ -74,9 +74,8 @@
 typedef void (*upb_submsg_end_cb)(void *udata);
 
 struct upb_stream_parser {
-  /* For delimited submsgs, counts from the submsg len down to zero.
-   * For group submsgs, counts from zero down to the negative len. */
-  uint32_t stack[UPB_MAX_NESTING], *top, *limit;
+  // Stack entries store the offset where the submsg ends (for groups, 0).
+  size_t stack[UPB_MAX_NESTING], *top, *limit;
   size_t completed_offset;
   void *udata;
   upb_tag_cb          tag_cb;
diff --git a/src/upb_struct.h b/src/upb_struct.h
index 9c1bb2e..c83978f 100644
--- a/src/upb_struct.h
+++ b/src/upb_struct.h
@@ -29,6 +29,10 @@
   return head->refcount == 0 && head->refs == NULL;
 }
 
+INLINE bool upb_mmhead_only(struct upb_mmhead *head) {
+  return head->refcount == 1 && head->refs == NULL;
+}
+
 INLINE bool upb_mmhead_unref(struct upb_mmhead *head) {
   head->refcount--;
   return upb_mmhead_norefs(head);
@@ -57,7 +61,7 @@
   struct upb_msg_fielddef *fielddef;  /* Defines the type of the array. */
   union upb_value_ptr elements;
   upb_arraylen_t len;     /* Number of elements in "elements". */
-  upb_arraylen_t size;    /* Memory we own (0 if by reference). */
+  upb_arraylen_t size;    /* Memory we own. */
 };
 
 struct upb_string {
diff --git a/src/upb_text.c b/src/upb_text.c
index 6d43152..bed4b43 100644
--- a/src/upb_text.c
+++ b/src/upb_text.c
@@ -99,7 +99,7 @@
     if(upb_isarray(f)) {
       struct upb_array *arr = *p.arr;
       for(uint32_t j = 0; j < arr->len; j++) {
-        union upb_value_ptr elem_p = upb_array_getelementptr(arr, j, f->type);
+        union upb_value_ptr elem_p = upb_array_getelementptr(arr, j);
         printval(printer, elem_p, f, fd, stream);
       }
     } else {