diff --git a/README.android b/README.android
index 36284d6..c467de4 100644
--- a/README.android
+++ b/README.android
@@ -20,6 +20,9 @@
 - Synced the WebP Code (Encoder/Decoder) with the head change#Ia53f845b
   - Added three color-spaces viz ARGB_8888, RGBA_4444, RGB_565
     supported by Android.
+- Fixed the endianness bug for Color-Configs (RGB_565 & ARGB_4444).
+  The fix is similar to jpeglib handling for JCS_RGB_565 & JCS_RGBA_8888
+  color configs. Added the code under "ANDROID_WEBP_RGB" flag.
 
 The Android.mk file creates WebP Decoder and Encoder static libraries which
 can be added to any application by Adding to LOCAL_STATIC_LIBRARIES
diff --git a/include/webp/decode.h b/include/webp/decode.h
index 195109e..ccb4c36 100644
--- a/include/webp/decode.h
+++ b/include/webp/decode.h
@@ -139,12 +139,14 @@
 typedef struct {
   WEBP_CSP_MODE colorspace;  // Colorspace.
   int width, height;         // Dimensions.
-  int is_external_memory;    // If true, the *memory pointer is not owned.
+  int is_external_memory;    // If true, 'private_memory' pointer is not used.
   union {
     WebPRGBABuffer RGBA;
     WebPYUVABuffer YUVA;
-  } u;                       // nameless union of buffer parameters.
-  uint8_t* memory;           // main pointer (when is_external_memory is false)
+  } u;                       // Nameless union of buffer parameters.
+  uint8_t* private_memory;   // Internally allocated memory (only when
+                             // is_external_memory is false). Should not be used
+                             // externally, but accessed via the buffer union.
 } WebPDecBuffer;
 
 // Internal, version-checked, entry point
@@ -232,7 +234,7 @@
     uint8_t* u, int u_size, int u_stride,
     uint8_t* v, int v_size, int v_stride);
 
-// Deletes the WebpBuffer object and associated memory. Must always be called
+// Deletes the WebPIDecoder object and associated memory. Must always be called
 // if WebPINew, WebPINewRGB or WebPINewYUV succeeded.
 WEBP_EXTERN(void) WebPIDelete(WebPIDecoder* const idec);
 
@@ -327,7 +329,7 @@
     const uint8_t*, uint32_t, WebPBitstreamFeatures* const, int);
 
 // Retrieve features from the bitstream. The *features structure is filled
-// with informations gathered from the bitstream.
+// with information gathered from the bitstream.
 // Returns false in case of error or version mismatch.
 // In case of error, features->bitstream_status will reflect the error code.
 static inline
diff --git a/include/webp/decode_vp8.h b/include/webp/decode_vp8.h
index 6aa07c6..af276ad 100644
--- a/include/webp/decode_vp8.h
+++ b/include/webp/decode_vp8.h
@@ -21,7 +21,7 @@
 //-----------------------------------------------------------------------------
 // Lower-level API
 //
-// Thes functions provide fine-grained control of the decoding process.
+// These functions provide fine-grained control of the decoding process.
 // The call flow should resemble:
 //
 //   VP8Io io;
@@ -105,7 +105,7 @@
 
 // Set the custom IO function pointers and user-data. The setter for IO hooks
 // should be called before initiating incremental decoding. Returns true if
-// WebPIdecoder object is successfully modified, false otherwise.
+// WebPIDecoder object is successfully modified, false otherwise.
 WEBP_EXTERN(int) WebPISetIOHooks(WebPIDecoder* const idec,
                                  VP8IoPutHook put,
                                  VP8IoSetupHook setup,
diff --git a/include/webp/encode.h b/include/webp/encode.h
index 176d6a6..af6f0a2 100644
--- a/include/webp/encode.h
+++ b/include/webp/encode.h
@@ -113,14 +113,14 @@
 
 typedef struct WebPPicture WebPPicture;   // main structure for I/O
 
-// non-essential structure for storing auxilliary statistics
+// non-essential structure for storing auxiliary statistics
 typedef struct {
   float PSNR[4];          // peak-signal-to-noise ratio for Y/U/V/All
   int coded_size;         // final size
   int block_count[3];     // number of intra4/intra16/skipped macroblocks
-  int header_bytes[2];    // approximative number of bytes spent for header
+  int header_bytes[2];    // approximate number of bytes spent for header
                           // and mode-partition #0
-  int residual_bytes[3][4];  // approximative number of bytes spent for
+  int residual_bytes[3][4];  // approximate number of bytes spent for
                              // DC/AC/uv coefficients for each (0..3) segments.
   int segment_size[4];    // number of macroblocks in each segments
   int segment_quant[4];   // quantizer values for each segments
@@ -256,7 +256,7 @@
 //-----------------------------------------------------------------------------
 // Main call
 
-// Main encoding call, after config and picture have been initialiazed.
+// Main encoding call, after config and picture have been initialized.
 // 'picture' must be less than 16384x16384 in dimension, and the 'config' object
 // must be a valid one.
 // Returns false in case of error, true otherwise.
diff --git a/src/dec/buffer.c b/src/dec/buffer.c
index ad868e5..c433d63 100644
--- a/src/dec/buffer.c
+++ b/src/dec/buffer.c
@@ -60,7 +60,7 @@
     return VP8_STATUS_INVALID_PARAM;
   }
 
-  if (!buffer->is_external_memory && buffer->memory == NULL) {
+  if (!buffer->is_external_memory && buffer->private_memory == NULL) {
     uint8_t* output;
     WEBP_CSP_MODE mode = buffer->colorspace;
     int stride;
@@ -87,7 +87,7 @@
       return VP8_STATUS_INVALID_PARAM;
     }
 
-    buffer->memory = output = (uint8_t*)malloc((size_t)total_size);
+    buffer->private_memory = output = (uint8_t*)malloc((size_t)total_size);
     if (output == NULL) {
       return VP8_STATUS_OUT_OF_MEMORY;
     }
@@ -164,8 +164,8 @@
 void WebPFreeDecBuffer(WebPDecBuffer* const buffer) {
   if (buffer) {
     if (!buffer->is_external_memory)
-      free(buffer->memory);
-    buffer->memory = NULL;
+      free(buffer->private_memory);
+    buffer->private_memory = NULL;
   }
 }
 
@@ -173,9 +173,9 @@
                        WebPDecBuffer* const dst) {
   if (src && dst) {
     *dst = *src;
-    if (src->memory) {
+    if (src->private_memory) {
       dst->is_external_memory = 1;   // dst buffer doesn't own the memory.
-      dst->memory = NULL;
+      dst->private_memory = NULL;
     }
   }
 }
@@ -184,9 +184,9 @@
 void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst) {
   if (src && dst) {
     *dst = *src;
-    if (src->memory) {
+    if (src->private_memory) {
       src->is_external_memory = 1;   // src relinquishes ownership
-      src->memory = NULL;
+      src->private_memory = NULL;
     }
   }
 }
diff --git a/src/dec/idec.c b/src/dec/idec.c
index 628ff61..1e51f0d 100644
--- a/src/dec/idec.c
+++ b/src/dec/idec.c
@@ -229,6 +229,12 @@
 //------------------------------------------------------------------------------
 
 static VP8StatusCode IDecError(WebPIDecoder* idec, VP8StatusCode error) {
+  if (idec->state_ == STATE_DATA) {
+    VP8Io* const io = &idec->io_;
+    if (io->teardown) {
+      io->teardown(io);
+    }
+  }
   idec->state_ = STATE_ERROR;
   return error;
 }
@@ -323,14 +329,16 @@
     return IDecError(idec, dec->status_);
   }
 
-  // Finish setting up the decoding parameter
-  if (VP8FinishFrameSetup(dec, io) != VP8_STATUS_OK) {
-    return IDecError(idec, dec->status_);
-  }
-
   if (!CopyParts0Data(idec)) {
     return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY);
   }
+
+  // Finish setting up the decoding parameters.
+  if (VP8FinishFrameSetup(dec, io) != VP8_STATUS_OK) {
+    return IDecError(idec, dec->status_);
+  }
+  // Note: past this point, teardown() must always be called
+  // in case of error.
   idec->state_ = STATE_DATA;
   return VP8_STATUS_OK;
 }
diff --git a/src/dec/yuv.h b/src/dec/yuv.h
index 357aaa7..5f16ee6 100644
--- a/src/dec/yuv.h
+++ b/src/dec/yuv.h
@@ -14,6 +14,14 @@
 
 #include "webp/decode_vp8.h"
 
+/*
+ * Define ANDROID_WEBP_RGB to swap the two output bytes of each RGB_565 and
+ * RGBA_4444 pixel (endianness fix for the Android color configs).
+ *
+ */
+
+#define ANDROID_WEBP_RGB
+
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
 #endif
@@ -42,10 +50,17 @@
   const int r_off = VP8kVToR[v];
   const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
   const int b_off = VP8kUToB[u];
+#ifdef ANDROID_WEBP_RGB
+  rgb[1] = ((VP8kClip[y + r_off - YUV_RANGE_MIN] & 0xf8) |
+            (VP8kClip[y + g_off - YUV_RANGE_MIN] >> 5));
+  rgb[0] = (((VP8kClip[y + g_off - YUV_RANGE_MIN] << 3) & 0xe0) |
+            (VP8kClip[y + b_off - YUV_RANGE_MIN] >> 3));
+#else
   rgb[0] = ((VP8kClip[y + r_off - YUV_RANGE_MIN] & 0xf8) |
             (VP8kClip[y + g_off - YUV_RANGE_MIN] >> 5));
   rgb[1] = (((VP8kClip[y + g_off - YUV_RANGE_MIN] << 3) & 0xe0) |
             (VP8kClip[y + b_off - YUV_RANGE_MIN] >> 3));
+#endif
 }
 
 static inline void VP8YuvToArgbKeepA(uint8_t y, uint8_t u, uint8_t v,
@@ -65,15 +80,24 @@
   const int r_off = VP8kVToR[v];
   const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
   const int b_off = VP8kUToB[u];
-  // Don't update Aplha (last 4 bits of argb[1])
+#ifdef ANDROID_WEBP_RGB
+  argb[1] = ((VP8kClip4Bits[y + r_off - YUV_RANGE_MIN] << 4) |
+             VP8kClip4Bits[y + g_off - YUV_RANGE_MIN]);
+  argb[0] = (argb[0] & 0x0f) | (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4);
+#else
   argb[0] = ((VP8kClip4Bits[y + r_off - YUV_RANGE_MIN] << 4) |
              VP8kClip4Bits[y + g_off - YUV_RANGE_MIN]);
   argb[1] = (argb[1] & 0x0f) | (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4);
+#endif
 }
 
 static inline void VP8YuvToRgba4444(uint8_t y, uint8_t u, uint8_t v,
                                     uint8_t* const argb) {
+#ifdef ANDROID_WEBP_RGB
+  argb[0] = 0x0f;
+#else
   argb[1] = 0x0f;
+#endif
   VP8YuvToRgba4444KeepA(y, u, v, argb);
 }
 
diff --git a/src/enc/frame.c b/src/enc/frame.c
index 5bc0731..d0270d7 100644
--- a/src/enc/frame.c
+++ b/src/enc/frame.c
@@ -58,8 +58,6 @@
   VP8Proba* const proba = &enc->proba_;
   if (precalc_cost) VP8CalculateLevelCosts(proba);
   proba->nb_skip_ = 0;
-  proba->nb_i4_ = 0;
-  proba->nb_i16_ = 0;
 }
 
 //-----------------------------------------------------------------------------
diff --git a/src/enc/vp8enci.h b/src/enc/vp8enci.h
index b9c476d..2be079e 100644
--- a/src/enc/vp8enci.h
+++ b/src/enc/vp8enci.h
@@ -187,7 +187,7 @@
   StatsArray stats_[NUM_TYPES][NUM_BANDS];       // 7.4k
   CostArray level_cost_[NUM_TYPES][NUM_BANDS];   // 11.4k
   int use_skip_proba_;      // Note: we always use skip_proba for now.
-  int nb_skip_, nb_i4_, nb_i16_;   // block type counters
+  int nb_skip_;             // number of skipped blocks
 } VP8Proba;
 
 // Filter parameters. Not actually used in the code (we don't perform
