refresh from skia/trunk

support for SSE2 blitrow optimizations
fix flattening in 2-point gradient
fix rotation in colormatrix
diff --git a/include/core/SkColorPriv.h b/include/core/SkColorPriv.h
index 5129ac6..15b4d6a 100644
--- a/include/core/SkColorPriv.h
+++ b/include/core/SkColorPriv.h
@@ -50,7 +50,7 @@
 //  The caller may want negative values, so keep all params signed (int)
 //  so we don't accidentally slip into unsigned math and lose the sign
 //  extension when we shift (in SkAlphaMul)
-inline int SkAlphaBlend(int src, int dst, int scale256) {
+static inline int SkAlphaBlend(int src, int dst, int scale256) {
     SkASSERT((unsigned)scale256 <= 256);
     return dst + SkAlphaMul(src - dst, scale256);
 }
@@ -200,7 +200,7 @@
 #define SkB32Assert(b)  SkASSERT((unsigned)(b) <= SK_B32_MASK)
 
 #ifdef SK_DEBUG
-    inline void SkPMColorAssert(SkPMColor c) {
+    static inline void SkPMColorAssert(SkPMColor c) {
         unsigned a = SkGetPackedA32(c);
         unsigned r = SkGetPackedR32(c);
         unsigned g = SkGetPackedG32(c);
@@ -215,7 +215,7 @@
     #define SkPMColorAssert(c)
 #endif
 
-inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
+static inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
     SkA32Assert(a);
     SkASSERT(r <= a);
     SkASSERT(g <= a);
@@ -227,7 +227,7 @@
 
 extern const uint32_t gMask_00FF00FF;
 
-inline uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
+static inline uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
     uint32_t mask = gMask_00FF00FF;
 //    uint32_t mask = 0xFF00FF;
 
@@ -236,11 +236,11 @@
     return (rb & mask) | (ag & ~mask);
 }
 
-inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
+static inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
     return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
 }
 
-inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
+static inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
     SkASSERT((unsigned)aa <= 255);
 
     unsigned src_scale = SkAlpha255To256(aa);
@@ -257,18 +257,15 @@
 #define SkB32ToB16_MACRO(b)   ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))
 
 #ifdef SK_DEBUG
-    inline unsigned SkR32ToR16(unsigned r)
-    {
+    static inline unsigned SkR32ToR16(unsigned r) {
         SkR32Assert(r);
         return SkR32ToR16_MACRO(r);
     }
-    inline unsigned SkG32ToG16(unsigned g)
-    {
+    static inline unsigned SkG32ToG16(unsigned g) {
         SkG32Assert(g);
         return SkG32ToG16_MACRO(g);
     }
-    inline unsigned SkB32ToB16(unsigned b)
-    {
+    static inline unsigned SkB32ToB16(unsigned b) {
         SkB32Assert(b);
         return SkB32ToB16_MACRO(b);
     }
@@ -282,16 +279,14 @@
 #define SkPacked32ToG16(c)  (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
 #define SkPacked32ToB16(c)  (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
 
-inline U16CPU SkPixel32ToPixel16(SkPMColor c)
-{
+static inline U16CPU SkPixel32ToPixel16(SkPMColor c) {
     unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
     unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
     unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
     return r | g | b;
 }
 
-inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b)
-{
+static inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
     return  (SkR32ToR16(r) << SK_R16_SHIFT) |
             (SkG32ToG16(g) << SK_G16_SHIFT) |
             (SkB32ToB16(b) << SK_B16_SHIFT);
@@ -304,8 +299,7 @@
 
 #define SkShouldDitherXY(x, y)  (((x) ^ (y)) & 1)
 
-inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b)
-{
+static inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b) {
     r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) | (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
     g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) | (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
     b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) | (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);
@@ -313,8 +307,7 @@
     return SkPackRGB16(r, g, b);
 }
 
-inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c)
-{
+static inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c) {
     return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
 }
 
@@ -325,8 +318,7 @@
     to saturate properly (and not overflow). If we take the 8 bits as is, it is
     possible to overflow.
 */
-static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c)
-{
+static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c) {
     unsigned sr = SkPacked32ToR16(c);
     unsigned sg = SkPacked32ToG16(c);
     unsigned sb = SkPacked32ToB16(c);
@@ -361,16 +353,15 @@
 ////////////////////////////////////////////////////////////////////////////////////////////
 // Convert a 16bit pixel to a 32bit pixel
 
-inline unsigned SkR16ToR32(unsigned r)
-{
+static inline unsigned SkR16ToR32(unsigned r) {
     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
 }
-inline unsigned SkG16ToG32(unsigned g)
-{
+
+static inline unsigned SkG16ToG32(unsigned g) {
     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
 }
-inline unsigned SkB16ToB32(unsigned b)
-{
+
+static inline unsigned SkB16ToB32(unsigned b) {
     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
 }
 
@@ -378,8 +369,7 @@
 #define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
 #define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
 
-inline SkPMColor SkPixel16ToPixel32(U16CPU src)
-{
+static inline SkPMColor SkPixel16ToPixel32(U16CPU src) {
     SkASSERT(src == SkToU16(src));
 
     unsigned    r = SkPacked16ToR32(src);
@@ -423,8 +413,7 @@
 #define SkG32To4444(g)  ((unsigned)(g) >> 4)
 #define SkB32To4444(b)  ((unsigned)(b) >> 4)
 
-static inline U8CPU SkReplicateNibble(unsigned nib)
-{
+static inline U8CPU SkReplicateNibble(unsigned nib) {
     SkASSERT(nib <= 0xF);
     return (nib << 4) | nib;
 }
@@ -445,8 +434,7 @@
 #define SkPacked4444ToB32(c)    SkReplicateNibble(SkGetPackedB4444(c))
 
 #ifdef SK_DEBUG
-static inline void SkPMColor16Assert(U16CPU c)
-{
+static inline void SkPMColor16Assert(U16CPU c) {
     unsigned a = SkGetPackedA4444(c);
     unsigned r = SkGetPackedR4444(c);
     unsigned g = SkGetPackedG4444(c);
@@ -461,15 +449,13 @@
 #define SkPMColor16Assert(c)
 #endif
 
-static inline unsigned SkAlpha15To16(unsigned a)
-{
+static inline unsigned SkAlpha15To16(unsigned a) {
     SkASSERT(a <= 0xF);
     return a + (a >> 3);
 }
 
 #ifdef SK_DEBUG
-    static inline int SkAlphaMul4(int value, int scale)
-    {
+    static inline int SkAlphaMul4(int value, int scale) {
         SkASSERT((unsigned)scale <= 0x10);
         return value * scale >> 4;
     }
@@ -477,27 +463,23 @@
     #define SkAlphaMul4(value, scale)   ((value) * (scale) >> 4)
 #endif
 
-static inline unsigned SkR4444ToR565(unsigned r)
-{
+static inline unsigned SkR4444ToR565(unsigned r) {
     SkASSERT(r <= 0xF);
     return (r << (SK_R16_BITS - 4)) | (r >> (8 - SK_R16_BITS));
 }
 
-static inline unsigned SkG4444ToG565(unsigned g)
-{
+static inline unsigned SkG4444ToG565(unsigned g) {
     SkASSERT(g <= 0xF);
     return (g << (SK_G16_BITS - 4)) | (g >> (8 - SK_G16_BITS));
 }
 
-static inline unsigned SkB4444ToB565(unsigned b)
-{
+static inline unsigned SkB4444ToB565(unsigned b) {
     SkASSERT(b <= 0xF);
     return (b << (SK_B16_BITS - 4)) | (b >> (8 - SK_B16_BITS));
 }
 
 static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
-                                         unsigned g, unsigned b)
-{
+                                         unsigned g, unsigned b) {
     SkASSERT(a <= 0xF);
     SkASSERT(r <= a);
     SkASSERT(g <= a);
@@ -509,8 +491,7 @@
 
 extern const uint16_t gMask_0F0F;
 
-inline U16CPU SkAlphaMulQ4(U16CPU c, unsigned scale)
-{
+static inline U16CPU SkAlphaMulQ4(U16CPU c, unsigned scale) {
     SkASSERT(scale <= 16);
 
     const unsigned mask = 0xF0F;    //gMask_0F0F;
@@ -529,8 +510,7 @@
 /** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
     once by a value up to 16. Used in conjunction with SkCompact_4444.
 */
-inline uint32_t SkExpand_4444(U16CPU c)
-{
+static inline uint32_t SkExpand_4444(U16CPU c) {
     SkASSERT(c == (uint16_t)c);
     
     const unsigned mask = 0xF0F;    //gMask_0F0F;
@@ -544,14 +524,12 @@
     would add 2 more instructions, slow us down. It is up to the caller to
     perform the cast if needed.
 */
-static inline U16CPU SkCompact_4444(uint32_t c)
-{
+static inline U16CPU SkCompact_4444(uint32_t c) {
     const unsigned mask = 0xF0F;    //gMask_0F0F;
     return (c & mask) | ((c >> 12) & ~mask);
 }
 
-static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d)
-{
+static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d) {
     unsigned sa = SkGetPackedA4444(s);
     unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
     unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
@@ -576,15 +554,13 @@
     return SkPackRGB16(sr + dr, sg + dg, sb + db);
 }
 
-static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16)
-{
+static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16) {
     SkASSERT((unsigned)scale16 <= 16);
     
     return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
 }
 
-static inline uint16_t SkBlend4444(SkPMColor16 src, SkPMColor16 dst, int scale16)
-{
+static inline uint16_t SkBlend4444(SkPMColor16 src, SkPMColor16 dst, int scale16) {
     SkASSERT((unsigned)scale16 <= 16);
     
     uint32_t src32 = SkExpand_4444(src) * scale16;
@@ -600,8 +576,7 @@
     return SkCompact_4444((src32 + dst32) >> 4);
 }
 
-static inline SkPMColor SkPixel4444ToPixel32(U16CPU c)
-{
+static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
     uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) |
                  (SkGetPackedR4444(c) << SK_R32_SHIFT) |
                  (SkGetPackedG4444(c) << SK_G32_SHIFT) |
@@ -609,8 +584,7 @@
     return d | (d << 4);
 }
 
-static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c)
-{
+static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c) {
     return  (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) |
     (((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) |
     (((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) |
@@ -619,8 +593,7 @@
 
 // cheap 2x2 dither
 static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
-                                               U8CPU g, U8CPU b)
-{
+                                               U8CPU g, U8CPU b) {
     a = ((a << 1) - ((a >> 4 << 4) | (a >> 4))) >> 4;
     r = ((r << 1) - ((r >> 4 << 4) | (r >> 4))) >> 4;
     g = ((g << 1) - ((g >> 4 << 4) | (g >> 4))) >> 4;
@@ -629,8 +602,7 @@
     return SkPackARGB4444(a, r, g, b);
 }
 
-static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c)
-{
+static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c) {
     return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
                                 SkGetPackedG32(c), SkGetPackedB32(c));
 }
@@ -639,8 +611,7 @@
     Transforms a normal ARGB_8888 into the same byte order as
     expanded ARGB_4444, but keeps each component 8bits
 */
-static inline uint32_t SkExpand_8888(SkPMColor c)
-{
+static inline uint32_t SkExpand_8888(SkPMColor c) {
     return  (((c >> SK_R32_SHIFT) & 0xFF) << 24) |
             (((c >> SK_G32_SHIFT) & 0xFF) <<  8) |
             (((c >> SK_B32_SHIFT) & 0xFF) << 16) |
@@ -650,8 +621,7 @@
 /*  Undo the operation of SkExpand_8888, turning the argument back into
     a SkPMColor.
 */
-static inline SkPMColor SkCompact_8888(uint32_t c)
-{
+static inline SkPMColor SkCompact_8888(uint32_t c) {
     return  (((c >> 24) & 0xFF) << SK_R32_SHIFT) |
             (((c >>  8) & 0xFF) << SK_G32_SHIFT) |
             (((c >> 16) & 0xFF) << SK_B32_SHIFT) |
@@ -662,8 +632,7 @@
     but this routine just keeps the high 4bits of each component in the low
     4bits of the result (just like a newly expanded PMColor16).
 */
-static inline uint32_t SkExpand32_4444(SkPMColor c)
-{
+static inline uint32_t SkExpand32_4444(SkPMColor c) {
     return  (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) |
             (((c >> (SK_G32_SHIFT + 4)) & 0xF) <<  8) |
             (((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) |
diff --git a/include/effects/SkAvoidXfermode.h b/include/effects/SkAvoidXfermode.h
index 32bc049..2803c07 100644
--- a/include/effects/SkAvoidXfermode.h
+++ b/include/effects/SkAvoidXfermode.h
@@ -31,16 +31,21 @@
         kTargetColor_Mode   //!< draw only on top of the opColor
     };
 
-    /** This xfermode will draw the src everywhere except on top of the opColor
-        or, depending on the Mode, draw only on top of the opColor.
-        @param opColor  the color to avoid (or to target depending on Mode).
-                        note: the alpha in opColor is ignored
-        @param tolerance    How closely we compare a pixel to the opColor.
-                            0 - only operate if exact match
-                            255 - maximum gradation (blending) based on how
-                            similar the pixel is to our opColor (max tolerance)
-        @param mode If we should avoid or target the opColor
-    */
+    /** This xfermode draws, or doesn't draw, based on the destination's
+        distance from an op-color.
+     
+        There are two modes, and each mode interprets a tolerance value.
+     
+        Avoid: In this mode, drawing is allowed only on destination pixels that
+               are different from the op-color.
+               Tolerance near 0: avoid anything close to the op-color
+               Tolerance near 255: avoid only colors very close to the op-color
+     
+        Target: In this mode, drawing only occurs on destination pixels that
+                are similar to the op-color
+                Tolerance near 0: draw on colors that are very close to op-color
+                Tolerance near 255: draw on any colors even remotely similar to the op-color
+     */
     SkAvoidXfermode(SkColor opColor, U8CPU tolerance, Mode mode);
 
     // overrides from SkXfermode
diff --git a/include/views/SkView.h b/include/views/SkView.h
index 050deba..1bdd0b6 100644
--- a/include/views/SkView.h
+++ b/include/views/SkView.h
@@ -155,8 +155,14 @@
     /** Send the event to the view's parent, and its parent etc. until one of them
         returns true from its onEvent call. This view is returned. If no parent handles
         the event, null is returned.
-    */
+     */
     SkView*     sendEventToParents(const SkEvent&);
+    /** Send the query to the view's parent, and its parent etc. until one of them
+        returns true from its onQuery call. This view is returned. If no parent handles
+        the query, null is returned.
+     */
+    SkView* sendQueryToParents(SkEvent*);
+
     /** Depricated helper function. Just call event->post(sinkID, delay);
     */
     bool    postEvent(SkEvent* evt, SkEventSinkID sinkID, SkMSec delay) { return evt->post(sinkID, delay); }
diff --git a/src/core/SkBlitRow.h b/src/core/SkBlitRow.h
index dbbd84d..7d48b0c 100644
--- a/src/core/SkBlitRow.h
+++ b/src/core/SkBlitRow.h
@@ -65,18 +65,21 @@
         Color32(row, row, count, color);
     }
 
+    /** These static functions are called by the Factory and Factory32
+        functions, and should return either NULL, or a
+        platform-specific function-ptr to be used in place of the
+        system default.
+     */
+
+    static Proc32 PlatformProcs32(unsigned flags);
+    static Proc PlatformProcs565(unsigned flags);
+    static Proc PlatformProcs4444(unsigned flags);
+
 private:
     enum {
         kFlags16_Mask = 7,
         kFlags32_Mask = 3
     };
-    /** These global arrays are indexed using the flags parameter to Factory,
-        and contain either NULL, or a platform-specific function-ptr to be used
-        in place of the system default.
-     */
-    static const Proc gPlatform_565_Procs[];
-    static const Proc gPlatform_4444_Procs[];
-    static const Proc32 gPlatform_Procs32[];
 };
 
 #endif
diff --git a/src/core/SkBlitRow_D16.cpp b/src/core/SkBlitRow_D16.cpp
index 66ac90e..07c42ce 100644
--- a/src/core/SkBlitRow_D16.cpp
+++ b/src/core/SkBlitRow_D16.cpp
@@ -242,13 +242,13 @@
 
     switch (config) {
         case SkBitmap::kRGB_565_Config:
-            proc = gPlatform_565_Procs[flags];
+            proc = PlatformProcs565(flags);
             if (NULL == proc) {
                 proc = gDefault_565_Procs[flags];
             }
             break;
         case SkBitmap::kARGB_4444_Config:
-            proc = gPlatform_4444_Procs[flags];
+            proc = PlatformProcs4444(flags);
             if (NULL == proc) {
                 proc = SkBlitRow_Factory_4444(flags);
             }
diff --git a/src/core/SkBlitRow_D32.cpp b/src/core/SkBlitRow_D32.cpp
index f67bb9a..0036025 100644
--- a/src/core/SkBlitRow_D32.cpp
+++ b/src/core/SkBlitRow_D32.cpp
@@ -78,7 +78,7 @@
     // just so we don't crash
     flags &= kFlags32_Mask;
     
-    SkBlitRow::Proc32 proc = gPlatform_Procs32[flags];
+    SkBlitRow::Proc32 proc = PlatformProcs32(flags);
     if (NULL == proc) {
         proc = gDefault_Procs32[flags];
     }
diff --git a/src/core/SkBlitter_4444.cpp b/src/core/SkBlitter_4444.cpp
index 736f8c3..81bbc48 100644
--- a/src/core/SkBlitter_4444.cpp
+++ b/src/core/SkBlitter_4444.cpp
@@ -82,9 +82,12 @@
         fRawColor16Other = fRawColor16;
     }
     
+#if 0 /// don't think this assertion is true, but need it be?
+
     // our dithered color will be the same or more opaque than the original
     // so use dithered to compute our scale
     SkASSERT(SkGetPackedA4444(fPMColor16Other) >= SkGetPackedA4444(fPMColor16));
+#endif
 
     fScale16 = SkAlpha15To16(SkGetPackedA4444(fPMColor16Other));
     if (16 == fScale16) {
diff --git a/src/core/SkGraphics.cpp b/src/core/SkGraphics.cpp
index 9859913..65a16e2 100644
--- a/src/core/SkGraphics.cpp
+++ b/src/core/SkGraphics.cpp
@@ -146,138 +146,9 @@
 
 #endif
 
-#ifdef SK_CAN_USE_FLOAT
-#include "SkFloatBits.h"
-
-static inline float fast_inc(float x) {
-    SkFloatIntUnion data;
-    data.fFloat = x;
-    data.fSignBitInt += 1;
-    return data.fFloat;
-}
-
-extern float dummy();
-static int time_math() {
-    SkMSec now;
-    int i;
-    int sum = 0;
-    const int repeat = 1000000;
-    float f;
-
-    f = dummy();
-    now = SkTime::GetMSecs();
-    for (i = repeat - 1; i >= 0; --i) {
-        sum += (int)f; f = fast_inc(f);
-        sum += (int)f; f = fast_inc(f);
-        sum += (int)f; f = fast_inc(f);
-        sum += (int)f; f = fast_inc(f);
-    }
-    SkDebugf("---- native cast %d\n", SkTime::GetMSecs() - now);
-
-    f = dummy();
-    now = SkTime::GetMSecs();
-    for (i = repeat - 1; i >= 0; --i) {
-        sum += SkFloatToIntCast(f); f = fast_inc(f);
-        sum += SkFloatToIntCast(f); f = fast_inc(f);
-        sum += SkFloatToIntCast(f); f = fast_inc(f);
-        sum += SkFloatToIntCast(f); f = fast_inc(f);
-    }
-    SkDebugf("---- hack cast %d\n", SkTime::GetMSecs() - now);
-
-    f = dummy();
-    now = SkTime::GetMSecs();
-    for (i = repeat - 1; i >= 0; --i) {
-        sum += (int)sk_float_floor(f + 0.5f); f = fast_inc(f);
-        sum += (int)sk_float_floor(f + 0.5f); f = fast_inc(f);
-        sum += (int)sk_float_floor(f + 0.5f); f = fast_inc(f);
-        sum += (int)sk_float_floor(f + 0.5f); f = fast_inc(f);
-    }
-    SkDebugf("---- native round %d\n", SkTime::GetMSecs() - now);
-    
-    f = dummy();
-    now = SkTime::GetMSecs();
-    for (i = repeat - 1; i >= 0; --i) {
-        sum += SkFloatToIntRound(f); f = fast_inc(f);
-        sum += SkFloatToIntRound(f); f = fast_inc(f);
-        sum += SkFloatToIntRound(f); f = fast_inc(f);
-        sum += SkFloatToIntRound(f); f = fast_inc(f);
-    }
-    SkDebugf("---- hack round %d\n", SkTime::GetMSecs() - now);
-    
-    f = dummy();
-    now = SkTime::GetMSecs();
-    for (i = repeat - 1; i >= 0; --i) {
-        sum += SkFloat2Bits(floorf(f)); f = fast_inc(f);
-        sum += SkFloat2Bits(floorf(f)); f = fast_inc(f);
-        sum += SkFloat2Bits(floorf(f)); f = fast_inc(f);
-        sum += SkFloat2Bits(floorf(f)); f = fast_inc(f);
-    }
-    SkDebugf("---- native floor %d\n", SkTime::GetMSecs() - now);
-    
-    f = dummy();
-    now = SkTime::GetMSecs();
-    for (i = repeat - 1; i >= 0; --i) {
-        sum += SkFloatToIntFloor(f); f = fast_inc(f);
-        sum += SkFloatToIntFloor(f); f = fast_inc(f);
-        sum += SkFloatToIntFloor(f); f = fast_inc(f);
-        sum += SkFloatToIntFloor(f); f = fast_inc(f);
-    }
-    SkDebugf("---- hack floor %d\n", SkTime::GetMSecs() - now);
-    
-    return sum;
-}
-
-#if 0
-static float time_intToFloat() {
-    const int repeat = 1000000;
-    int i, n;
-    SkMSec now;
-    float sum = 0;
-    
-    n = (int)dummy();
-    now = SkTime::GetMSecs();
-    for (i = repeat - 1; i >= 0; --i) {
-        sum += (float)n; n += 1;
-        sum += (float)n; n += 1;
-        sum += (float)n; n += 1;
-        sum += (float)n; n += 1;
-    }
-    SkDebugf("---- native i2f %d\n", SkTime::GetMSecs() - now);
-    
-    n = (int)dummy();
-    now = SkTime::GetMSecs();
-    for (i = repeat - 1; i >= 0; --i) {
-        sum += SkIntToFloatCast(n); n += 1;
-        sum += SkIntToFloatCast(n); n += 1;
-        sum += SkIntToFloatCast(n); n += 1;
-        sum += SkIntToFloatCast(n); n += 1;
-    }
-    SkDebugf("---- check i2f %d\n", SkTime::GetMSecs() - now);
-
-    n = (int)dummy();
-    now = SkTime::GetMSecs();
-    for (i = repeat - 1; i >= 0; --i) {
-        sum += SkIntToFloatCast_NoOverflowCheck(n); n += 1;
-        sum += SkIntToFloatCast_NoOverflowCheck(n); n += 1;
-        sum += SkIntToFloatCast_NoOverflowCheck(n); n += 1;
-        sum += SkIntToFloatCast_NoOverflowCheck(n); n += 1;
-    }
-    SkDebugf("---- nocheck i2f %d\n", SkTime::GetMSecs() - now);
-
-    return sum;
-}
-#endif
-#endif
-
-void SkGraphics::Init()
-{
+void SkGraphics::Init() {
     SkGlobals::Init();
 
-#ifdef SK_CAN_USE_FLOAT
-//    time_math();
-//    time_intToFloat();
-#endif
-    
 #ifdef BUILD_EMBOSS_TABLE
     SkEmbossMask_BuildTable();
 #endif
@@ -503,4 +374,3 @@
     return SkGlyphCache::SetCacheUsed(usageInBytes);
 }
 
-float dummy() { return 1.25f; }
diff --git a/src/core/SkScan_AntiPath.cpp b/src/core/SkScan_AntiPath.cpp
index c5504c6..7b24f73 100644
--- a/src/core/SkScan_AntiPath.cpp
+++ b/src/core/SkScan_AntiPath.cpp
@@ -366,7 +366,6 @@
     if (ir.isEmpty()) {
         return;
     }
-    SkASSERT(SkIntToScalar(ir.fTop) <= path.getBounds().fTop);
 
     // use bit-or since we expect all to pass, so no need to go slower with
     // a short-circuiting logical-or
diff --git a/src/core/SkScan_Path.cpp b/src/core/SkScan_Path.cpp
index e274579..8246376 100644
--- a/src/core/SkScan_Path.cpp
+++ b/src/core/SkScan_Path.cpp
@@ -446,11 +446,23 @@
 
 /* Much faster than worst_case_edge_count, but over estimates even more
 */
-static int cheap_worst_case_edge_count(const SkPath& path, size_t* storage)
-{
+static int cheap_worst_case_edge_count(const SkPath& path, size_t* storage) {
     int ptCount = path.getPoints(NULL, 0);
-    int edgeCount = ptCount;
-    *storage = edgeCount * sizeof(SkCubicEdge);
+    // worst case is curve, close, curve, close, as that is 
+    //     2 lines per pt, or             : pts * 2
+    //     2 quads + 1 line per 2 pts, or : pts * 3 / 2
+    //     3 cubics + 1 line per 3 pts    : pts * 4 / 3
+    int edgeCount = ptCount << 1;
+    // worst storage, due to relative size of different edge types, is
+    // quads * 3 / 2
+    size_t quadSize = (ptCount * 3 >> 1) * sizeof(SkQuadraticEdge);
+#if 0
+    size_t lineSize = (ptCount << 1) * sizeof(SkEdge);
+    size_t cubicSize = (ptCount * 3 / 4) * sizeof(SkCubicEdge);
+    SkASSERT(lineSize <= quadSize);
+    SkASSERT(cubicSize <= quadSize);
+#endif
+    *storage = quadSize;
     return edgeCount;
 }
 
diff --git a/src/effects/SkAvoidXfermode.cpp b/src/effects/SkAvoidXfermode.cpp
index eed4012..d26039c 100644
--- a/src/effects/SkAvoidXfermode.cpp
+++ b/src/effects/SkAvoidXfermode.cpp
@@ -174,7 +174,7 @@
     unsigned    opG = SkColorGetG(fOpColor) >> (8 - SK_G16_BITS);
     unsigned    opB = SkColorGetB(fOpColor) >> (8 - SK_R16_BITS);
     uint32_t    mul = fDistMul;
-    uint32_t    sub = (fDistMul - (1 << 14)) << 8;
+    uint32_t    sub = (fDistMul - (1 << 14)) << SK_R16_BITS;
 
     int MAX, mask;
     
@@ -193,7 +193,6 @@
         SkASSERT((unsigned)d <= 31);
         // convert from 0..31 to 0..32
         d += d >> 4;
-
         d = scale_dist_14(d, mul, sub);
         SkASSERT(d <= 32);
 
@@ -216,7 +215,7 @@
     unsigned    opG = SkColorGetG(fOpColor) >> 4;
     unsigned    opB = SkColorGetB(fOpColor) >> 4;
     uint32_t    mul = fDistMul;
-    uint32_t    sub = (fDistMul - (1 << 14)) << 8;
+    uint32_t    sub = (fDistMul - (1 << 14)) << 4;
     
     int MAX, mask;
     
@@ -233,8 +232,8 @@
         // now reverse d if we need to
         d = MAX + (d ^ mask) - mask;
         SkASSERT((unsigned)d <= 15);
-        d = SkAlpha255To256(d);
-        
+        // convert from 0..15 to 0..16
+        d += d >> 3;
         d = scale_dist_14(d, mul, sub);
         SkASSERT(d <= 16);
         
diff --git a/src/effects/SkGradientShader.cpp b/src/effects/SkGradientShader.cpp
index a30de6a..780ad18 100644
--- a/src/effects/SkGradientShader.cpp
+++ b/src/effects/SkGradientShader.cpp
@@ -1330,8 +1330,28 @@
         return SkNEW_ARGS(Two_Point_Radial_Gradient, (buffer));
     }
 
+    virtual void flatten(SkFlattenableWriteBuffer& buffer) {
+        this->INHERITED::flatten(buffer);
+        buffer.writeScalar(fDiff.fX);
+        buffer.writeScalar(fDiff.fY);
+        buffer.writeScalar(fStartRadius);
+        buffer.writeScalar(fDiffRadius);
+        buffer.writeScalar(fSr2D2);
+        buffer.writeScalar(fA);
+        buffer.writeScalar(fOneOverTwoA);
+    }
+    
 protected:
-    Two_Point_Radial_Gradient(SkFlattenableReadBuffer& buffer) : Gradient_Shader(buffer) {};
+    Two_Point_Radial_Gradient(SkFlattenableReadBuffer& buffer)
+            : Gradient_Shader(buffer) {
+        fDiff.fX = buffer.readScalar();
+        fDiff.fY = buffer.readScalar();
+        fStartRadius = buffer.readScalar();
+        fDiffRadius = buffer.readScalar();
+        fSr2D2 = buffer.readScalar();
+        fA = buffer.readScalar();
+        fOneOverTwoA = buffer.readScalar();
+    };
     virtual Factory getFactory() { return CreateProc; }
     virtual void onCacheReset() {}
 
diff --git a/src/images/SkImageRef.cpp b/src/images/SkImageRef.cpp
index 7ef5f40..60e01c6 100644
--- a/src/images/SkImageRef.cpp
+++ b/src/images/SkImageRef.cpp
@@ -6,6 +6,8 @@
 #include "SkTemplates.h"
 #include "SkThread.h"
 
+//#define DUMP_IMAGEREF_LIFECYCLE
+
 // can't be static, as SkImageRef_Pool needs to see it
 SkMutex gImageRefMutex;
 
@@ -160,6 +162,8 @@
         : INHERITED(buffer, &gImageRefMutex), fErrorInDecoding(false) {
     fConfig = (SkBitmap::Config)buffer.readU8();
     fSampleSize = buffer.readU8();
+    fDoDither = buffer.readBool();
+
     size_t length = buffer.readU32();
     fStream = SkNEW_ARGS(SkMemoryStream, (length));
     buffer.read((void*)fStream->getMemoryBase(), length);
@@ -173,6 +177,7 @@
 
     buffer.write8(fConfig);
     buffer.write8(fSampleSize);
+    buffer.writeBool(fDoDither);
     size_t length = fStream->getLength();
     buffer.write32(length);
     fStream->rewind();
diff --git a/src/opts/SkBlitRow_opts_arm.cpp b/src/opts/SkBlitRow_opts_arm.cpp
index be1cbdf..ae77bca 100644
--- a/src/opts/SkBlitRow_opts_arm.cpp
+++ b/src/opts/SkBlitRow_opts_arm.cpp
@@ -976,7 +976,7 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 
-const SkBlitRow::Proc SkBlitRow::gPlatform_565_Procs[] = {
+static const SkBlitRow::Proc platform_565_procs[] = {
     // no dither
     S32_D565_Opaque_PROC,
     S32_D565_Blend_PROC,
@@ -990,7 +990,7 @@
     NULL,   // S32A_D565_Blend_Dither
 };
 
-const SkBlitRow::Proc SkBlitRow::gPlatform_4444_Procs[] = {
+static const SkBlitRow::Proc platform_4444_procs[] = {
     // no dither
     NULL,   // S32_D4444_Opaque,
     NULL,   // S32_D4444_Blend,
@@ -1004,10 +1004,21 @@
     NULL,   // S32A_D4444_Blend_Dither
 };
 
-const SkBlitRow::Proc32 SkBlitRow::gPlatform_Procs32[] = {
+static const SkBlitRow::Proc32 platform_32_procs[] = {
     NULL,   // S32_Opaque,
     S32_Blend_BlitRow32_PROC,		// S32_Blend,
     S32A_Opaque_BlitRow32_PROC,		// S32A_Opaque,
     NULL,   // S32A_Blend,
 };
 
+SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {
+    return platform_4444_procs[flags];
+}
+
+SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
+    return platform_565_procs[flags];
+}
+
+SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
+    return platform_32_procs[flags];
+}
diff --git a/src/opts/SkBlitRow_opts_none.cpp b/src/opts/SkBlitRow_opts_none.cpp
index 7a77759..8e0cddc 100644
--- a/src/opts/SkBlitRow_opts_none.cpp
+++ b/src/opts/SkBlitRow_opts_none.cpp
@@ -2,38 +2,14 @@
 
 // Platform impl of Platform_procs with no overrides
 
-const SkBlitRow::Proc SkBlitRow::gPlatform_565_Procs[] = {
-    // no dither
-    NULL,   // S32_D565_Opaque,
-    NULL,   // S32_D565_Blend,
-    NULL,   // S32A_D565_Opaque,
-    NULL,   // S32A_D565_Blend,
-    
-    // dither
-    NULL,   // S32_D565_Opaque_Dither,
-    NULL,   // S32_D565_Blend_Dither,
-    NULL,   // S32A_D565_Opaque_Dither,
-    NULL,   // S32A_D565_Blend_Dither
-};
+SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {
+    return NULL;
+}
 
-const SkBlitRow::Proc SkBlitRow::gPlatform_4444_Procs[] = {
-    // no dither
-    NULL,   // S32_D4444_Opaque,
-    NULL,   // S32_D4444_Blend,
-    NULL,   // S32A_D4444_Opaque,
-    NULL,   // S32A_D4444_Blend,
-    
-    // dither
-    NULL,   // S32_D4444_Opaque_Dither,
-    NULL,   // S32_D4444_Blend_Dither,
-    NULL,   // S32A_D4444_Opaque_Dither,
-    NULL,   // S32A_D4444_Blend_Dither
-};
+SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
+    return NULL;
+}
 
-const SkBlitRow::Proc32 SkBlitRow::gPlatform_Procs32[] = {
-    NULL,   // S32_Opaque,
-    NULL,   // S32_Blend,
-    NULL,   // S32A_Opaque,
-    NULL,   // S32A_Blend,
-};
-
+SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
+    return NULL;
+}
diff --git a/src/utils/SkColorMatrix.cpp b/src/utils/SkColorMatrix.cpp
index 0a20990..f598f97 100644
--- a/src/utils/SkColorMatrix.cpp
+++ b/src/utils/SkColorMatrix.cpp
@@ -38,7 +38,7 @@
 
     static const uint8_t gRotateIndex[] = {
         6, 7, 11, 12,
-        0, 2, 15, 17,
+        0, 10, 2, 12,
         0, 1,  5,  6,
     };
     const uint8_t* index = gRotateIndex + axis * 4;
diff --git a/src/views/SkView.cpp b/src/views/SkView.cpp
index f8008af..7797abe 100644
--- a/src/views/SkView.cpp
+++ b/src/views/SkView.cpp
@@ -649,7 +649,7 @@
 SkView* SkView::sendEventToParents(const SkEvent& evt)
 {
 	SkView* parent = fParent;
-
+    
 	while (parent)
 	{
 		if (parent->doEvent(evt))
@@ -659,6 +659,18 @@
 	return NULL;
 }
 
+SkView* SkView::sendQueryToParents(SkEvent* evt) {
+	SkView* parent = fParent;
+    
+	while (parent) {
+		if (parent->doQuery(evt)) {
+			return parent;
+        }
+		parent = parent->fParent;
+	}
+	return NULL;
+}
+
 //////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////