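Fix kernel launch clipping.

Adjust the intrinsics for launches restricted to an x sub-range:
- Blend and LUT no longer advance the in/out pointers by xstart.
- Blur's vertical pass widens its column range by the blur radius on
  each side, clamped to the row bounds.
- The NEON 5x5 convolve path passes its row pointers offset by x1 - 2.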

bug 7648342

Change-Id: Ife61345bc834f876b03f87cefb2408b4aaba9e88
diff --git a/driver/rsdIntrinsicBlend.cpp b/driver/rsdIntrinsicBlend.cpp
index 22ad108..c35c379 100644
--- a/driver/rsdIntrinsicBlend.cpp
+++ b/driver/rsdIntrinsicBlend.cpp
@@ -103,9 +103,6 @@
     uint32_t x1 = xstart;
     uint32_t x2 = xend;
 
-    in += xstart;
-    out += xstart;
-
     switch (p->slot) {
     case BLEND_CLEAR:
         for (;x1 < x2; x1++, out++) {
diff --git a/driver/rsdIntrinsicBlur.cpp b/driver/rsdIntrinsicBlur.cpp
index 8919df8..5cd671e 100644
--- a/driver/rsdIntrinsicBlur.cpp
+++ b/driver/rsdIntrinsicBlur.cpp
@@ -188,14 +188,27 @@
     float4 *fout = (float4 *)buf;
 
     int y = p->y;
+    uint32_t vx1 = x1;
+    uint32_t vx2 = x2;
+
+    if (vx1 > (uint32_t)cp->iradius) {
+        vx1 -= cp->iradius;
+    } else {
+        vx1 = 0;
+    }
+    vx2 += cp->iradius;
+    if (vx2 >= p->dimX) {
+        vx2 = p->dimX - 1;
+    }
+
     if ((y > cp->iradius) && (y < ((int)p->dimY - cp->iradius))) {
         const uchar *pi = pin + (y - cp->iradius) * din->lod[0].stride;
-        OneVF(fout, pi, din->lod[0].stride, cp->fp, cp->iradius * 2 + 1, x1, x2);
+        OneVF(fout + vx1, pi, din->lod[0].stride, cp->fp, cp->iradius * 2 + 1, vx1, vx2);
     } else {
-        while(x2 > x1) {
-            OneV(p, fout, x1, y, pin, din->lod[0].stride, cp->fp, cp->iradius);
+        while(vx2 > vx1) {
+            OneV(p, fout, vx1, y, pin, din->lod[0].stride, cp->fp, cp->iradius);
             fout++;
-            x1++;
+            vx1++;
         }
     }
 
diff --git a/driver/rsdIntrinsicConvolve5x5.cpp b/driver/rsdIntrinsicConvolve5x5.cpp
index fc6b029..ac06304 100644
--- a/driver/rsdIntrinsicConvolve5x5.cpp
+++ b/driver/rsdIntrinsicConvolve5x5.cpp
@@ -134,7 +134,7 @@
 #if defined(ARCH_ARM_HAVE_NEON)
     if((x1 + 3) < x2) {
         uint32_t len = (x2 - x1 - 3) >> 1;
-        rsdIntrinsicConvolve5x5_K(out, py0, py1, py2, py3, py4, cp->ip, len);
+        rsdIntrinsicConvolve5x5_K(out, py0+x1-2, py1+x1-2, py2+x1-2, py3+x1-2, py4+x1-2, cp->ip, len);
         out += len << 1;
         x1 += len << 1;
     }
diff --git a/driver/rsdIntrinsicLUT.cpp b/driver/rsdIntrinsicLUT.cpp
index a75534e..818a132 100644
--- a/driver/rsdIntrinsicLUT.cpp
+++ b/driver/rsdIntrinsicLUT.cpp
@@ -44,9 +44,6 @@
     uint32_t x1 = xstart;
     uint32_t x2 = xend;
 
-    in += xstart;
-    out += xstart;
-
     DrvAllocation *din = (DrvAllocation *)cp->lut->mHal.drv;
     const uchar *tr = (const uchar *)din->lod[0].mallocPtr;
     const uchar *tg = &tr[256];