src/share/native/sun/java2d/loops/ScaledBlit.c - toolchain/jdk/jdk9_jdk - Git at Google

 /*
  * Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this
  * particular file as subject to the "Classpath" exception as provided
  * by Oracle in the LICENSE file that accompanied this code.
  *
  * This code is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  * version 2 for more details (a copy is included in the LICENSE file that
  * accompanied this code).
  *
  * You should have received a copy of the GNU General Public License version
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */

 #include <math.h>

 #include "jni_util.h"
 #include "GraphicsPrimitiveMgr.h"
 #include "Region.h"

 #include "sun_java2d_loops_ScaledBlit.h"

 /*
  * The scaling loops used inside the helper functions are based on the
  * following pseudocode for stepping through the source image:
  *
  * shift - number of bits of sub-pixel precision in scaled values
  * srcxorig, srcyorig - scaled location of first pixel
  * srcxinc, srcyinc - scaled x and y increments
  * dstwidth, dstheight - number of pixels to process across and down
  *
  * 1. srcy = srcyorig;
  * 2. for (dstheight) {
  * 3.     srcx = srcxorig;
  * 4.     for (dstwidth) {
  * 5.         fetch and process pixel for (srcx >> shift, srcy >> shift)
  * 6.         srcx += srcxinc;
  * 7.     }
  * 8.     srcy += srcyinc;
  * 9. }
  *
  * Note that each execution of line 6 or 8 accumulates error of
  * +/- 1 into the scaled coordinate variables.  These lines are
  * each executed once per pixel across or once per pixel down
  * the region being iterated over, thus the error can accumulate
  * up to a magnitude of dstwidth in the horizontal direction and
  * dstheight in the vertical direction.
  *
  * If the error ever reaches a magnitude of (1 << shift) then we
  * will be off by at least 1 source pixel in our mapping.
  *
  * Note that we increment the source coordinates by the srcxinc
  * and srcyinc variables in each step.  Thus, if our error ever
  * accumulates to a magnitude equal to srcxinc or srcyinc then
  * we will be ahead or behind of "where we should be" by at least
  * one iteration.  Since each iteration is a destination pixel,
  * this means that our actual location will be off by at least
  * one destination pixel.
  *
  * This means that all of the values:
  *
  *     - (1 << shift)
  *     - srcxinc
  *     - srcyinc
  *
  * all represent a maximum bound on how much error we can accumulate
  * before we are off by a source or a destination pixel.  Thus,
  * we should make sure that we never process more than that many
  * pixels if we want to maintain single pixel accuracy.  Even
  * better would be to process many fewer pixels than those bounds
  * to ensure that our accumulated error is much smaller than a
  * pixel.
  */

 /*
  * Find and return the largest tile size that is a power of 2 and
  * which is small enough to yield some reassuring degree of subpixel
  * accuracy.  The degree of subpixel accuracy that is preserved by the
  * chosen tile size varies; the comments in the body describe how the
  * decision is made.
  *
  * shift        - number of bits of sub-pixel precision in scaled values
  *                (must be non-negative and small enough that
  *                (1 << shift) fits in a jint)
  * sxinc, syinc - scaled x and y source increments per destination pixel
  *
  * Returns a power of 2 that is no larger than (1 << shift).  Returns 1
  * when the smaller of the two increments is zero or negative.
  */
 static jint
 findpow2tilesize(jint shift, jint sxinc, jint syinc)
 {
     /*
      * The initial value of shift is our first estimate for
      * the power of 2 for our tilesize since it ensures
      * less than 1 source pixel of error.
      *
      * Reducing it until (1 << shift) is not larger than the
      * smallest of our increments ensures we will have no more
      * than 1 destination pixel of error as well.
      */
     if (sxinc > syinc) {
         sxinc = syinc;
     }
     if (sxinc <= 0) {
         /*
          * Degenerate case.  A zero or negative increment can never
          * satisfy the loop condition below, so shift would be
          * decremented past 0 and (1 << shift) would become undefined.
          * Negative increments reach this function when the caller
          * was handed an inverted source rectangle.
          */
         return 1;
     }
     /* sxinc >= 1 here, so this stops at shift == 0 at the latest. */
     while ((1 << shift) > sxinc) {
         shift--;
     }
     /*
      * shift is now the largest it can be for less than 1 pixel
      * of error in either source or destination spaces.
      *
      * Now we will try for at least 8 bits of subpixel accuracy
      * with a tile size of at least 256x256 and reduce our subpixel
      * accuracy on a sliding scale down to a tilesize of 1x1 when
      * we have no bits of sub-pixel accuracy.
      */
     if (shift >= 16) {
         /* Subtracting 8 asks for 8 bits of subpixel accuracy. */
         shift -= 8;
     } else {
         /* Ask for half of the remaining bits to be subpixel accuracy. */
         /* Rounding is in favor of subpixel accuracy over tile size. */
         /* Worst case, shift == 0 and tilesize == (1 << 0) == 1 */
         shift /= 2;
     }
     return (1 << shift);
 }

 /*
  * Map an integer destination pixel coordinate "id" to the integer
  * destination coordinate at which the "ts" sized tile containing it
  * begins.
  * Tile boundaries lie at whole multiples of the tile size measured
  * from the integer destination origin "io", so the result is "id"
  * rounded down to the nearest such boundary.
  *
  * id == integer destination coordinate
  * io == integer destination operation origin
  * ts == tilesize (must be power of 2)
  *
  * For a power of 2 "ts", -(ts) is the two's complement mask that
  * clears the low log2(ts) bits of the offset from the origin.
  */
 #define TILESTART(id, io, ts)   ((io) + (((id)-(io)) & -(ts)))

 /*
  * For a given integer destination pixel coordinate "id", calculate the
  * sub-pixel accurate source coordinate from which its sample comes.
  * The sample point is the center of the destination pixel (id + 0.5).
  * The returned source coordinate is expressed in a shifted fractional
  * arithmetic number system and is relative to the source origin.
  *
  * id == integer destination coordinate
  * fo == floating point destination operation origin
  * sf == source coordinate scale factor per destination pixel,
  *       already multiplied by the fixed point unit (1 << shift)
  *
  * The macro evaluates to the jdouble result of ceil().  The caller is
  * required to cast this value to the appropriate integer type for the
  * needed precision.  The rendering code which deals only with valid
  * coordinates within the bounds of the source rectangle uses jint.
  * The setup code, which occasionally deals with coordinates that run
  * out of bounds, uses jlong.
  *
  * Note that the rounding in this calculation is at a fraction of a
  * source pixel of (1.0 / (1<<shift)) since the scale factor includes
  * the fractional shift.  As a result, the type of rounding used is
  * not very significant (floor, floor(x+.5), or ceil(x-.5)), but the
  * ceil(x-.5) version is used for consistency with the way that pixel
  * coordinates are rounded to assign the ".5" value to the lower
  * integer.
  */
 #define SRCLOC(id, fo, sf)   (ceil((((id) + 0.5) - (fo)) * (sf) - 0.5))

 /*
  * Reverse map a srctarget coordinate into device space and refine the
  * answer.  The value returned is the smallest destination coordinate
  * that maps to a source coordinate which is greater than or equal to
  * the given target source coordinate.
  *
  * intorigin - integer destination operation origin (anchor of the tiles)
  * dblorigin - floating point destination operation origin
  * tilesize  - destination tile size (must be a power of 2)
  * scale     - scaled source increment per destination pixel, as a jdouble
  * srctarget - scaled source coordinate being searched for
  * srcinc    - scaled source increment per destination pixel, as a jint
  *
  * The inner loops map a destination coordinate into source space.  The
  * first estimate made below uses the theoretical inverse of that
  * dst->src mapping, but floating point math cannot be relied upon to
  * guarantee that applying both equations in sequence gives an exact
  * mapping of src->dst->src.  The estimate is therefore stepped back and
  * forth, one destination pixel at a time, until it has been confirmed
  * both that it really maps back to the given source coordinate and that
  * it is the smallest destination coordinate that does so.
  *
  * In practice the initial guess tends to be the right answer most of
  * the time: the loop finds one iteration to be ">= srctarget" and the
  * next to be "< srctarget" and is done in 2 iterations.  A small number
  * of times the initial guess is 1 too low, so one iteration lands at
  * "< srctarget" and the next at ">= srctarget", again finishing in
  * 2 iterations.  All cases encountered during testing fell into one of
  * those 2 categories and so the loop was always executed exactly twice.
  *
  * Note also that the calculation of the source location below may
  * attempt to calculate the src location of the destination pixel which
  * is "1 beyond" the end of the source image.  Since the shift
  * calculation code in the main function only guaranteed that
  * "srcw << shift" did not overflow a 32-bit signed integer, we cannot
  * guarantee that "(srcw+1) << shift" or, more generally,
  * "(srcw << shift)+srcinc" does not overflow.  As a result, the
  * calculations here are performed with jlong values so that they are
  * not affected by this overflow.  Since srcw (shifted) and srcinc are
  * both 32-bit values, their sum cannot possibly overflow a jlong.  In
  * fact, we can step up to a couple of billion steps of size "srcinc"
  * past the end of the image before we have to worry about overflow -
  * in practice, though, the search never steps more than 1 past the end
  * of the image so this buffer is more than enough.
  */
 static jint
 refine(jint intorigin, jdouble dblorigin, jint tilesize,
        jdouble scale, jint srctarget, jint srcinc)
 {
     /* Widen once so that all source space math below is done in jlong */
     const jlong target = srctarget;
     const jlong step = srcinc;
     /* Which side(s) of the target the probes so far have landed on */
     jboolean sawBelow = JNI_FALSE;
     jboolean sawAtOrAbove = JNI_FALSE;
     /* First estimate of the dest coordinate, from the inverse mapping */
     jint dstloc = (jint) ceil(dblorigin + srctarget / scale - 0.5);

     /* Probe until one location < target and one >= target were seen */
     for (;;) {
         /*
          * Forward map the candidate exactly the way the tiled rendering
          * loops do: exact location of the tile start, then whole
          * integer steps from there.
          */
         jint tilestart = TILESTART(dstloc, intorigin, tilesize);
         jlong srcloc = (jlong) SRCLOC(tilestart, dblorigin, scale);
         if (dstloc > tilestart) {
             srcloc += step * ((jlong) dstloc - tilestart);
         }
         if (srcloc < target) {
             /*
              * Too low, so the answer is at least one pixel further on.
              * If the previous probe was at or above the target then
              * that previous location (dstloc + 1) is the answer.
              */
             dstloc++;
             if (sawAtOrAbove) {
                 break;
             }
             sawBelow = JNI_TRUE;
         } else {
             /*
              * At or above the target.  If the previous probe was below
              * the target then this is the smallest dst which maps
              * >= the target; otherwise keep looking one pixel lower.
              */
             if (sawBelow) {
                 break;
             }
             dstloc--;
             sawAtOrAbove = JNI_TRUE;
         }
     }
     return dstloc;
 }

 /*
  * Class:     sun_java2d_loops_ScaledBlit
  * Method:    Scale
  * Signature: (Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;IIIIDDDD)V
  *
  * Scales the source rectangle (sx1,sy1)-(sx2,sy2) of srcData onto the
  * destination rectangle (ddx1,ddy1)-(ddx2,ddy2) of dstData by invoking
  * the primitive's scaledblit loop once per clip span, or once per tile
  * of each clip span when the destination is larger than a single tile.
  *
  * env, self          - JNI environment and the ScaledBlit primitive object
  * srcData, dstData   - source and destination SurfaceData objects
  * comp               - composite handed to the primitive's getCompInfo
  *                      hook, when the primitive has one
  * clip               - region restricting which destination pixels are
  *                      written
  * sx1, sy1, sx2, sy2 - integer source rectangle
  * ddx1 .. ddy2       - floating point destination rectangle
  *
  * Returns without rendering if the primitive, the clip info or either
  * SurfaceDataOps cannot be obtained, if either surface cannot be locked,
  * or if the locked source bounds are empty.
  */
 JNIEXPORT void JNICALL
 Java_sun_java2d_loops_ScaledBlit_Scale
     (JNIEnv *env, jobject self,
      jobject srcData, jobject dstData,
      jobject comp, jobject clip,
      jint sx1, jint sy1, jint sx2, jint sy2,
      jdouble ddx1, jdouble ddy1, jdouble ddx2, jdouble ddy2)
 {
     SurfaceDataOps *srcOps;
     SurfaceDataOps *dstOps;
     SurfaceDataRasInfo srcInfo;
     SurfaceDataRasInfo dstInfo;
     NativePrimitive *pPrim;
     CompositeInfo compInfo;
     jint sxinc, syinc, shift;
     jint tilesize;
     jint idx1, idy1;
     jdouble scalex, scaley;
     RegionData clipInfo;
     jint dstFlags;
     jboolean xunderflow, yunderflow;

     pPrim = GetNativePrim(env, self);
     if (pPrim == NULL) {
         return;
     }
     if (pPrim->pCompType->getCompInfo != NULL) {
         (*pPrim->pCompType->getCompInfo)(env, &compInfo, comp);
     }
     if (Region_GetInfo(env, clip, &clipInfo)) {
         return;
     }

     /*
      * Look up and validate each SurfaceDataOps separately so that no
      * further lookup is attempted once one of them has failed.
      * NOTE(review): SurfaceData_GetOps presumably leaves a Java
      * exception pending when it returns NULL, in which case making
      * another JNI-using call before returning would be invalid -
      * confirm against SurfaceData.h.
      */
     srcOps = SurfaceData_GetOps(env, srcData);
     if (srcOps == NULL) {
         return;
     }
     dstOps = SurfaceData_GetOps(env, dstData);
     if (dstOps == NULL) {
         return;
     }

     /*
      * Determine the precision to use for the fixed point math
      * for the coordinate scaling.
      * - OR together srcw and srch to get the MSB between the two
      * - Next shift it up until it goes negative
      * - Count the shifts and that will be the most accurate
      *   precision available for the fixed point math
      * - a source coordinate of 1.0 will be (1 << shift)
      * - srcw & srch will be (srcw << shift) and (srch << shift)
      *   and will not overflow
      * Note that if srcw or srch are so large that they are
      * negative numbers before shifting, then:
      * - shift will be 0
      * - tilesize will end up being 1x1 tiles
      * - we will brute force calculate the source location
      *   of every destination pixel using the TILESTART and
      *   SRCLOC macros in this function and then call the
      *   scale helper function to copy one pixel at a time.
      * - SRCLOC involves mostly jdouble calculations so
      *   it should not have integer overflow problems.
      */
     sxinc = (sx2 - sx1) | (sy2 - sy1);
     shift = 0;
     if (sxinc > 0) {
         while ((sxinc <<= 1) > 0) {
             shift++;
         }
     }
     /*
      * Now determine the scaled integer increments used to traverse
      * the source image for each destination pixel.  Our shift value
      * has been calculated above so that any location within the
      * source image can be represented as a scaled integer
      * without incurring integer overflow.
      *
      * But we also need to worry about overflow of the sxinc and syinc
      * parameters.  We already know that "srcw<<shift" and "srch<<shift"
      * cannot overflow a jint, and the only time that sxinc and syinc
      * can be larger than those two values is if ddy2-ddy1 or ddx2-ddx1
      * are smaller than 1.  Since this situation implies that the
      * output area is no more than one pixel wide or tall, then we are
      * stepping by distances that are at least the size of the image
      * and only one destination pixel will ever be rendered - thus the
      * amount by which we step is largely irrelevant since after
      * drawing the first "in bounds" pixel, we will step completely
      * out of the source image and render nothing more.  As a result,
      * we assign the appropriate "size of image" stepping parameter
      * for any scale to smaller than one device pixel.
      */
     yunderflow = (ddy2 - ddy1) < 1.0;
     scaley = (((jdouble) (sy2 - sy1)) / (ddy2 - ddy1)) * (1 << shift);
     syinc = (yunderflow ? ((sy2 - sy1) << shift) : (jint) scaley);
     xunderflow = (ddx2 - ddx1) < 1.0;
     scalex = (((jdouble) (sx2 - sx1)) / (ddx2 - ddx1)) * (1 << shift);
     sxinc = (xunderflow ? ((sx2 - sx1) << shift) : (jint) scalex);
     tilesize = findpow2tilesize(shift, sxinc, syinc);


     /* Lock the source; Lock may shrink srcInfo.bounds. */
     srcInfo.bounds.x1 = sx1;
     srcInfo.bounds.y1 = sy1;
     srcInfo.bounds.x2 = sx2;
     srcInfo.bounds.y2 = sy2;
     if (srcOps->Lock(env, srcOps, &srcInfo, pPrim->srcflags) != SD_SUCCESS) {
         return;
     }
     if (srcInfo.bounds.x2 <= srcInfo.bounds.x1 ||
         srcInfo.bounds.y2 <= srcInfo.bounds.y1)
     {
         /* Nothing left of the source to read from. */
         SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
         return;
     }

     /*
      * Only refine lower bounds if lower source coordinate was clipped
      * because the math will work out to be exactly idx1, idy1 if not.
      * Always refine upper bounds since we want to make sure not to
      * overstep the source bounds based on the tiled iteration math.
      *
      * For underflow cases, simply check if the SRCLOC for the single
      * destination pixel maps inside the source bounds.  If it does,
      * we render that pixel row or column (and only that pixel row
      * or column).  If it does not, we render nothing.
      */
     idx1 = (jint) ceil(ddx1 - 0.5);
     idy1 = (jint) ceil(ddy1 - 0.5);
     if (xunderflow) {
         jdouble x = sx1 + (SRCLOC(idx1, ddx1, scalex) / (1 << shift));
         dstInfo.bounds.x1 = dstInfo.bounds.x2 = idx1;
         if (x >= srcInfo.bounds.x1 && x < srcInfo.bounds.x2) {
             dstInfo.bounds.x2++;
         }
     } else {
         dstInfo.bounds.x1 = ((srcInfo.bounds.x1 <= sx1)
                              ? idx1
                              : refine(idx1, ddx1, tilesize, scalex,
                                       (srcInfo.bounds.x1-sx1) << shift, sxinc));
         dstInfo.bounds.x2 = refine(idx1, ddx1, tilesize, scalex,
                                    (srcInfo.bounds.x2-sx1) << shift, sxinc);
     }
     if (yunderflow) {
         jdouble y = sy1 + (SRCLOC(idy1, ddy1, scaley) / (1 << shift));
         dstInfo.bounds.y1 = dstInfo.bounds.y2 = idy1;
         if (y >= srcInfo.bounds.y1 && y < srcInfo.bounds.y2) {
             dstInfo.bounds.y2++;
         }
     } else {
         dstInfo.bounds.y1 = ((srcInfo.bounds.y1 <= sy1)
                              ? idy1
                              : refine(idy1, ddy1, tilesize, scaley,
                                       (srcInfo.bounds.y1-sy1) << shift, syinc));
         dstInfo.bounds.y2 = refine(idy1, ddy1, tilesize, scaley,
                                    (srcInfo.bounds.y2-sy1) << shift, syinc);
     }

     /* Restrict the destination to the clip before locking it. */
     SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds);
     dstFlags = pPrim->dstflags;
     if (!Region_IsRectangular(&clipInfo)) {
         dstFlags |= SD_LOCK_PARTIAL_WRITE;
     }
     if (dstOps->Lock(env, dstOps, &dstInfo, dstFlags) != SD_SUCCESS) {
         /* The source is already locked at this point; undo that. */
         SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
         return;
     }

     if (dstInfo.bounds.x2 > dstInfo.bounds.x1 &&
         dstInfo.bounds.y2 > dstInfo.bounds.y1)
     {
         srcOps->GetRasInfo(env, srcOps, &srcInfo);
         dstOps->GetRasInfo(env, dstOps, &dstInfo);
         if (srcInfo.rasBase && dstInfo.rasBase) {
             SurfaceDataBounds span;
             /*
              * Scaled source locations handed to the loops are relative
              * to (sx1, sy1), so point pSrc at that source pixel.
              */
             void *pSrc = PtrCoord(srcInfo.rasBase,
                                   sx1, srcInfo.pixelStride,
                                   sy1, srcInfo.scanStride);

             Region_IntersectBounds(&clipInfo, &dstInfo.bounds);
             Region_StartIteration(env, &clipInfo);
             if (tilesize >= (ddx2 - ddx1) &&
                 tilesize >= (ddy2 - ddy1))
             {
                 /* Do everything in one tile */
                 jint sxloc = (jint) SRCLOC(idx1, ddx1, scalex);
                 jint syloc = (jint) SRCLOC(idy1, ddy1, scaley);
                 while (Region_NextIteration(&clipInfo, &span)) {
                     jint tsxloc = sxloc;
                     jint tsyloc = syloc;
                     void *pDst;

                     /* Step from the tile origin to the span's corner */
                     if (span.y1 > idy1) {
                         tsyloc += syinc * (span.y1 - idy1);
                     }
                     if (span.x1 > idx1) {
                         tsxloc += sxinc * (span.x1 - idx1);
                     }

                     pDst = PtrCoord(dstInfo.rasBase,
                                     span.x1, dstInfo.pixelStride,
                                     span.y1, dstInfo.scanStride);
                     (*pPrim->funcs.scaledblit)(pSrc, pDst,
                                                span.x2-span.x1, span.y2-span.y1,
                                                tsxloc, tsyloc,
                                                sxinc, syinc, shift,
                                                &srcInfo, &dstInfo,
                                                pPrim, &compInfo);
                 }
             } else {
                 /* Break each clip span into tiles for better accuracy. */
                 while (Region_NextIteration(&clipInfo, &span)) {
                     jint tilex, tiley;
                     jint sxloc, syloc;
                     jint x1, y1, x2, y2;
                     void *pDst;

                     for (tiley = TILESTART(span.y1, idy1, tilesize);
                          tiley < span.y2;
                          tiley += tilesize)
                     {
                         /* Clip span to Y range of current tile */
                         y1 = tiley;
                         y2 = tiley + tilesize;
                         if (y1 < span.y1) y1 = span.y1;
                         if (y2 > span.y2) y2 = span.y2;

                         /* Find scaled source coordinate of first pixel */
                         syloc = (jint) SRCLOC(tiley, ddy1, scaley);
                         if (y1 > tiley) {
                             syloc += syinc * (y1 - tiley);
                         }

                         for (tilex = TILESTART(span.x1, idx1, tilesize);
                              tilex < span.x2;
                              tilex += tilesize)
                         {
                             /* Clip span to X range of current tile */
                             x1 = tilex;
                             x2 = tilex + tilesize;
                             if (x1 < span.x1) x1 = span.x1;
                             if (x2 > span.x2) x2 = span.x2;

                             /* Find scaled source coordinate of first pixel */
                             sxloc = (jint) SRCLOC(tilex, ddx1, scalex);
                             if (x1 > tilex) {
                                 sxloc += sxinc * (x1 - tilex);
                             }

                             pDst = PtrCoord(dstInfo.rasBase,
                                             x1, dstInfo.pixelStride,
                                             y1, dstInfo.scanStride);
                             (*pPrim->funcs.scaledblit)(pSrc, pDst, x2-x1, y2-y1,
                                                        sxloc, syloc,
                                                        sxinc, syinc, shift,
                                                        &srcInfo, &dstInfo,
                                                        pPrim, &compInfo);
                         }
                     }
                 }
             }
             Region_EndIteration(env, &clipInfo);
         }
         SurfaceData_InvokeRelease(env, dstOps, &dstInfo);
         SurfaceData_InvokeRelease(env, srcOps, &srcInfo);
     }
     /* Both surfaces are locked on every path that reaches this point. */
     SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
     SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
 }
	/*
	* Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved.
	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
	*
	* This code is free software; you can redistribute it and/or modify it
	* under the terms of the GNU General Public License version 2 only, as
	* published by the Free Software Foundation. Oracle designates this
	* particular file as subject to the "Classpath" exception as provided
	* by Oracle in the LICENSE file that accompanied this code.
	*
	* This code is distributed in the hope that it will be useful, but WITHOUT
	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	* version 2 for more details (a copy is included in the LICENSE file that
	* accompanied this code).
	*
	* You should have received a copy of the GNU General Public License version
	* 2 along with this work; if not, write to the Free Software Foundation,
	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
	*
	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
	* or visit www.oracle.com if you need additional information or have any
	* questions.
	*/

	#include <math.h>

	#include "jni_util.h"
	#include "GraphicsPrimitiveMgr.h"
	#include "Region.h"

	#include "sun_java2d_loops_ScaledBlit.h"

	/*
	* The scaling loops used inside the helper functions are based on the
	* following pseudocode for stepping through the source image:
	*
	* shift - number of bits of sub-pixel precision in scaled values
	* srcxorig, srcyorig - scaled location of first pixel
	* srcxinc, srcyinc - scaled x and y increments
	* dstwidth, dstheight - number of pixels to process across and down
	*
	* 1. srcy = srcyorig;
	* 2. for (dstheight) {
	* 3.     srcx = srcxorig;
	* 4.     for (dstwidth) {
	* 5.         fetch and process pixel for (srcx >> shift, srcy >> shift)
	* 6.         srcx += srcxinc;
	* 7.     }
	* 8.     srcy += srcyinc;
	* 9. }
	*
	* Note that each execution of line 6 or 8 accumulates error of
	* +/- 1 into the scaled coordinate variables. These lines are
	* each executed once per pixel across or once per pixel down
	* the region being iterated over, thus the error can accumulate
	* up to a magnitude of dstwidth in the horizontal direction and
	* dstheight in the vertical direction.
	*
	* If the error ever reaches a magnitude of (1 << shift) then we
	* will be off by at least 1 source pixel in our mapping.
	*
	* Note that we increment the source coordinates by the srcxinc
	* and srcyinc variables in each step. Thus, if our error ever
	* accumulates to a magnitude equal to srcxinc or srcyinc then
	* we will be ahead or behind of "where we should be" by at least
	* one iteration. Since each iteration is a destination pixel,
	* this means that our actual location will be off by at least
	* one destination pixel.
	*
	* This means that all of the values:
	*
	* - (1 << shift)
	* - srcxinc
	* - srcyinc
	*
	* all represent a maximum bound on how much error we can accumulate
	* before we are off by a source or a destination pixel. Thus,
	* we should make sure that we never process more than that many
	* pixels if we want to maintain single pixel accuracy. Even
	* better would be to process many fewer pixels than those bounds
	* to ensure that our accumulated error is much smaller than a
	* pixel.
	*/

	/*
	 * Find and return the largest tile size that is a power of 2 and
	 * which is small enough to yield some reassuring degree of subpixel
	 * accuracy.  The degree of subpixel accuracy that is preserved by the
	 * chosen tile size varies; the comments in the body describe how the
	 * decision is made.
	 *
	 * shift        - number of bits of sub-pixel precision in scaled values
	 *                (must be non-negative and small enough that
	 *                (1 << shift) fits in a jint)
	 * sxinc, syinc - scaled x and y source increments per destination pixel
	 *
	 * Returns a power of 2 that is no larger than (1 << shift).  Returns 1
	 * when the smaller of the two increments is zero or negative.
	 */
	static jint
	findpow2tilesize(jint shift, jint sxinc, jint syinc)
	{
	    /*
	     * The initial value of shift is our first estimate for
	     * the power of 2 for our tilesize since it ensures
	     * less than 1 source pixel of error.
	     *
	     * Reducing it until (1 << shift) is not larger than the
	     * smallest of our increments ensures we will have no more
	     * than 1 destination pixel of error as well.
	     */
	    if (sxinc > syinc) {
	        sxinc = syinc;
	    }
	    if (sxinc <= 0) {
	        /*
	         * Degenerate case.  A zero or negative increment can never
	         * satisfy the loop condition below, so shift would be
	         * decremented past 0 and (1 << shift) would become undefined.
	         */
	        return 1;
	    }
	    /* sxinc >= 1 here, so this stops at shift == 0 at the latest. */
	    while ((1 << shift) > sxinc) {
	        shift--;
	    }
	    /*
	     * shift is now the largest it can be for less than 1 pixel
	     * of error in either source or destination spaces.
	     *
	     * Now we will try for at least 8 bits of subpixel accuracy
	     * with a tile size of at least 256x256 and reduce our subpixel
	     * accuracy on a sliding scale down to a tilesize of 1x1 when
	     * we have no bits of sub-pixel accuracy.
	     */
	    if (shift >= 16) {
	        /* Subtracting 8 asks for 8 bits of subpixel accuracy. */
	        shift -= 8;
	    } else {
	        /* Ask for half of the remaining bits to be subpixel accuracy. */
	        /* Rounding is in favor of subpixel accuracy over tile size. */
	        /* Worst case, shift == 0 and tilesize == (1 << 0) == 1 */
	        shift /= 2;
	    }
	    return (1 << shift);
	}

	/*
	 * Map an integer destination pixel coordinate "id" to the integer
	 * destination coordinate at which the "ts" sized tile containing it
	 * begins.
	 * Tile boundaries lie at whole multiples of the tile size measured
	 * from the integer destination origin "io", so the result is "id"
	 * rounded down to the nearest such boundary.
	 *
	 * id == integer destination coordinate
	 * io == integer destination operation origin
	 * ts == tilesize (must be power of 2)
	 *
	 * For a power of 2 "ts", -(ts) is the two's complement mask that
	 * clears the low log2(ts) bits of the offset from the origin.
	 */
	#define TILESTART(id, io, ts) ((io) + (((id)-(io)) & -(ts)))

	/*
	 * For a given integer destination pixel coordinate "id", calculate the
	 * sub-pixel accurate source coordinate from which its sample comes.
	 * The sample point is the center of the destination pixel (id + 0.5).
	 * The returned source coordinate is expressed in a shifted fractional
	 * arithmetic number system.
	 *
	 * id == integer destination coordinate
	 * fo == floating point destination operation origin
	 * sf == source coordinate scale factor per destination pixel
	 *       (multiplied by fractional arithmetic "shift")
	 *
	 * The macro evaluates to the jdouble result of ceil().  The caller is
	 * required to cast this value to the appropriate integer type for the
	 * needed precision.  The rendering code which deals only with valid
	 * coordinates within the bounds of the source rectangle uses jint.
	 * The setup code, which occasionally deals with coordinates that run
	 * out of bounds, uses jlong.
	 *
	 * Note that the rounding in this calculation is at a fraction of a
	 * source pixel of (1.0 / (1<<shift)) since the scale factor includes
	 * the fractional shift.  As a result, the type of rounding used is
	 * not very significant (floor, floor(x+.5), or ceil(x-.5)), but the
	 * ceil(x-.5) version is used for consistency with the way that pixel
	 * coordinates are rounded to assign the ".5" value to the lower
	 * integer.
	 */
	#define SRCLOC(id, fo, sf) (ceil((((id) + 0.5) - (fo)) * (sf) - 0.5))

	/*
	 * Reverse map a srctarget coordinate into device space and refine the
	 * answer.  The value returned is the smallest destination coordinate
	 * that maps to a source coordinate which is greater than or equal to
	 * the given target source coordinate.
	 *
	 * intorigin - integer destination operation origin (anchor of the tiles)
	 * dblorigin - floating point destination operation origin
	 * tilesize  - destination tile size (must be a power of 2)
	 * scale     - scaled source increment per destination pixel, as a jdouble
	 * srctarget - scaled source coordinate being searched for
	 * srcinc    - scaled source increment per destination pixel, as a jint
	 *
	 * The inner loops map a destination coordinate into source space.  The
	 * first estimate made below uses the theoretical inverse of that
	 * dst->src mapping, but floating point math cannot be relied upon to
	 * guarantee that applying both equations in sequence gives an exact
	 * mapping of src->dst->src.  The estimate is therefore stepped back and
	 * forth, one destination pixel at a time, until it has been confirmed
	 * both that it really maps back to the given source coordinate and that
	 * it is the smallest destination coordinate that does so.
	 *
	 * In practice the initial guess tends to be the right answer most of
	 * the time: the loop finds one iteration to be ">= srctarget" and the
	 * next to be "< srctarget" and is done in 2 iterations.  A small number
	 * of times the initial guess is 1 too low, so one iteration lands at
	 * "< srctarget" and the next at ">= srctarget", again finishing in
	 * 2 iterations.  All cases encountered during testing fell into one of
	 * those 2 categories and so the loop was always executed exactly twice.
	 *
	 * Note also that the calculation of the source location below may
	 * attempt to calculate the src location of the destination pixel which
	 * is "1 beyond" the end of the source image.  Since the shift
	 * calculation code in the main function only guaranteed that
	 * "srcw << shift" did not overflow a 32-bit signed integer, we cannot
	 * guarantee that "(srcw+1) << shift" or, more generally,
	 * "(srcw << shift)+srcinc" does not overflow.  As a result, the
	 * calculations here are performed with jlong values so that they are
	 * not affected by this overflow.  Since srcw (shifted) and srcinc are
	 * both 32-bit values, their sum cannot possibly overflow a jlong.  In
	 * fact, we can step up to a couple of billion steps of size "srcinc"
	 * past the end of the image before we have to worry about overflow -
	 * in practice, though, the search never steps more than 1 past the end
	 * of the image so this buffer is more than enough.
	 */
	static jint
	refine(jint intorigin, jdouble dblorigin, jint tilesize,
	       jdouble scale, jint srctarget, jint srcinc)
	{
	    /* Widen once so that all source space math below is done in jlong */
	    const jlong target = srctarget;
	    const jlong step = srcinc;
	    /* Which side(s) of the target the probes so far have landed on */
	    jboolean sawBelow = JNI_FALSE;
	    jboolean sawAtOrAbove = JNI_FALSE;
	    /* First estimate of the dest coordinate, from the inverse mapping */
	    jint dstloc = (jint) ceil(dblorigin + srctarget / scale - 0.5);

	    /* Probe until one location < target and one >= target were seen */
	    for (;;) {
	        /*
	         * Forward map the candidate exactly the way the tiled rendering
	         * loops do: exact location of the tile start, then whole
	         * integer steps from there.
	         */
	        jint tilestart = TILESTART(dstloc, intorigin, tilesize);
	        jlong srcloc = (jlong) SRCLOC(tilestart, dblorigin, scale);
	        if (dstloc > tilestart) {
	            srcloc += step * ((jlong) dstloc - tilestart);
	        }
	        if (srcloc < target) {
	            /*
	             * Too low, so the answer is at least one pixel further on.
	             * If the previous probe was at or above the target then
	             * that previous location (dstloc + 1) is the answer.
	             */
	            dstloc++;
	            if (sawAtOrAbove) {
	                break;
	            }
	            sawBelow = JNI_TRUE;
	        } else {
	            /*
	             * At or above the target.  If the previous probe was below
	             * the target then this is the smallest dst which maps
	             * >= the target; otherwise keep looking one pixel lower.
	             */
	            if (sawBelow) {
	                break;
	            }
	            dstloc--;
	            sawAtOrAbove = JNI_TRUE;
	        }
	    }
	    return dstloc;
	}

	/*
	 * Class:     sun_java2d_loops_ScaledBlit
	 * Method:    Scale
	 * Signature: (Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;IIIIDDDD)V
	 *
	 * Scales the source rectangle (sx1,sy1)-(sx2,sy2) of srcData into the
	 * destination rectangle (ddx1,ddy1)-(ddx2,ddy2) of dstData by invoking
	 * the primitive's scaledblit loop once per clip span (or once per tile
	 * of each clip span when the destination is larger than one tile).
	 *
	 * srcData, dstData - surfaces whose native ops are locked for the blit
	 * comp             - composite handed to the primitive's getCompInfo
	 * clip             - region limiting which destination pixels are written
	 * sx1,sy1,sx2,sy2  - integer source bounds
	 * ddx1..ddy2       - floating point destination bounds
	 *
	 * The function returns without rendering if the primitive, the clip
	 * info or either set of surface ops cannot be obtained, if either
	 * surface fails to lock, or if the locked source bounds are empty.
	 */
	JNIEXPORT void JNICALL
	Java_sun_java2d_loops_ScaledBlit_Scale
	    (JNIEnv *env, jobject self,
	     jobject srcData, jobject dstData,
	     jobject comp, jobject clip,
	     jint sx1, jint sy1, jint sx2, jint sy2,
	     jdouble ddx1, jdouble ddy1, jdouble ddx2, jdouble ddy2)
	{
	    SurfaceDataOps *srcOps;
	    SurfaceDataOps *dstOps;
	    SurfaceDataRasInfo srcInfo;
	    SurfaceDataRasInfo dstInfo;
	    NativePrimitive *pPrim;
	    CompositeInfo compInfo;
	    jint sxinc, syinc, shift;
	    jint tilesize;
	    jint idx1, idy1;
	    jdouble scalex, scaley;
	    RegionData clipInfo;
	    jint dstFlags;
	    jboolean xunderflow, yunderflow;

	    pPrim = GetNativePrim(env, self);
	    if (pPrim == NULL) {
	        return;
	    }
	    if (pPrim->pCompType->getCompInfo != NULL) {
	        (*pPrim->pCompType->getCompInfo)(env, &compInfo, comp);
	    }
	    if (Region_GetInfo(env, clip, &clipInfo)) {
	        return;
	    }

	    srcOps = SurfaceData_GetOps(env, srcData);
	    dstOps = SurfaceData_GetOps(env, dstData);
	    if (srcOps == NULL || dstOps == NULL) {
	        return;
	    }

	    /*
	     * Determine the precision to use for the fixed point math
	     * for the coordinate scaling.
	     * - OR together srcw and srch to get the MSB between the two
	     * - Next shift it up until its MSB reaches bit 30, the highest
	     *   bit a positive 32-bit signed value can have set
	     * - Count the shifts and that will be the most accurate
	     *   precision available for the fixed point math
	     * - a source coordinate of 1.0 will be (1 << shift)
	     * - srcw & srch will be (srcw << shift) and (srch << shift)
	     *   and will not overflow
	     * Note that if srcw or srch are so large that they are
	     * negative numbers before shifting, then:
	     * - shift will be 0
	     * - tilesize will end up being 1x1 tiles
	     * - we will brute force calculate the source location
	     *   of every destination pixel using the TILESTART and
	     *   SRCLOC macros in this function and then call the
	     *   scale helper function to copy one pixel at a time.
	     * - TILESTART involves mostly jdouble calculations so
	     *   it should not have integer overflow problems.
	     */
	    sxinc = (sx2 - sx1) | (sy2 - sy1);
	    shift = 0;
	    if (sxinc > 0) {
	        /*
	         * Compare against 2^30 before shifting rather than shifting
	         * into the sign bit and testing for a negative result: the
	         * latter is signed overflow, which C leaves undefined.  The
	         * resulting shift count is the same.
	         */
	        while (sxinc < 0x40000000) {
	            sxinc <<= 1;
	            shift++;
	        }
	    }
	    /*
	     * Now determine the scaled integer increments used to traverse
	     * the source image for each destination pixel.  Our shift value
	     * has been calculated above so that any location within the
	     * destination image can be represented as a scaled integer
	     * without incurring integer overflow.
	     *
	     * But we also need to worry about overflow of the sxinc and syinc
	     * parameters.  We already know that "srcw<<shift" and "srch<<shift"
	     * cannot overflow a jint, and the only time that sxinc and syinc
	     * can be larger than those two values is if ddy2-ddy1 or ddx2-ddx1
	     * are smaller than 1.  Since this situation implies that the
	     * output area is no more than one pixel wide or tall, then we are
	     * stepping by distances that are at least the size of the image
	     * and only one destination pixel will ever be rendered - thus the
	     * amount by which we step is largely irrelevant since after
	     * drawing the first "in bounds" pixel, we will step completely
	     * out of the source image and render nothing more.  As a result,
	     * we assign the appropriate "size of image" stepping parameter
	     * for any scale to smaller than one device pixel.
	     */
	    yunderflow = (ddy2 - ddy1) < 1.0;
	    scaley = (((jdouble) (sy2 - sy1)) / (ddy2 - ddy1)) * (1 << shift);
	    syinc = (yunderflow ? ((sy2 - sy1) << shift) : (jint) scaley);
	    xunderflow = (ddx2 - ddx1) < 1.0;
	    scalex = (((jdouble) (sx2 - sx1)) / (ddx2 - ddx1)) * (1 << shift);
	    sxinc = (xunderflow ? ((sx2 - sx1) << shift) : (jint) scalex);
	    tilesize = findpow2tilesize(shift, sxinc, syinc);


	    srcInfo.bounds.x1 = sx1;
	    srcInfo.bounds.y1 = sy1;
	    srcInfo.bounds.x2 = sx2;
	    srcInfo.bounds.y2 = sy2;
	    if (srcOps->Lock(env, srcOps, &srcInfo, pPrim->srcflags) != SD_SUCCESS) {
	        return;
	    }
	    /* Nothing to read if the locked source bounds came back empty. */
	    if (srcInfo.bounds.x2 <= srcInfo.bounds.x1 ||
	        srcInfo.bounds.y2 <= srcInfo.bounds.y1)
	    {
	        SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
	        return;
	    }

	    /*
	     * Only refine lower bounds if lower source coordinate was clipped
	     * because the math will work out to be exactly idx1, idy1 if not.
	     * Always refine upper bounds since we want to make sure not to
	     * overstep the source bounds based on the tiled iteration math.
	     *
	     * For underflow cases, simply check if the SRCLOC for the single
	     * destination pixel maps inside the source bounds.  If it does,
	     * we render that pixel row or column (and only that pixel row
	     * or column).  If it does not, we render nothing.
	     */
	    idx1 = (jint) ceil(ddx1 - 0.5);
	    idy1 = (jint) ceil(ddy1 - 0.5);
	    if (xunderflow) {
	        jdouble x = sx1 + (SRCLOC(idx1, ddx1, scalex) / (1 << shift));
	        dstInfo.bounds.x1 = dstInfo.bounds.x2 = idx1;
	        if (x >= srcInfo.bounds.x1 && x < srcInfo.bounds.x2) {
	            dstInfo.bounds.x2++;
	        }
	    } else {
	        dstInfo.bounds.x1 = ((srcInfo.bounds.x1 <= sx1)
	                             ? idx1
	                             : refine(idx1, ddx1, tilesize, scalex,
	                                      (srcInfo.bounds.x1-sx1) << shift, sxinc));
	        dstInfo.bounds.x2 = refine(idx1, ddx1, tilesize, scalex,
	                                   (srcInfo.bounds.x2-sx1) << shift, sxinc);
	    }
	    if (yunderflow) {
	        jdouble y = sy1 + (SRCLOC(idy1, ddy1, scaley) / (1 << shift));
	        dstInfo.bounds.y1 = dstInfo.bounds.y2 = idy1;
	        if (y >= srcInfo.bounds.y1 && y < srcInfo.bounds.y2) {
	            dstInfo.bounds.y2++;
	        }
	    } else {
	        dstInfo.bounds.y1 = ((srcInfo.bounds.y1 <= sy1)
	                             ? idy1
	                             : refine(idy1, ddy1, tilesize, scaley,
	                                      (srcInfo.bounds.y1-sy1) << shift, syinc));
	        dstInfo.bounds.y2 = refine(idy1, ddy1, tilesize, scaley,
	                                   (srcInfo.bounds.y2-sy1) << shift, syinc);
	    }

	    SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds);
	    dstFlags = pPrim->dstflags;
	    if (!Region_IsRectangular(&clipInfo)) {
	        dstFlags |= SD_LOCK_PARTIAL_WRITE;
	    }
	    if (dstOps->Lock(env, dstOps, &dstInfo, dstFlags) != SD_SUCCESS) {
	        /* The source is already locked at this point; undo that. */
	        SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
	        return;
	    }

	    if (dstInfo.bounds.x2 > dstInfo.bounds.x1 &&
	        dstInfo.bounds.y2 > dstInfo.bounds.y1)
	    {
	        srcOps->GetRasInfo(env, srcOps, &srcInfo);
	        dstOps->GetRasInfo(env, dstOps, &dstInfo);
	        if (srcInfo.rasBase && dstInfo.rasBase) {
	            SurfaceDataBounds span;
	            /* Source pointer is anchored at (sx1, sy1). */
	            void *pSrc = PtrCoord(srcInfo.rasBase,
	                                  sx1, srcInfo.pixelStride,
	                                  sy1, srcInfo.scanStride);

	            Region_IntersectBounds(&clipInfo, &dstInfo.bounds);
	            Region_StartIteration(env, &clipInfo);
	            if (tilesize >= (ddx2 - ddx1) &&
	                tilesize >= (ddy2 - ddy1))
	            {
	                /* Do everything in one tile */
	                jint sxloc = (jint) SRCLOC(idx1, ddx1, scalex);
	                jint syloc = (jint) SRCLOC(idy1, ddy1, scaley);
	                while (Region_NextIteration(&clipInfo, &span)) {
	                    jint tsxloc = sxloc;
	                    jint tsyloc = syloc;
	                    void *pDst;

	                    /* Advance to the first source pixel of this span. */
	                    if (span.y1 > idy1) {
	                        tsyloc += syinc * (span.y1 - idy1);
	                    }
	                    if (span.x1 > idx1) {
	                        tsxloc += sxinc * (span.x1 - idx1);
	                    }

	                    pDst = PtrCoord(dstInfo.rasBase,
	                                    span.x1, dstInfo.pixelStride,
	                                    span.y1, dstInfo.scanStride);
	                    (*pPrim->funcs.scaledblit)(pSrc, pDst,
	                                               span.x2-span.x1, span.y2-span.y1,
	                                               tsxloc, tsyloc,
	                                               sxinc, syinc, shift,
	                                               &srcInfo, &dstInfo,
	                                               pPrim, &compInfo);
	                }
	            } else {
	                /* Break each clip span into tiles for better accuracy. */
	                while (Region_NextIteration(&clipInfo, &span)) {
	                    jint tilex, tiley;
	                    jint sxloc, syloc;
	                    jint x1, y1, x2, y2;
	                    void *pDst;

	                    for (tiley = TILESTART(span.y1, idy1, tilesize);
	                         tiley < span.y2;
	                         tiley += tilesize)
	                    {
	                        /* Clip span to Y range of current tile */
	                        y1 = tiley;
	                        y2 = tiley + tilesize;
	                        if (y1 < span.y1) y1 = span.y1;
	                        if (y2 > span.y2) y2 = span.y2;

	                        /* Find scaled source coordinate of first pixel */
	                        syloc = (jint) SRCLOC(tiley, ddy1, scaley);
	                        if (y1 > tiley) {
	                            syloc += syinc * (y1 - tiley);
	                        }

	                        for (tilex = TILESTART(span.x1, idx1, tilesize);
	                             tilex < span.x2;
	                             tilex += tilesize)
	                        {
	                            /* Clip span to X range of current tile */
	                            x1 = tilex;
	                            x2 = tilex + tilesize;
	                            if (x1 < span.x1) x1 = span.x1;
	                            if (x2 > span.x2) x2 = span.x2;

	                            /* Find scaled source coordinate of first pixel */
	                            sxloc = (jint) SRCLOC(tilex, ddx1, scalex);
	                            if (x1 > tilex) {
	                                sxloc += sxinc * (x1 - tilex);
	                            }

	                            pDst = PtrCoord(dstInfo.rasBase,
	                                            x1, dstInfo.pixelStride,
	                                            y1, dstInfo.scanStride);
	                            (*pPrim->funcs.scaledblit)(pSrc, pDst, x2-x1, y2-y1,
	                                                       sxloc, syloc,
	                                                       sxinc, syinc, shift,
	                                                       &srcInfo, &dstInfo,
	                                                       pPrim, &compInfo);
	                        }
	                    }
	                }
	            }
	            Region_EndIteration(env, &clipInfo);
	        }
	        SurfaceData_InvokeRelease(env, dstOps, &dstInfo);
	        SurfaceData_InvokeRelease(env, srcOps, &srcInfo);
	    }
	    SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
	    SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
	}