blob: 625c3662f3c2be767ad721d42e70df0ea6fa2df1 [file] [log] [blame]
/*
* Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include <math.h>
#include "jni_util.h"
#include "GraphicsPrimitiveMgr.h"
#include "Region.h"
#include "sun_java2d_loops_ScaledBlit.h"
/*
* The scaling loops used inside the helper functions are based on the
* following pseudocode for stepping through the source image:
*
* shift - number of bits of sub-pixel precision in scaled values
* srcxorig, srcyorig - scaled location of first pixel
* srcxinc, srcyinc - scaled x and y increments
* dstwidth, dstheight - number of pixels to process across and down
*
* 1. srcy = srcyorig;
* 2. for (dstheight) {
* 3. srcx = srcxorig;
* 4. for (dstwidth) {
* 5. fetch and process pixel for (srcx >> shift, srcy >> shift)
* 6. srcx += srcxinc;
* 7. }
* 8. srcy += srcyinc;
* 9. }
*
* Note that each execution of line 6 or 8 accumulates error of
* +/- 1 into the scaled coordinate variables. These lines are
* each executed once per pixel across or once per pixel down
* the region being iterated over, thus the error can accumulate
* up to a magnitude of dstwidth in the horizontal direction and
* dstheight in the vertical direction.
*
* If the error ever reaches a magnitude of (1 << shift) then we
* will be off by at least 1 source pixel in our mapping.
*
* Note that we increment the source coordinates by the srcxinc
* and srcyinc variables in each step. Thus, if our error ever
* accumulates to a magnitude equal to srcxinc or srcyinc then
* we will be ahead or behind of "where we should be" by at least
* one iteration. Since each iteration is a destination pixel,
* this means that our actual location will be off by at least
* one destination pixel.
*
* This means that all of the values:
*
* - (1 << shift)
* - srcxinc
* - srcyinc
*
* all represent a maximum bound on how much error we can accumulate
* before we are off by a source or a destination pixel. Thus,
* we should make sure that we never process more than that many
* pixels if we want to maintain single pixel accuracy. Even
* better would be to process many fewer pixels than those bounds
* to ensure that our accumulated error is much smaller than a
* pixel.
*/
/*
* Find and return the largest tile size that is a power of 2 and
* which is small enough to yield some reassuring degree of subpixel
* accuracy. The degree of subpixel accuracy that will be preserved
* by the tile size it chooses will vary and the details on how
* it makes this decision are detailed in the comments below.
*/
static jint
findpow2tilesize(jint shift, jint sxinc, jint syinc)
{
/*
* The initial value of shift is our first estimate for
* the power of 2 for our tilesize since it ensures
* less than 1 source pixel of error.
*
* Reducing it until (1 << shift) is not larger than the
* smallest of our increments ensures we will have no more
* than 1 destination pixel of error as well.
*/
if (sxinc > syinc) {
sxinc = syinc;
}
if (sxinc == 0) {
/* Degenerate case will cause infinite loop in next loop... */
return 1;
}
while ((1 << shift) > sxinc) {
shift--;
}
/*
* shift is now the largest it can be for less than 1 pixel
* of error in either source or destination spaces.
*
* Now we will try for at least 8 bits of subpixel accuracy
* with a tile size of at least 256x256 and reduce our subpixel
* accuracy on a sliding scale down to a tilesize of 1x1 when
* we have no bits of sub-pixel accuracy.
*/
if (shift >= 16) {
/* Subtracting 8 asks for 8 bits of subpixel accuracy. */
shift -= 8;
} else {
/* Ask for half of the remaining bits to be subpixel accuracy. */
/* Rounding is in favor of subpixel accuracy over tile size. */
/* Worst case, shift == 0 and tilesize == (1 << 0) == 1 */
shift /= 2;
}
return (1 << shift);
}
/*
* For a given integer destination pixel coordinate "id", calculate the
* integer destination coordinate of the start of the "ts" sized tile
* in which it resides.
* Tiles all start at even multiples of the tile size from the integer
* destination origin "io".
*
* id == integer destination coordinate
* io == integer destination operation origin
* ts == tilesize (must be power of 2)
*/
#define TILESTART(id, io, ts) ((io) + (((id)-(io)) & (~((ts)-1))))
/*
* For a given integer destination pixel coordinate "id", calculate the
* sub-pixel accurate source coordinate from which its sample comes.
* The returned source coordinate is expressed in a shifted fractional
* arithmetic number system.
*
* id == integer destination coordinate
* fo == floating point destination operation origin,
* sf == source coordinate scale factor per destination pixel
* (multiplied by fractional arithmetic "shift")
*
* The caller is required to cast this value to the appropriate
* integer type for the needed precision. The rendering code which
* deals only with valid coordinates within the bounds of the source
* rectangle uses jint. The setup code, which occasionally deals
* with coordinates that run out of bounds, uses jlong.
*
* Note that the rounding in this calculation is at a fraction of a
* source pixel of (1.0 / (1<<shift)) since the scale factor includes
* the fractional shift. As a result, the type of rounding used is
* not very significant (floor, floor(x+.5), or ceil(x-.5)), but the
* ceil(x-.5) version is used for consistency with the way that pixel
* coordinates are rounded to assign the ".5" value to the lower
* integer.
*/
#define SRCLOC(id, fo, sf) (ceil((((id) + 0.5) - (fo)) * (sf) - 0.5))
/*
* Reverse map a srctarget coordinate into device space and refine the
* answer. More specifically, what we are looking for is the smallest
* destination coordinate that maps to a source coordinate that is
* greater than or equal to the given target source coordinate.
*
* Note that since the inner loops use math that maps a destination
* coordinate into source space and that, even though the equation
* we use below is the theoretical inverse of the dst->src mapping,
* we cannot rely on floating point math to guarantee that applying
* both of these equations in sequence will give us an exact mapping
* of src->dst->src. Thus, we must search back and forth to see if
* we really map back to the given source coordinate and that we are
* the smallest destination coordinate that does so.
*
* Note that, in practice, the answer from the initial guess tends to
* be the right answer most of the time and the loop ends up finding
* one iteration to be ">= srctarget" and the next to be "< srctarget"
* and thus finds the answer in 2 iterations. A small number of
* times, the initial guess is 1 too low and so we do one iteration
* at "< srctarget" and the next at ">= srctarget" and again find the
* answer in 2 iterations. All cases encountered during testing ended
* up falling into one of those 2 categories and so the loop was always
* executed exactly twice.
*
* Note also that the calculation of srcloc below may attempt to calculate
* the src location of the destination pixel which is "1 beyond" the
* end of the source image. Since our shift calculation code in the
* main function only guaranteed that "srcw << shift" did not overflow
* a 32-bit signed integer, we cannot guarantee that "(srcw+1) << shift"
* or, more generally, "(srcw << shift)+srcinc" does not overflow.
* As a result, we perform our calculations here with jlong values
* so that we aren't affected by this overflow. Since srcw (shifted)
* and srcinc are both 32-bit values, their sum cannot possibly overflow
* a jlong. In fact, we can step up to a couple of billion steps of
* size "srcinc" past the end of the image before we have to worry
* about overflow - in practice, though, the search never steps more
* than 1 past the end of the image so this buffer is more than enough.
*/
static jint
refine(jint intorigin, jdouble dblorigin, jint tilesize,
jdouble scale, jint srctarget, jint srcinc)
{
/* Make a first estimate of dest coordinate from srctarget */
jint dstloc = (jint) ceil(dblorigin + srctarget / scale - 0.5);
/* Loop until we get at least one value < and one >= the target */
jboolean wasneg = JNI_FALSE;
jboolean waspos = JNI_FALSE;
jlong lsrcinc = srcinc;
jlong lsrctarget = srctarget;
while (JNI_TRUE) {
/*
* Find src coordinate from dest coordinate using the same
* math we will use below when iterating over tiles.
*/
jint tilestart = TILESTART(dstloc, intorigin, tilesize);
jlong lsrcloc = (jlong) SRCLOC(tilestart, dblorigin, scale);
if (dstloc > tilestart) {
lsrcloc += lsrcinc * ((jlong) dstloc - tilestart);
}
if (lsrcloc >= lsrctarget) {
/*
* If we were previously less than target, then the current
* dstloc is the smallest dst which maps >= the target.
*/
if (wasneg) break;
dstloc--;
waspos = JNI_TRUE;
} else {
/*
* If we were previously greater than target, then this must
* be the first dstloc which maps to < the target. Since we
* want the smallest which maps >= the target, increment it
* first before returning.
*/
dstloc++;
if (waspos) break;
wasneg = JNI_TRUE;
}
}
return dstloc;
}
/*
* Class: sun_java2d_loops_ScaledBlit
* Method: Scale
* Signature: (Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;IIIIDDDD)V
*/
JNIEXPORT void JNICALL
Java_sun_java2d_loops_ScaledBlit_Scale
(JNIEnv *env, jobject self,
jobject srcData, jobject dstData,
jobject comp, jobject clip,
jint sx1, jint sy1, jint sx2, jint sy2,
jdouble ddx1, jdouble ddy1, jdouble ddx2, jdouble ddy2)
{
SurfaceDataOps *srcOps;
SurfaceDataOps *dstOps;
SurfaceDataRasInfo srcInfo;
SurfaceDataRasInfo dstInfo;
NativePrimitive *pPrim;
CompositeInfo compInfo;
jint sxinc, syinc, shift;
jint tilesize;
jint idx1, idy1;
jdouble scalex, scaley;
RegionData clipInfo;
jint dstFlags;
jboolean xunderflow, yunderflow;
pPrim = GetNativePrim(env, self);
if (pPrim == NULL) {
return;
}
if (pPrim->pCompType->getCompInfo != NULL) {
(*pPrim->pCompType->getCompInfo)(env, &compInfo, comp);
}
if (Region_GetInfo(env, clip, &clipInfo)) {
return;
}
srcOps = SurfaceData_GetOps(env, srcData);
dstOps = SurfaceData_GetOps(env, dstData);
if (srcOps == 0 || dstOps == 0) {
return;
}
/*
* Determine the precision to use for the fixed point math
* for the coordinate scaling.
* - OR together srcw and srch to get the MSB between the two
* - Next shift it up until it goes negative
* - Count the shifts and that will be the most accurate
* precision available for the fixed point math
* - a source coordinate of 1.0 will be (1 << shift)
* - srcw & srch will be (srcw << shift) and (srch << shift)
* and will not overflow
* Note that if srcw or srch are so large that they are
* negative numbers before shifting, then:
* - shift will be 0
* - tilesize will end up being 1x1 tiles
* - we will brute force calculate the source location
* of every destination pixel using the TILESTART and
* SRCLOC macros in this function and then call the
* scale helper function to copy one pixel at a time.
* - TILESTART involves mostly jdouble calculations so
* it should not have integer overflow problems.
*/
sxinc = (sx2 - sx1) | (sy2 - sy1);
shift = 0;
if (sxinc > 0) {
while ((sxinc <<= 1) > 0) {
shift++;
}
}
/*
* Now determine the scaled integer increments used to traverse
* the source image for each destination pixel. Our shift value
* has been calculated above so that any location within the
* destination image can be represented as a scaled integer
* without incurring integer overflow.
*
* But we also need to worry about overflow of the sxinc and syinc
* parameters. We already know that "srcw<<shift" and "srch<<shift"
* cannot overflow a jint, and the only time that sxinc and syinc
* can be larger than those two values is if ddy2-ddy1 or ddx2-ddx1
* are smaller than 1. Since this situation implies that the
* output area is no more than one pixel wide or tall, then we are
* stepping by distances that are at least the size of the image
* and only one destination pixel will ever be rendered - thus the
* amount by which we step is largely irrelevant since after
* drawing the first "in bounds" pixel, we will step completely
* out of the source image and render nothing more. As a result,
* we assign the appropriate "size of image" stepping parameter
* for any scale to smaller than one device pixel.
*/
yunderflow = (ddy2 - ddy1) < 1.0;
scaley = (((jdouble) (sy2 - sy1)) / (ddy2 - ddy1)) * (1 << shift);
syinc = (yunderflow ? ((sy2 - sy1) << shift) : (jint) scaley);
xunderflow = (ddx2 - ddx1) < 1.0;
scalex = (((jdouble) (sx2 - sx1)) / (ddx2 - ddx1)) * (1 << shift);
sxinc = (xunderflow ? ((sx2 - sx1) << shift) : (jint) scalex);
tilesize = findpow2tilesize(shift, sxinc, syinc);
srcInfo.bounds.x1 = sx1;
srcInfo.bounds.y1 = sy1;
srcInfo.bounds.x2 = sx2;
srcInfo.bounds.y2 = sy2;
if (srcOps->Lock(env, srcOps, &srcInfo, pPrim->srcflags) != SD_SUCCESS) {
return;
}
if (srcInfo.bounds.x2 <= srcInfo.bounds.x1 ||
srcInfo.bounds.y2 <= srcInfo.bounds.y1)
{
SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
return;
}
/*
* Only refine lower bounds if lower source coordinate was clipped
* because the math will work out to be exactly idx1, idy1 if not.
* Always refine upper bounds since we want to make sure not to
* overstep the source bounds based on the tiled iteration math.
*
* For underflow cases, simply check if the SRCLOC for the single
* destination pixel maps inside the source bounds. If it does,
* we render that pixel row or column (and only that pixel row
* or column). If it does not, we render nothing.
*/
idx1 = (jint) ceil(ddx1 - 0.5);
idy1 = (jint) ceil(ddy1 - 0.5);
if (xunderflow) {
jdouble x = sx1 + (SRCLOC(idx1, ddx1, scalex) / (1 << shift));
dstInfo.bounds.x1 = dstInfo.bounds.x2 = idx1;
if (x >= srcInfo.bounds.x1 && x < srcInfo.bounds.x2) {
dstInfo.bounds.x2++;
}
} else {
dstInfo.bounds.x1 = ((srcInfo.bounds.x1 <= sx1)
? idx1
: refine(idx1, ddx1, tilesize, scalex,
(srcInfo.bounds.x1-sx1) << shift, sxinc));
dstInfo.bounds.x2 = refine(idx1, ddx1, tilesize, scalex,
(srcInfo.bounds.x2-sx1) << shift, sxinc);
}
if (yunderflow) {
jdouble y = sy1 + (SRCLOC(idy1, ddy1, scaley) / (1 << shift));
dstInfo.bounds.y1 = dstInfo.bounds.y2 = idy1;
if (y >= srcInfo.bounds.y1 && y < srcInfo.bounds.y2) {
dstInfo.bounds.y2++;
}
} else {
dstInfo.bounds.y1 = ((srcInfo.bounds.y1 <= sy1)
? idy1
: refine(idy1, ddy1, tilesize, scaley,
(srcInfo.bounds.y1-sy1) << shift, syinc));
dstInfo.bounds.y2 = refine(idy1, ddy1, tilesize, scaley,
(srcInfo.bounds.y2-sy1) << shift, syinc);
}
SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds);
dstFlags = pPrim->dstflags;
if (!Region_IsRectangular(&clipInfo)) {
dstFlags |= SD_LOCK_PARTIAL_WRITE;
}
if (dstOps->Lock(env, dstOps, &dstInfo, dstFlags) != SD_SUCCESS) {
SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
return;
}
if (dstInfo.bounds.x2 > dstInfo.bounds.x1 &&
dstInfo.bounds.y2 > dstInfo.bounds.y1)
{
srcOps->GetRasInfo(env, srcOps, &srcInfo);
dstOps->GetRasInfo(env, dstOps, &dstInfo);
if (srcInfo.rasBase && dstInfo.rasBase) {
SurfaceDataBounds span;
void *pSrc = PtrCoord(srcInfo.rasBase,
sx1, srcInfo.pixelStride,
sy1, srcInfo.scanStride);
Region_IntersectBounds(&clipInfo, &dstInfo.bounds);
Region_StartIteration(env, &clipInfo);
if (tilesize >= (ddx2 - ddx1) &&
tilesize >= (ddy2 - ddy1))
{
/* Do everything in one tile */
jint sxloc = (jint) SRCLOC(idx1, ddx1, scalex);
jint syloc = (jint) SRCLOC(idy1, ddy1, scaley);
while (Region_NextIteration(&clipInfo, &span)) {
jint tsxloc = sxloc;
jint tsyloc = syloc;
void *pDst;
if (span.y1 > idy1) {
tsyloc += syinc * (span.y1 - idy1);
}
if (span.x1 > idx1) {
tsxloc += sxinc * (span.x1 - idx1);
}
pDst = PtrCoord(dstInfo.rasBase,
span.x1, dstInfo.pixelStride,
span.y1, dstInfo.scanStride);
(*pPrim->funcs.scaledblit)(pSrc, pDst,
span.x2-span.x1, span.y2-span.y1,
tsxloc, tsyloc,
sxinc, syinc, shift,
&srcInfo, &dstInfo,
pPrim, &compInfo);
}
} else {
/* Break each clip span into tiles for better accuracy. */
while (Region_NextIteration(&clipInfo, &span)) {
jint tilex, tiley;
jint sxloc, syloc;
jint x1, y1, x2, y2;
void *pDst;
for (tiley = TILESTART(span.y1, idy1, tilesize);
tiley < span.y2;
tiley += tilesize)
{
/* Clip span to Y range of current tile */
y1 = tiley;
y2 = tiley + tilesize;
if (y1 < span.y1) y1 = span.y1;
if (y2 > span.y2) y2 = span.y2;
/* Find scaled source coordinate of first pixel */
syloc = (jint) SRCLOC(tiley, ddy1, scaley);
if (y1 > tiley) {
syloc += syinc * (y1 - tiley);
}
for (tilex = TILESTART(span.x1, idx1, tilesize);
tilex < span.x2;
tilex += tilesize)
{
/* Clip span to X range of current tile */
x1 = tilex;
x2 = tilex + tilesize;
if (x1 < span.x1) x1 = span.x1;
if (x2 > span.x2) x2 = span.x2;
/* Find scaled source coordinate of first pixel */
sxloc = (jint) SRCLOC(tilex, ddx1, scalex);
if (x1 > tilex) {
sxloc += sxinc * (x1 - tilex);
}
pDst = PtrCoord(dstInfo.rasBase,
x1, dstInfo.pixelStride,
y1, dstInfo.scanStride);
(*pPrim->funcs.scaledblit)(pSrc, pDst, x2-x1, y2-y1,
sxloc, syloc,
sxinc, syinc, shift,
&srcInfo, &dstInfo,
pPrim, &compInfo);
}
}
}
}
Region_EndIteration(env, &clipInfo);
}
SurfaceData_InvokeRelease(env, dstOps, &dstInfo);
SurfaceData_InvokeRelease(env, srcOps, &srcInfo);
}
SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
}