| /* |
| ** 2013-05-28 |
| ** |
| ** The author disclaims copyright to this source code. In place of |
| ** a legal notice, here is a blessing: |
| ** |
| ** May you do good and not evil. |
| ** May you find forgiveness for yourself and forgive others. |
| ** May you share freely, never taking more than you give. |
| ** |
| ****************************************************************************** |
| ** |
| ** This file contains code to implement the percentile(Y,P) SQL function |
| ** as described below: |
| ** |
| ** (1) The percentile(Y,P) function is an aggregate function taking |
| ** exactly two arguments. |
| ** |
| ** (2) If the P argument to percentile(Y,P) is not the same for every |
| ** row in the aggregate then an error is thrown. The word "same" |
| ** in the previous sentence means that the value differ by less |
| ** than 0.001. |
| ** |
| ** (3) If the P argument to percentile(Y,P) evaluates to anything other |
| ** than a number in the range of 0.0 to 100.0 inclusive then an |
| ** error is thrown. |
| ** |
| ** (4) If any Y argument to percentile(Y,P) evaluates to a value that |
| ** is not NULL and is not numeric then an error is thrown. |
| ** |
| ** (5) If any Y argument to percentile(Y,P) evaluates to plus or minus |
| ** infinity then an error is thrown. (SQLite always interprets NaN |
| ** values as NULL.) |
| ** |
| ** (6) Both Y and P in percentile(Y,P) can be arbitrary expressions, |
| ** including CASE WHEN expressions. |
| ** |
| ** (7) The percentile(Y,P) aggregate is able to handle inputs of at least |
| ** one million (1,000,000) rows. |
| ** |
| ** (8) If there are no non-NULL values for Y, then percentile(Y,P) |
| ** returns NULL. |
| ** |
| ** (9) If there is exactly one non-NULL value for Y, the percentile(Y,P) |
| ** returns the one Y value. |
| ** |
| ** (10) If there N non-NULL values of Y where N is two or more and |
| ** the Y values are ordered from least to greatest and a graph is |
| ** drawn from 0 to N-1 such that the height of the graph at J is |
| ** the J-th Y value and such that straight lines are drawn between |
| ** adjacent Y values, then the percentile(Y,P) function returns |
| ** the height of the graph at P*(N-1)/100. |
| ** |
| ** (11) The percentile(Y,P) function always returns either a floating |
| ** point number or NULL. |
| ** |
| ** (12) The percentile(Y,P) is implemented as a single C99 source-code |
| ** file that compiles into a shared-library or DLL that can be loaded |
| ** into SQLite using the sqlite3_load_extension() interface. |
| */ |
| #include "sqlite3ext.h" |
| SQLITE_EXTENSION_INIT1 |
| #include <assert.h> |
| #include <string.h> |
| #include <stdlib.h> |
| |
| /* The following object is the session context for a single percentile() |
| ** function. We have to remember all input Y values until the very end. |
| ** Those values are accumulated in the Percentile.a[] array. |
| */ |
| typedef struct Percentile Percentile; |
| struct Percentile { |
| unsigned nAlloc; /* Number of slots allocated for a[] */ |
| unsigned nUsed; /* Number of slots actually used in a[] */ |
| double rPct; /* 1.0 more than the value for P */ |
| double *a; /* Array of Y values */ |
| }; |
| |
| /* |
| ** Return TRUE if the input floating-point number is an infinity. |
| */ |
| static int isInfinity(double r){ |
| sqlite3_uint64 u; |
| assert( sizeof(u)==sizeof(r) ); |
| memcpy(&u, &r, sizeof(u)); |
| return ((u>>52)&0x7ff)==0x7ff; |
| } |
| |
| /* |
| ** Return TRUE if two doubles differ by 0.001 or less |
| */ |
| static int sameValue(double a, double b){ |
| a -= b; |
| return a>=-0.001 && a<=0.001; |
| } |
| |
| /* |
| ** The "step" function for percentile(Y,P) is called once for each |
| ** input row. |
| */ |
| static void percentStep(sqlite3_context *pCtx, int argc, sqlite3_value **argv){ |
| Percentile *p; |
| double rPct; |
| int eType; |
| double y; |
| assert( argc==2 ); |
| |
| /* Requirement 3: P must be a number between 0 and 100 */ |
| eType = sqlite3_value_numeric_type(argv[1]); |
| rPct = sqlite3_value_double(argv[1]); |
| if( (eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT) |
| || rPct<0.0 || rPct>100.0 ){ |
| sqlite3_result_error(pCtx, "2nd argument to percentile() is not " |
| "a number between 0.0 and 100.0", -1); |
| return; |
| } |
| |
| /* Allocate the session context. */ |
| p = (Percentile*)sqlite3_aggregate_context(pCtx, sizeof(*p)); |
| if( p==0 ) return; |
| |
| /* Remember the P value. Throw an error if the P value is different |
| ** from any prior row, per Requirement (2). */ |
| if( p->rPct==0.0 ){ |
| p->rPct = rPct+1.0; |
| }else if( !sameValue(p->rPct,rPct+1.0) ){ |
| sqlite3_result_error(pCtx, "2nd argument to percentile() is not the " |
| "same for all input rows", -1); |
| return; |
| } |
| |
| /* Ignore rows for which Y is NULL */ |
| eType = sqlite3_value_type(argv[0]); |
| if( eType==SQLITE_NULL ) return; |
| |
| /* If not NULL, then Y must be numeric. Otherwise throw an error. |
| ** Requirement 4 */ |
| if( eType!=SQLITE_INTEGER && eType!=SQLITE_FLOAT ){ |
| sqlite3_result_error(pCtx, "1st argument to percentile() is not " |
| "numeric", -1); |
| return; |
| } |
| |
| /* Throw an error if the Y value is infinity or NaN */ |
| y = sqlite3_value_double(argv[0]); |
| if( isInfinity(y) ){ |
| sqlite3_result_error(pCtx, "Inf input to percentile()", -1); |
| return; |
| } |
| |
| /* Allocate and store the Y */ |
| if( p->nUsed>=p->nAlloc ){ |
| unsigned n = p->nAlloc*2 + 250; |
| double *a = sqlite3_realloc64(p->a, sizeof(double)*n); |
| if( a==0 ){ |
| sqlite3_free(p->a); |
| memset(p, 0, sizeof(*p)); |
| sqlite3_result_error_nomem(pCtx); |
| return; |
| } |
| p->nAlloc = n; |
| p->a = a; |
| } |
| p->a[p->nUsed++] = y; |
| } |
| |
| /* |
| ** Compare to doubles for sorting using qsort() |
| */ |
| static int SQLITE_CDECL doubleCmp(const void *pA, const void *pB){ |
| double a = *(double*)pA; |
| double b = *(double*)pB; |
| if( a==b ) return 0; |
| if( a<b ) return -1; |
| return +1; |
| } |
| |
| /* |
| ** Called to compute the final output of percentile() and to clean |
| ** up all allocated memory. |
| */ |
| static void percentFinal(sqlite3_context *pCtx){ |
| Percentile *p; |
| unsigned i1, i2; |
| double v1, v2; |
| double ix, vx; |
| p = (Percentile*)sqlite3_aggregate_context(pCtx, 0); |
| if( p==0 ) return; |
| if( p->a==0 ) return; |
| if( p->nUsed ){ |
| qsort(p->a, p->nUsed, sizeof(double), doubleCmp); |
| ix = (p->rPct-1.0)*(p->nUsed-1)*0.01; |
| i1 = (unsigned)ix; |
| i2 = ix==(double)i1 || i1==p->nUsed-1 ? i1 : i1+1; |
| v1 = p->a[i1]; |
| v2 = p->a[i2]; |
| vx = v1 + (v2-v1)*(ix-i1); |
| sqlite3_result_double(pCtx, vx); |
| } |
| sqlite3_free(p->a); |
| memset(p, 0, sizeof(*p)); |
| } |
| |
| |
| #ifdef _WIN32 |
| __declspec(dllexport) |
| #endif |
| int sqlite3_percentile_init( |
| sqlite3 *db, |
| char **pzErrMsg, |
| const sqlite3_api_routines *pApi |
| ){ |
| int rc = SQLITE_OK; |
| SQLITE_EXTENSION_INIT2(pApi); |
| (void)pzErrMsg; /* Unused parameter */ |
| rc = sqlite3_create_function(db, "percentile", 2, |
| SQLITE_UTF8|SQLITE_INNOCUOUS, 0, |
| 0, percentStep, percentFinal); |
| return rc; |
| } |