/* blob: 0f8baaf16cfeb1aa5708997153cca3f73f85e87a [file] [log] [blame] */
/*
* Copyright (c) 2006-2007 Jan Behrens, FlexiGuided GmbH, Berlin
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*
* File name: pgsql/utf8proc_pgsql.c
* Version: 1.1.1
* Last changed: 2007-07-22
*
* Description:
* PostgreSQL extension to provide two functions 'unifold' and 'unistrip',
* which can be used to case-fold and normalize index fields and
* optionally strip marks (e.g. accents) from strings.
*/
#include "../utf8proc.c"
#include <postgres.h>
#include <utils/elog.h>
#include <fmgr.h>
#include <string.h>
#include <unistd.h>
#include <utils/builtins.h>
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif
/* utf8proc option flags handed to the mapping routine by the 'unifold'
 * SQL function. */
#define UTF8PROC_PGSQL_FOLD_OPTS ( \
  UTF8PROC_REJECTNA | UTF8PROC_COMPAT   | UTF8PROC_COMPOSE | \
  UTF8PROC_STABLE   | UTF8PROC_IGNORE   | UTF8PROC_STRIPCC | \
  UTF8PROC_NLF2LF   | UTF8PROC_CASEFOLD | UTF8PROC_LUMP )
/* utf8proc option flags used by 'unistrip': every flag of the fold set
 * with UTF8PROC_STRIPMARK added. */
#define UTF8PROC_PGSQL_STRIP_OPTS ( \
  UTF8PROC_PGSQL_FOLD_OPTS | UTF8PROC_STRIPMARK )
/*
 * Maps the payload of a PostgreSQL text value through utf8proc
 * (utf8proc_decompose followed by utf8proc_reencode) and stores the
 * outcome in a newly palloc'd text value.
 *
 * input_string:      text value; VARDATA / VARSIZE - VARHDRSZ are taken
 *                    as the UTF-8 input bytes
 * output_string_ptr: receives the pointer to the allocated result buffer.
 *                    It is written as soon as the buffer exists, so if a
 *                    later step fails the buffer is still reachable through
 *                    this pointer and has to be released by the caller.
 *                    If the function fails before allocating, the pointer
 *                    is left untouched.
 * options:           utf8proc option flags, forwarded unchanged to both
 *                    utf8proc calls
 *
 * Returns the payload length in bytes of the result (>= 0), or a negative
 * UTF8PROC_ERROR_* code. The varlena length of the result is only set on
 * success.
 */
ssize_t utf8proc_pgsql_utf8map(
  text *input_string, text **output_string_ptr, int options
) {
  ssize_t result;
  text *output_string;
  /* first pass: no buffer is supplied, the return value is only used to
   * size the allocation below */
  result = utf8proc_decompose(
    VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
    NULL, 0, options
  );
  if (result < 0) return result;
  /* refuse lengths for which the size computation below would wrap */
  if (result > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t))
    return UTF8PROC_ERROR_OVERFLOW;
  // reserve one extra byte for termination
  *output_string_ptr = palloc(result * sizeof(int32_t) + 1 + VARHDRSZ);
  output_string = *output_string_ptr;
  /* NOTE(review): palloc is expected to raise an error itself rather than
   * return NULL; the check is kept as a defensive measure */
  if (!output_string) return UTF8PROC_ERROR_NOMEM;
  /* second pass: decompose into the data area of the result buffer */
  result = utf8proc_decompose(
    VARDATA(input_string), VARSIZE(input_string) - VARHDRSZ,
    (int32_t *)VARDATA(output_string), result, options
  );
  if (result < 0) return result;
  /* the re-encoding is done in place within the same buffer */
  result = utf8proc_reencode(
    (int32_t *)VARDATA(output_string), result, options
  );
  if (result >= 0) {
    /* VARATT_SIZEP was removed in PostgreSQL 8.3; SET_VARSIZE is its
     * replacement. Use it whenever the server headers provide it and fall
     * back to the old macro for older servers. */
#ifdef SET_VARSIZE
    SET_VARSIZE(output_string, result + VARHDRSZ);
#else
    VARATT_SIZEP(output_string) = result + VARHDRSZ;
#endif
  }
  return result;
}
/*
 * Post-processes the result code of utf8proc_pgsql_utf8map.
 *
 * result:        value returned by the mapping routine
 * output_string: result buffer of the mapping routine, may be NULL
 *
 * Non-negative results are ignored. For a negative result the buffer (if
 * any) is released with pfree. UTF8PROC_ERROR_INVALIDUTF8 and
 * UTF8PROC_ERROR_NOTASSIGNED then return silently to the caller; every
 * other code is reported through ereport(ERROR) with the text supplied by
 * utf8proc_errmsg.
 */
void utf8proc_pgsql_utf8map_errchk(ssize_t result, text *output_string) {
  int sqlstate;

  /* nothing to do unless the mapping failed */
  if (result >= 0) return;

  /* the partially filled buffer is of no use after a failure */
  if (output_string) pfree(output_string);

  /* these two codes are not escalated to an SQL error */
  if (result == UTF8PROC_ERROR_INVALIDUTF8 ||
      result == UTF8PROC_ERROR_NOTASSIGNED) {
    return;
  }

  /* pick the SQL error code matching the utf8proc error */
  if (result == UTF8PROC_ERROR_NOMEM) {
    sqlstate = ERRCODE_OUT_OF_MEMORY;
  } else if (result == UTF8PROC_ERROR_OVERFLOW) {
    sqlstate = ERRCODE_PROGRAM_LIMIT_EXCEEDED;
  } else {
    sqlstate = ERRCODE_INTERNAL_ERROR;
  }

  ereport(ERROR, (
    errcode(sqlstate),
    errmsg("%s", utf8proc_errmsg(result))
  ));
}
PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold);
/*
 * SQL-callable entry point: maps its first argument (text) with
 * UTF8PROC_PGSQL_FOLD_OPTS.
 *
 * Returns the mapped text, or SQL NULL when the mapping fails with an
 * error that utf8proc_pgsql_utf8map_errchk does not raise as an SQL error.
 */
Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) {
  text *mapped = NULL;
  text *arg = PG_GETARG_TEXT_P(0);
  ssize_t status = utf8proc_pgsql_utf8map(
    arg, &mapped, UTF8PROC_PGSQL_FOLD_OPTS
  );

  /* the argument is no longer needed once the mapping has run */
  PG_FREE_IF_COPY(arg, 0);

  /* frees 'mapped' on failure; may not return if it raises an error */
  utf8proc_pgsql_utf8map_errchk(status, mapped);

  if (status < 0) {
    PG_RETURN_NULL();
  }
  PG_RETURN_TEXT_P(mapped);
}
PG_FUNCTION_INFO_V1(utf8proc_pgsql_unistrip);
/*
 * SQL-callable entry point: maps its first argument (text) with
 * UTF8PROC_PGSQL_STRIP_OPTS.
 *
 * Returns the mapped text, or SQL NULL when the mapping fails with an
 * error that utf8proc_pgsql_utf8map_errchk does not raise as an SQL error.
 */
Datum utf8proc_pgsql_unistrip(PG_FUNCTION_ARGS) {
  text *mapped = NULL;
  text *arg = PG_GETARG_TEXT_P(0);
  ssize_t status = utf8proc_pgsql_utf8map(
    arg, &mapped, UTF8PROC_PGSQL_STRIP_OPTS
  );

  /* the argument is no longer needed once the mapping has run */
  PG_FREE_IF_COPY(arg, 0);

  /* frees 'mapped' on failure; may not return if it raises an error */
  utf8proc_pgsql_utf8map_errchk(status, mapped);

  if (status < 0) {
    PG_RETURN_NULL();
  }
  PG_RETURN_TEXT_P(mapped);
}