/* strop module */ | |
#define PY_SSIZE_T_CLEAN | |
#include "Python.h" | |
#include <ctype.h> | |
PyDoc_STRVAR(strop_module__doc__, | |
"Common string manipulations, optimized for speed.\n" | |
"\n" | |
"Always use \"import string\" rather than referencing\n" | |
"this module directly."); | |
/* XXX This file assumes that the <ctype.h> is*() functions | |
XXX are defined for all 8-bit characters! */ | |
/* Issue the module-wide DeprecationWarning.  When PyErr_Warn() reports a
   failure (nonzero result), return NULL from the *calling* function.
   The body is wrapped in do { } while (0) so that "WARN;" expands to exactly
   one statement and can never capture an "else" that follows it. */
#define WARN \
    do { \
        if (PyErr_Warn(PyExc_DeprecationWarning, \
                       "strop functions are obsolete; use string methods")) \
            return NULL; \
    } while (0)
/* The lstrip(), rstrip() and strip() functions are implemented
   in do_strip(), which uses an additional parameter to indicate what
   type of strip should occur.  The selectors are plain int constants. */
enum {
    LEFTSTRIP = 0,      /* strip leading whitespace only */
    RIGHTSTRIP = 1,     /* strip trailing whitespace only */
    BOTHSTRIP = 2       /* strip both ends */
};
static PyObject * | |
split_whitespace(char *s, Py_ssize_t len, Py_ssize_t maxsplit) | |
{ | |
Py_ssize_t i = 0, j; | |
int err; | |
Py_ssize_t countsplit = 0; | |
PyObject* item; | |
PyObject *list = PyList_New(0); | |
if (list == NULL) | |
return NULL; | |
while (i < len) { | |
while (i < len && isspace(Py_CHARMASK(s[i]))) { | |
i = i+1; | |
} | |
j = i; | |
while (i < len && !isspace(Py_CHARMASK(s[i]))) { | |
i = i+1; | |
} | |
if (j < i) { | |
item = PyString_FromStringAndSize(s+j, i-j); | |
if (item == NULL) | |
goto finally; | |
err = PyList_Append(list, item); | |
Py_DECREF(item); | |
if (err < 0) | |
goto finally; | |
countsplit++; | |
while (i < len && isspace(Py_CHARMASK(s[i]))) { | |
i = i+1; | |
} | |
if (maxsplit && (countsplit >= maxsplit) && i < len) { | |
item = PyString_FromStringAndSize( | |
s+i, len - i); | |
if (item == NULL) | |
goto finally; | |
err = PyList_Append(list, item); | |
Py_DECREF(item); | |
if (err < 0) | |
goto finally; | |
i = len; | |
} | |
} | |
} | |
return list; | |
finally: | |
Py_DECREF(list); | |
return NULL; | |
} | |
PyDoc_STRVAR(splitfields__doc__, | |
"split(s [,sep [,maxsplit]]) -> list of strings\n" | |
"splitfields(s [,sep [,maxsplit]]) -> list of strings\n" | |
"\n" | |
"Return a list of the words in the string s, using sep as the\n" | |
"delimiter string. If maxsplit is nonzero, splits into at most\n" | |
"maxsplit words. If sep is not specified, any whitespace string\n" | |
"is a separator. Maxsplit defaults to 0.\n" | |
"\n" | |
"(split and splitfields are synonymous)"); | |
/* split(s [,sep [,maxsplit]]) / splitfields(...)

   Without a separator (or with None) the work is delegated to
   split_whitespace().  Otherwise s is cut at every occurrence of sep,
   stopping after maxsplit cuts when maxsplit is nonzero; the text after
   the last cut is always appended as the final item.  An empty separator
   raises ValueError. */
static PyObject *
strop_splitfields(PyObject *self, PyObject *args)
{
    char *s;
    char *sub = NULL;
    Py_ssize_t len;
    Py_ssize_t n = 0;
    Py_ssize_t maxsplit = 0;
    Py_ssize_t splitcount = 0;
    Py_ssize_t start = 0;       /* beginning of the current field */
    Py_ssize_t pos = 0;         /* current scan position */
    PyObject *list, *item;
    int err;

    WARN;
    if (!PyArg_ParseTuple(args, "t#|z#n:split", &s, &len, &sub, &n, &maxsplit))
        return NULL;
    if (sub == NULL)
        return split_whitespace(s, len, maxsplit);
    if (n == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }

    list = PyList_New(0);
    if (list == NULL)
        return NULL;

    while (pos + n <= len) {
        /* Cheap first-byte test before the full comparison. */
        if (s[pos] != sub[0] || (n != 1 && memcmp(s + pos, sub, n) != 0)) {
            pos++;
            continue;
        }
        item = PyString_FromStringAndSize(s + start, pos - start);
        if (item == NULL)
            goto fail;
        err = PyList_Append(list, item);
        Py_DECREF(item);
        if (err < 0)
            goto fail;
        pos += n;
        start = pos;
        splitcount++;
        if (maxsplit && splitcount >= maxsplit)
            break;
    }

    /* Whatever follows the last separator is the final field. */
    item = PyString_FromStringAndSize(s + start, len - start);
    if (item == NULL)
        goto fail;
    err = PyList_Append(list, item);
    Py_DECREF(item);
    if (err < 0)
        goto fail;
    return list;

fail:
    Py_DECREF(list);
    return NULL;
}
PyDoc_STRVAR(joinfields__doc__, | |
"join(list [,sep]) -> string\n" | |
"joinfields(list [,sep]) -> string\n" | |
"\n" | |
"Return a string composed of the words in list, with\n" | |
"intervening occurrences of sep. Sep defaults to a single\n" | |
"space.\n" | |
"\n" | |
"(join and joinfields are synonymous)"); | |
static PyObject * | |
strop_joinfields(PyObject *self, PyObject *args) | |
{ | |
PyObject *seq; | |
char *sep = NULL; | |
Py_ssize_t seqlen, seplen = 0; | |
Py_ssize_t i, reslen = 0, slen = 0, sz = 100; | |
PyObject *res = NULL; | |
char* p = NULL; | |
ssizeargfunc getitemfunc; | |
WARN; | |
if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen)) | |
return NULL; | |
if (sep == NULL) { | |
sep = " "; | |
seplen = 1; | |
} | |
seqlen = PySequence_Size(seq); | |
if (seqlen < 0 && PyErr_Occurred()) | |
return NULL; | |
if (seqlen == 1) { | |
/* Optimization if there's only one item */ | |
PyObject *item = PySequence_GetItem(seq, 0); | |
if (item && !PyString_Check(item)) { | |
PyErr_SetString(PyExc_TypeError, | |
"first argument must be sequence of strings"); | |
Py_DECREF(item); | |
return NULL; | |
} | |
return item; | |
} | |
if (!(res = PyString_FromStringAndSize((char*)NULL, sz))) | |
return NULL; | |
p = PyString_AsString(res); | |
/* optimize for lists, since it's the most common case. all others | |
* (tuples and arbitrary sequences) just use the sequence abstract | |
* interface. | |
*/ | |
if (PyList_Check(seq)) { | |
for (i = 0; i < seqlen; i++) { | |
PyObject *item = PyList_GET_ITEM(seq, i); | |
if (!PyString_Check(item)) { | |
PyErr_SetString(PyExc_TypeError, | |
"first argument must be sequence of strings"); | |
Py_DECREF(res); | |
return NULL; | |
} | |
slen = PyString_GET_SIZE(item); | |
if (slen > PY_SSIZE_T_MAX - reslen || | |
seplen > PY_SSIZE_T_MAX - reslen - seplen) { | |
PyErr_SetString(PyExc_OverflowError, | |
"input too long"); | |
Py_DECREF(res); | |
return NULL; | |
} | |
while (reslen + slen + seplen >= sz) { | |
if (_PyString_Resize(&res, sz * 2) < 0) | |
return NULL; | |
sz *= 2; | |
p = PyString_AsString(res) + reslen; | |
} | |
if (i > 0) { | |
memcpy(p, sep, seplen); | |
p += seplen; | |
reslen += seplen; | |
} | |
memcpy(p, PyString_AS_STRING(item), slen); | |
p += slen; | |
reslen += slen; | |
} | |
_PyString_Resize(&res, reslen); | |
return res; | |
} | |
if (seq->ob_type->tp_as_sequence == NULL || | |
(getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL) | |
{ | |
PyErr_SetString(PyExc_TypeError, | |
"first argument must be a sequence"); | |
return NULL; | |
} | |
/* This is now type safe */ | |
for (i = 0; i < seqlen; i++) { | |
PyObject *item = getitemfunc(seq, i); | |
if (!item || !PyString_Check(item)) { | |
PyErr_SetString(PyExc_TypeError, | |
"first argument must be sequence of strings"); | |
Py_DECREF(res); | |
Py_XDECREF(item); | |
return NULL; | |
} | |
slen = PyString_GET_SIZE(item); | |
if (slen > PY_SSIZE_T_MAX - reslen || | |
seplen > PY_SSIZE_T_MAX - reslen - seplen) { | |
PyErr_SetString(PyExc_OverflowError, | |
"input too long"); | |
Py_DECREF(res); | |
Py_XDECREF(item); | |
return NULL; | |
} | |
while (reslen + slen + seplen >= sz) { | |
if (_PyString_Resize(&res, sz * 2) < 0) { | |
Py_DECREF(item); | |
return NULL; | |
} | |
sz *= 2; | |
p = PyString_AsString(res) + reslen; | |
} | |
if (i > 0) { | |
memcpy(p, sep, seplen); | |
p += seplen; | |
reslen += seplen; | |
} | |
memcpy(p, PyString_AS_STRING(item), slen); | |
p += slen; | |
reslen += slen; | |
Py_DECREF(item); | |
} | |
_PyString_Resize(&res, reslen); | |
return res; | |
} | |
PyDoc_STRVAR(find__doc__, | |
"find(s, sub [,start [,end]]) -> in\n" | |
"\n" | |
"Return the lowest index in s where substring sub is found,\n" | |
"such that sub is contained within s[start,end]. Optional\n" | |
"arguments start and end are interpreted as in slice notation.\n" | |
"\n" | |
"Return -1 on failure."); | |
/* find(s, sub [,start [,end]])

   Return the lowest index at which sub occurs inside s[start:end], or -1.
   start/end are normalised like slice bounds (negative values count from
   the end and are clamped to 0; end is clamped to len(s)).  An empty sub
   matches at start as long as start <= end. */
static PyObject *
strop_find(PyObject *self, PyObject *args)
{
    char *s, *sub;
    Py_ssize_t len, n;
    Py_ssize_t start = 0;
    Py_ssize_t end = PY_SSIZE_T_MAX;
    Py_ssize_t pos, limit;

    WARN;
    if (!PyArg_ParseTuple(args, "t#t#|nn:find", &s, &len, &sub, &n, &start, &end))
        return NULL;

    if (end > len)
        end = len;
    if (end < 0) {
        end += len;
        if (end < 0)
            end = 0;
    }
    if (start < 0) {
        start += len;
        if (start < 0)
            start = 0;
    }

    if (n == 0 && start <= end)
        return PyInt_FromLong((long)start);

    /* Last position at which a match of length n still fits. */
    limit = end - n;
    for (pos = start; pos <= limit; pos++) {
        if (s[pos] != sub[0])
            continue;
        if (n == 1 || memcmp(&s[pos + 1], &sub[1], n - 1) == 0)
            return PyInt_FromLong((long)pos);
    }
    return PyInt_FromLong(-1L);
}
PyDoc_STRVAR(rfind__doc__, | |
"rfind(s, sub [,start [,end]]) -> int\n" | |
"\n" | |
"Return the highest index in s where substring sub is found,\n" | |
"such that sub is contained within s[start,end]. Optional\n" | |
"arguments start and end are interpreted as in slice notation.\n" | |
"\n" | |
"Return -1 on failure."); | |
/* rfind(s, sub [,start [,end]])

   Return the highest index at which sub occurs inside s[start:end], or -1.
   Bounds are normalised exactly as in find().  An empty sub matches at end
   as long as start <= end. */
static PyObject *
strop_rfind(PyObject *self, PyObject *args)
{
    char *s, *sub;
    Py_ssize_t len, n;
    Py_ssize_t start = 0;
    Py_ssize_t end = PY_SSIZE_T_MAX;
    Py_ssize_t pos;

    WARN;
    if (!PyArg_ParseTuple(args, "t#t#|nn:rfind", &s, &len, &sub, &n, &start, &end))
        return NULL;

    if (end > len)
        end = len;
    if (end < 0) {
        end += len;
        if (end < 0)
            end = 0;
    }
    if (start < 0) {
        start += len;
        if (start < 0)
            start = 0;
    }

    if (n == 0 && start <= end)
        return PyInt_FromLong((long)end);

    /* Walk backwards from the last position where sub still fits. */
    for (pos = end - n; pos >= start; pos--) {
        if (s[pos] != sub[0])
            continue;
        if (n == 1 || memcmp(&s[pos + 1], &sub[1], n - 1) == 0)
            return PyInt_FromLong((long)pos);
    }
    return PyInt_FromLong(-1L);
}
/* Shared implementation of strip(), lstrip() and rstrip().

   args is the string object itself; striptype is LEFTSTRIP, RIGHTSTRIP or
   BOTHSTRIP.  When nothing needs to be removed the argument is returned
   with a new reference, otherwise a new string holding the kept slice. */
static PyObject *
do_strip(PyObject *args, int striptype)
{
    char *s;
    Py_ssize_t len;
    Py_ssize_t left = 0;        /* index of first kept character */
    Py_ssize_t right;           /* one past the last kept character */

    if (PyString_AsStringAndSize(args, &s, &len))
        return NULL;

    if (striptype != RIGHTSTRIP) {
        for (; left < len && isspace(Py_CHARMASK(s[left])); left++)
            ;
    }

    right = len;
    if (striptype != LEFTSTRIP) {
        while (right > left && isspace(Py_CHARMASK(s[right - 1])))
            right--;
    }

    if (left != 0 || right != len)
        return PyString_FromStringAndSize(s + left, right - left);

    /* Nothing stripped: hand back the original object. */
    Py_INCREF(args);
    return args;
}
PyDoc_STRVAR(strip__doc__, | |
"strip(s) -> string\n" | |
"\n" | |
"Return a copy of the string s with leading and trailing\n" | |
"whitespace removed."); | |
/* strip(s): warn about deprecation, then strip both ends via do_strip(). */
static PyObject *
strop_strip(PyObject *self, PyObject *args)
{
    PyObject *stripped;

    WARN;
    stripped = do_strip(args, BOTHSTRIP);
    return stripped;
}
PyDoc_STRVAR(lstrip__doc__, | |
"lstrip(s) -> string\n" | |
"\n" | |
"Return a copy of the string s with leading whitespace removed."); | |
/* lstrip(s): warn about deprecation, then strip the left end via do_strip(). */
static PyObject *
strop_lstrip(PyObject *self, PyObject *args)
{
    PyObject *stripped;

    WARN;
    stripped = do_strip(args, LEFTSTRIP);
    return stripped;
}
PyDoc_STRVAR(rstrip__doc__, | |
"rstrip(s) -> string\n" | |
"\n" | |
"Return a copy of the string s with trailing whitespace removed."); | |
/* rstrip(s): warn about deprecation, then strip the right end via do_strip(). */
static PyObject *
strop_rstrip(PyObject *self, PyObject *args)
{
    PyObject *stripped;

    WARN;
    stripped = do_strip(args, RIGHTSTRIP);
    return stripped;
}
PyDoc_STRVAR(lower__doc__, | |
"lower(s) -> string\n" | |
"\n" | |
"Return a copy of the string s converted to lowercase."); | |
/* lower(s): return s with every uppercase character (per isupper()) mapped
   through tolower().  If no character changes, the argument itself is
   returned with a new reference instead of the copy. */
static PyObject *
strop_lower(PyObject *self, PyObject *args)
{
    char *src, *dst;
    Py_ssize_t size, k;
    PyObject *result;
    int modified = 0;

    WARN;
    if (PyString_AsStringAndSize(args, &src, &size))
        return NULL;
    result = PyString_FromStringAndSize(NULL, size);
    if (result == NULL)
        return NULL;
    dst = PyString_AsString(result);

    for (k = 0; k < size; k++) {
        int ch = Py_CHARMASK(src[k]);
        if (isupper(ch)) {
            modified = 1;
            dst[k] = tolower(ch);
        }
        else
            dst[k] = ch;
    }

    if (modified)
        return result;
    Py_DECREF(result);
    Py_INCREF(args);
    return args;
}
PyDoc_STRVAR(upper__doc__, | |
"upper(s) -> string\n" | |
"\n" | |
"Return a copy of the string s converted to uppercase."); | |
/* upper(s): return s with every lowercase character (per islower()) mapped
   through toupper().  If no character changes, the argument itself is
   returned with a new reference instead of the copy. */
static PyObject *
strop_upper(PyObject *self, PyObject *args)
{
    char *src, *dst;
    Py_ssize_t size, k;
    PyObject *result;
    int modified = 0;

    WARN;
    if (PyString_AsStringAndSize(args, &src, &size))
        return NULL;
    result = PyString_FromStringAndSize(NULL, size);
    if (result == NULL)
        return NULL;
    dst = PyString_AsString(result);

    for (k = 0; k < size; k++) {
        int ch = Py_CHARMASK(src[k]);
        if (islower(ch)) {
            modified = 1;
            dst[k] = toupper(ch);
        }
        else
            dst[k] = ch;
    }

    if (modified)
        return result;
    Py_DECREF(result);
    Py_INCREF(args);
    return args;
}
PyDoc_STRVAR(capitalize__doc__, | |
"capitalize(s) -> string\n" | |
"\n" | |
"Return a copy of the string s with only its first character\n" | |
"capitalized."); | |
/* capitalize(s): uppercase the first character and lowercase all the
   others.  If no character changes, the argument itself is returned with a
   new reference instead of the copy. */
static PyObject *
strop_capitalize(PyObject *self, PyObject *args)
{
    char *src, *dst;
    Py_ssize_t size, k;
    PyObject *result;
    int modified = 0;

    WARN;
    if (PyString_AsStringAndSize(args, &src, &size))
        return NULL;
    result = PyString_FromStringAndSize(NULL, size);
    if (result == NULL)
        return NULL;
    dst = PyString_AsString(result);

    /* First character: force upper case. */
    if (size > 0) {
        int ch = Py_CHARMASK(src[0]);
        if (islower(ch)) {
            modified = 1;
            dst[0] = toupper(ch);
        }
        else
            dst[0] = ch;
    }
    /* Remaining characters: force lower case. */
    for (k = 1; k < size; k++) {
        int ch = Py_CHARMASK(src[k]);
        if (isupper(ch)) {
            modified = 1;
            dst[k] = tolower(ch);
        }
        else
            dst[k] = ch;
    }

    if (modified)
        return result;
    Py_DECREF(result);
    Py_INCREF(args);
    return args;
}
PyDoc_STRVAR(expandtabs__doc__, | |
"expandtabs(string, [tabsize]) -> string\n" | |
"\n" | |
"Expand tabs in a string, i.e. replace them by one or more spaces,\n" | |
"depending on the current column and the given tab size (default 8).\n" | |
"The column number is reset to zero after each newline occurring in the\n" | |
"string. This doesn't understand other non-printing characters."); | |
/* expandtabs(string [,tabsize])

   Replace each tab by the number of spaces needed to reach the next
   multiple of tabsize (default 8); the column counter restarts after every
   newline.  Raises ValueError for tabsize < 1 and OverflowError when the
   expanded string would not fit in Py_ssize_t.

   The size computation checks for overflow *before* each addition, so it
   never relies on signed-integer wraparound (undefined behaviour in C). */
static PyObject *
strop_expandtabs(PyObject *self, PyObject *args)
{
    /* Original by Fredrik Lundh */
    char *e;
    char *p;
    char *q;
    Py_ssize_t i, j;
    PyObject *out;
    char *string;
    Py_ssize_t stringlen;
    int tabsize = 8;

    WARN;
    /* Get arguments */
    if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
        return NULL;
    if (tabsize < 1) {
        PyErr_SetString(PyExc_ValueError,
                        "tabsize must be at least 1");
        return NULL;
    }

    /* First pass: determine size of output string */
    i = j = 0; /* j: current column; i: total of previous lines */
    e = string + stringlen;
    for (p = string; p < e; p++) {
        if (*p == '\t') {
            Py_ssize_t pad = tabsize - (j % tabsize);
            if (j > PY_SSIZE_T_MAX - pad)
                goto overflow;
            j += pad;
        }
        else {
            if (j == PY_SSIZE_T_MAX)
                goto overflow;
            j++;
            if (*p == '\n') {
                if (i > PY_SSIZE_T_MAX - j)
                    goto overflow;
                i += j;
                j = 0;
            }
        }
    }
    if (i > PY_SSIZE_T_MAX - j)
        goto overflow;

    /* Second pass: create output string and fill it */
    out = PyString_FromStringAndSize(NULL, i + j);
    if (out == NULL)
        return NULL;

    i = 0;      /* from here on i is the current column */
    q = PyString_AS_STRING(out);
    for (p = string; p < e; p++) {
        if (*p == '\t') {
            j = tabsize - (i % tabsize);
            i += j;
            while (j-- > 0)
                *q++ = ' ';
        }
        else {
            *q++ = *p;
            i++;
            if (*p == '\n')
                i = 0;
        }
    }
    return out;

overflow:
    PyErr_SetString(PyExc_OverflowError, "new string is too long");
    return NULL;
}
PyDoc_STRVAR(count__doc__, | |
"count(s, sub[, start[, end]]) -> int\n" | |
"\n" | |
"Return the number of occurrences of substring sub in string\n" | |
"s[start:end]. Optional arguments start and end are\n" | |
"interpreted as in slice notation."); | |
/* count(s, sub [,start [,end]])

   Return the number of non-overlapping occurrences of sub in s[start:end].
   start/end are normalised like slice bounds.  For an empty sub the result
   is the number of positions in the slice plus one, and 0 when start lies
   beyond end (it is never negative). */
static PyObject *
strop_count(PyObject *self, PyObject *args)
{
    char *s, *sub;
    Py_ssize_t len, n;
    Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
    Py_ssize_t m, r;

    WARN;
    if (!PyArg_ParseTuple(args, "t#t#|nn:count", &s, &len, &sub, &n, &i, &last))
        return NULL;
    if (last > len)
        last = len;
    if (last < 0)
        last += len;
    if (last < 0)
        last = 0;
    if (i < 0)
        i += len;
    if (i < 0)
        i = 0;

    /* m is one past the last index at which a match may start. */
    m = last + 1 - n;
    if (n == 0) {
        /* start is not clamped to len, so m - i may be negative; an empty
           or inverted slice contains no occurrences. */
        return PyInt_FromLong((long) (m > i ? m - i : 0));
    }

    r = 0;
    while (i < m) {
        if (!memcmp(s + i, sub, n)) {
            r++;
            i += n;     /* occurrences do not overlap */
        }
        else {
            i++;
        }
    }
    return PyInt_FromLong((long) r);
}
PyDoc_STRVAR(swapcase__doc__, | |
"swapcase(s) -> string\n" | |
"\n" | |
"Return a copy of the string s with upper case characters\n" | |
"converted to lowercase and vice versa."); | |
/* swapcase(s): map lowercase characters to upper case and uppercase
   characters to lower case.  If no character changes, the argument itself
   is returned with a new reference instead of the copy. */
static PyObject *
strop_swapcase(PyObject *self, PyObject *args)
{
    char *src, *dst;
    Py_ssize_t size, k;
    PyObject *result;
    int modified = 0;

    WARN;
    if (PyString_AsStringAndSize(args, &src, &size))
        return NULL;
    result = PyString_FromStringAndSize(NULL, size);
    if (result == NULL)
        return NULL;
    dst = PyString_AsString(result);

    for (k = 0; k < size; k++) {
        int ch = Py_CHARMASK(src[k]);
        if (islower(ch)) {
            modified = 1;
            dst[k] = toupper(ch);
        }
        else if (isupper(ch)) {
            modified = 1;
            dst[k] = tolower(ch);
        }
        else
            dst[k] = ch;
    }

    if (modified)
        return result;
    Py_DECREF(result);
    Py_INCREF(args);
    return args;
}
PyDoc_STRVAR(atoi__doc__, | |
"atoi(s [,base]) -> int\n" | |
"\n" | |
"Return the integer represented by the string s in the given\n" | |
"base, which defaults to 10. The string s must consist of one\n" | |
"or more digits, possibly preceded by a sign. If base is 0, it\n" | |
"is chosen from the leading characters of s, 0 for octal, 0x or\n" | |
"0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n" | |
"accepted."); | |
/* atoi(s [,base])

   Parse s as an integer in the given base (0, or 2..36; default 10) after
   skipping leading whitespace; trailing whitespace is allowed.  With base 0
   and a leading '0' the unsigned parser is used and its result cast to
   long.  Raises ValueError for an invalid base, an invalid literal, or
   when the parser reports a range error through errno. */
static PyObject *
strop_atoi(PyObject *self, PyObject *args)
{
    char *s, *end;
    int base = 10;
    long value;
    char msg[256]; /* For errors */

    WARN;
    if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
        return NULL;
    if ((base != 0 && base < 2) || base > 36) {
        PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
        return NULL;
    }

    for (; *s && isspace(Py_CHARMASK(*s)); s++)
        ;

    errno = 0;
    if (base == 0 && s[0] == '0')
        value = (long) PyOS_strtoul(s, &end, base);
    else
        value = PyOS_strtol(s, &end, base);

    /* Nothing consumed, or the last consumed character is not a digit. */
    if (end == s || !isalnum(Py_CHARMASK(end[-1])))
        goto bad;
    for (; *end && isspace(Py_CHARMASK(*end)); end++)
        ;
    if (*end != '\0')
        goto bad;

    if (errno != 0) {
        PyOS_snprintf(msg, sizeof(msg),
                      "atoi() literal too large: %.200s", s);
        PyErr_SetString(PyExc_ValueError, msg);
        return NULL;
    }
    return PyInt_FromLong(value);

bad:
    PyOS_snprintf(msg, sizeof(msg),
                  "invalid literal for atoi(): %.200s", s);
    PyErr_SetString(PyExc_ValueError, msg);
    return NULL;
}
PyDoc_STRVAR(atol__doc__, | |
"atol(s [,base]) -> long\n" | |
"\n" | |
"Return the long integer represented by the string s in the\n" | |
"given base, which defaults to 10. The string s must consist\n" | |
"of one or more digits, possibly preceded by a sign. If base\n" | |
"is 0, it is chosen from the leading characters of s, 0 for\n" | |
"octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n" | |
"0x or 0X is accepted. A trailing L or l is not accepted,\n" | |
"unless base is 0."); | |
/* atol(s [,base])

   Parse s as a Python long in the given base (0, or 2..36; default 10)
   using PyLong_FromString().  Leading and trailing whitespace is skipped;
   with base 0 a single trailing 'l' or 'L' is also accepted.  Raises
   ValueError for an invalid base, an empty string or trailing garbage. */
static PyObject *
strop_atol(PyObject *self, PyObject *args)
{
    char *s, *end;
    int base = 10;
    PyObject *value;
    char msg[256]; /* For errors */

    WARN;
    if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
        return NULL;
    if ((base != 0 && base < 2) || base > 36) {
        PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
        return NULL;
    }

    for (; *s && isspace(Py_CHARMASK(*s)); s++)
        ;
    if (s[0] == '\0') {
        PyErr_SetString(PyExc_ValueError, "empty string for atol()");
        return NULL;
    }

    value = PyLong_FromString(s, &end, base);
    if (value == NULL)
        return NULL;

    if (base == 0 && (*end == 'l' || *end == 'L'))
        end++;
    for (; *end && isspace(Py_CHARMASK(*end)); end++)
        ;
    if (*end == '\0')
        return value;

    /* Unparsed characters remain: reject the literal. */
    PyOS_snprintf(msg, sizeof(msg),
                  "invalid literal for atol(): %.200s", s);
    PyErr_SetString(PyExc_ValueError, msg);
    Py_DECREF(value);
    return NULL;
}
PyDoc_STRVAR(atof__doc__, | |
"atof(s) -> float\n" | |
"\n" | |
"Return the floating point number represented by the string s."); | |
/* atof(s)

   Parse s as a float with PyOS_string_to_double() (overflow is reported as
   OverflowError).  Leading and trailing whitespace is skipped.  Raises
   ValueError for an empty string or trailing garbage. */
static PyObject *
strop_atof(PyObject *self, PyObject *args)
{
    char *s, *end;
    double value;
    char msg[256]; /* For errors */

    WARN;
    if (!PyArg_ParseTuple(args, "s:atof", &s))
        return NULL;

    for (; *s && isspace(Py_CHARMASK(*s)); s++)
        ;
    if (s[0] == '\0') {
        PyErr_SetString(PyExc_ValueError, "empty string for atof()");
        return NULL;
    }

    PyFPE_START_PROTECT("strop_atof", return 0)
    value = PyOS_string_to_double(s, &end, PyExc_OverflowError);
    PyFPE_END_PROTECT(value)
    /* -1 doubles as the error indicator, so also consult the error state. */
    if (value == -1 && PyErr_Occurred())
        return NULL;

    for (; *end && isspace(Py_CHARMASK(*end)); end++)
        ;
    if (*end == '\0')
        return PyFloat_FromDouble(value);

    PyOS_snprintf(msg, sizeof(msg),
                  "invalid literal for atof(): %.200s", s);
    PyErr_SetString(PyExc_ValueError, msg);
    return NULL;
}
PyDoc_STRVAR(maketrans__doc__, | |
"maketrans(frm, to) -> string\n" | |
"\n" | |
"Return a translation table (a string of 256 bytes long)\n" | |
"suitable for use in string.translate. The strings frm and to\n" | |
"must be of the same length."); | |
/* maketrans(frm, to)

   Build a 256-byte translation table: start from the identity mapping and
   then map each byte of frm to the byte at the same position in to.
   Raises ValueError when the two arguments differ in length. */
static PyObject *
strop_maketrans(PyObject *self, PyObject *args)
{
    unsigned char *from = NULL, *to = NULL;
    unsigned char *table;
    Py_ssize_t fromlen = 0, tolen = 0;
    Py_ssize_t k;
    PyObject *result;

    if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
        return NULL;
    if (fromlen != tolen) {
        PyErr_SetString(PyExc_ValueError,
                        "maketrans arguments must have same length");
        return NULL;
    }

    result = PyString_FromStringAndSize((char *)NULL, 256);
    if (result == NULL)
        return NULL;
    table = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);

    /* Identity mapping first ... */
    for (k = 0; k < 256; k++)
        table[k] = (unsigned char)k;
    /* ... then apply the overrides; later entries win on duplicates. */
    for (k = 0; k < fromlen; k++)
        table[from[k]] = to[k];

    return result;
}
PyDoc_STRVAR(translate__doc__, | |
"translate(s,table [,deletechars]) -> string\n" | |
"\n" | |
"Return a copy of the string s, where all characters occurring\n" | |
"in the optional argument deletechars are removed, and the\n" | |
"remaining characters have been mapped through the given\n" | |
"translation table, which must be a string of length 256."); | |
/* translate(s, table [,deletechars])

   Map every character of s through the 256-byte table, dropping the
   characters listed in deletechars.  Raises ValueError when the table is
   not exactly 256 bytes long.  If the result equals the input, the input
   object itself is returned with a new reference. */
static PyObject *
strop_translate(PyObject *self, PyObject *args)
{
    PyObject *input_obj, *result;
    char *table, *del_table = NULL;
    char *src, *dst, *dst_start;
    Py_ssize_t inlen, tablen, dellen = 0;
    Py_ssize_t k;
    int ch, changed = 0;
    int trans_table[256];

    WARN;
    if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
                          &table, &tablen, &del_table, &dellen))
        return NULL;
    if (tablen != 256) {
        PyErr_SetString(PyExc_ValueError,
                        "translation table must be 256 characters long");
        return NULL;
    }

    inlen = PyString_GET_SIZE(input_obj);
    result = PyString_FromStringAndSize((char *)NULL, inlen);
    if (result == NULL)
        return NULL;
    dst_start = dst = PyString_AsString(result);
    src = PyString_AsString(input_obj);

    if (dellen == 0) {
        /* If no deletions are required, use faster code */
        for (k = 0; k < inlen; k++) {
            ch = Py_CHARMASK(src[k]);
            dst[k] = table[ch];
            if (Py_CHARMASK(dst[k]) != ch)
                changed = 1;
        }
        if (changed)
            return result;
        Py_DECREF(result);
        Py_INCREF(input_obj);
        return input_obj;
    }

    /* Combined table: translated byte value, or -1 for "delete". */
    for (k = 0; k < 256; k++)
        trans_table[k] = Py_CHARMASK(table[k]);
    for (k = 0; k < dellen; k++)
        trans_table[(int) Py_CHARMASK(del_table[k])] = -1;

    for (k = 0; k < inlen; k++) {
        ch = Py_CHARMASK(src[k]);
        if (trans_table[ch] == -1) {
            changed = 1;        /* character dropped */
            continue;
        }
        *dst = (char)trans_table[ch];
        if (Py_CHARMASK(*dst) != ch)
            changed = 1;
        dst++;
    }

    if (!changed) {
        Py_DECREF(result);
        Py_INCREF(input_obj);
        return input_obj;
    }
    /* Fix the size of the resulting string */
    if (inlen > 0)
        _PyString_Resize(&result, dst - dst_start);
    return result;
}
/* What follows is used for implementing replace(). Perry Stoll. */ | |
/* | |
mymemfind | |
strstr replacement for arbitrary blocks of memory. | |
Locates the first occurrence in the memory pointed to by MEM of the | |
contents of memory pointed to by PAT. Returns the index into MEM if | |
found, or -1 if not found. If len of PAT is greater than length of | |
MEM, the function returns -1. | |
*/ | |
static Py_ssize_t | |
mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len) | |
{ | |
register Py_ssize_t ii; | |
/* pattern can not occur in the last pat_len-1 chars */ | |
len -= pat_len; | |
for (ii = 0; ii <= len; ii++) { | |
if (mem[ii] == pat[0] && | |
(pat_len == 1 || | |
memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) { | |
return ii; | |
} | |
} | |
return -1; | |
} | |
/* | |
mymemcnt | |
Return the number of distinct times PAT is found in MEM. | |
meaning mem=1111 and pat==11 returns 2. | |
mem=11111 and pat==11 also return 2. | |
*/ | |
static Py_ssize_t | |
mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len) | |
{ | |
register Py_ssize_t offset = 0; | |
Py_ssize_t nfound = 0; | |
while (len >= 0) { | |
offset = mymemfind(mem, len, pat, pat_len); | |
if (offset == -1) | |
break; | |
mem += offset + pat_len; | |
len -= offset + pat_len; | |
nfound++; | |
} | |
return nfound; | |
} | |
/* | |
mymemreplace | |
Return a string in which all occurrences of PAT in memory STR are | |
replaced with SUB. | |
If length of PAT is less than length of STR or there are no occurrences | |
of PAT in STR, then the original string is returned. Otherwise, a new | |
string is allocated here and returned. | |
on return, out_len is: | |
the length of output string, or | |
-1 if the input string is returned, or | |
unchanged if an error occurs (no memory). | |
return value is: | |
the new string allocated locally, or | |
NULL if an error occurred. | |
*/ | |
static char * | |
mymemreplace(const char *str, Py_ssize_t len, /* input string */ | |
const char *pat, Py_ssize_t pat_len, /* pattern string to find */ | |
const char *sub, Py_ssize_t sub_len, /* substitution string */ | |
Py_ssize_t count, /* number of replacements */ | |
Py_ssize_t *out_len) | |
{ | |
char *out_s; | |
char *new_s; | |
Py_ssize_t nfound, offset, new_len; | |
if (len == 0 || pat_len > len) | |
goto return_same; | |
/* find length of output string */ | |
nfound = mymemcnt(str, len, pat, pat_len); | |
if (count < 0) | |
count = PY_SSIZE_T_MAX; | |
else if (nfound > count) | |
nfound = count; | |
if (nfound == 0) | |
goto return_same; | |
new_len = len + nfound*(sub_len - pat_len); | |
if (new_len == 0) { | |
/* Have to allocate something for the caller to free(). */ | |
out_s = (char *)PyMem_MALLOC(1); | |
if (out_s == NULL) | |
return NULL; | |
out_s[0] = '\0'; | |
} | |
else { | |
assert(new_len > 0); | |
new_s = (char *)PyMem_MALLOC(new_len); | |
if (new_s == NULL) | |
return NULL; | |
out_s = new_s; | |
for (; count > 0 && len > 0; --count) { | |
/* find index of next instance of pattern */ | |
offset = mymemfind(str, len, pat, pat_len); | |
if (offset == -1) | |
break; | |
/* copy non matching part of input string */ | |
memcpy(new_s, str, offset); | |
str += offset + pat_len; | |
len -= offset + pat_len; | |
/* copy substitute into the output string */ | |
new_s += offset; | |
memcpy(new_s, sub, sub_len); | |
new_s += sub_len; | |
} | |
/* copy any remaining values into output string */ | |
if (len > 0) | |
memcpy(new_s, str, len); | |
} | |
*out_len = new_len; | |
return out_s; | |
return_same: | |
*out_len = -1; | |
return (char *)str; /* cast away const */ | |
} | |
PyDoc_STRVAR(replace__doc__, | |
"replace (str, old, new[, maxsplit]) -> string\n" | |
"\n" | |
"Return a copy of string str with all occurrences of substring\n" | |
"old replaced by new. If the optional argument maxsplit is\n" | |
"given, only the first maxsplit occurrences are replaced."); | |
/* replace(str, old, new [,maxsplit])

   Return str with occurrences of old replaced by new; at most maxsplit
   replacements are made when that argument is given and nonzero.  Raises
   ValueError for an empty pattern and MemoryError when mymemreplace()
   fails.  If nothing is replaced, the input object itself is returned
   with a new reference. */
static PyObject *
strop_replace(PyObject *self, PyObject *args)
{
    char *str, *pat, *sub;
    char *replaced;
    Py_ssize_t len, pat_len, sub_len, out_len;
    Py_ssize_t count = -1;
    PyObject *result;

    WARN;
    if (!PyArg_ParseTuple(args, "t#t#t#|n:replace",
                          &str, &len, &pat, &pat_len, &sub, &sub_len,
                          &count))
        return NULL;
    if (pat_len <= 0) {
        PyErr_SetString(PyExc_ValueError, "empty pattern string");
        return NULL;
    }

    /* CAUTION: strop treats a replace count of 0 as "no limit", unlike
     * the string.py module and the string methods.  This is kept on
     * purpose for backward compatibility.
     */
    if (count == 0)
        count = -1;

    replaced = mymemreplace(str, len, pat, pat_len, sub, sub_len, count, &out_len);
    if (replaced == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    if (out_len != -1) {
        /* A fresh buffer was produced: copy it into a string, then free it. */
        result = PyString_FromStringAndSize(replaced, out_len);
        PyMem_FREE(replaced);
        return result;
    }

    /* we're returning another reference to the input string */
    result = PyTuple_GetItem(args, 0);
    Py_XINCREF(result);
    return result;
}
/* List of functions defined in the module */ | |
static PyMethodDef | |
strop_methods[] = { | |
{"atof", strop_atof, METH_VARARGS, atof__doc__}, | |
{"atoi", strop_atoi, METH_VARARGS, atoi__doc__}, | |
{"atol", strop_atol, METH_VARARGS, atol__doc__}, | |
{"capitalize", strop_capitalize, METH_O, capitalize__doc__}, | |
{"count", strop_count, METH_VARARGS, count__doc__}, | |
{"expandtabs", strop_expandtabs, METH_VARARGS, expandtabs__doc__}, | |
{"find", strop_find, METH_VARARGS, find__doc__}, | |
{"join", strop_joinfields, METH_VARARGS, joinfields__doc__}, | |
{"joinfields", strop_joinfields, METH_VARARGS, joinfields__doc__}, | |
{"lstrip", strop_lstrip, METH_O, lstrip__doc__}, | |
{"lower", strop_lower, METH_O, lower__doc__}, | |
{"maketrans", strop_maketrans, METH_VARARGS, maketrans__doc__}, | |
{"replace", strop_replace, METH_VARARGS, replace__doc__}, | |
{"rfind", strop_rfind, METH_VARARGS, rfind__doc__}, | |
{"rstrip", strop_rstrip, METH_O, rstrip__doc__}, | |
{"split", strop_splitfields, METH_VARARGS, splitfields__doc__}, | |
{"splitfields", strop_splitfields, METH_VARARGS, splitfields__doc__}, | |
{"strip", strop_strip, METH_O, strip__doc__}, | |
{"swapcase", strop_swapcase, METH_O, swapcase__doc__}, | |
{"translate", strop_translate, METH_VARARGS, translate__doc__}, | |
{"upper", strop_upper, METH_O, upper__doc__}, | |
{NULL, NULL} /* sentinel */ | |
}; | |
PyMODINIT_FUNC | |
initstrop(void) | |
{ | |
PyObject *m, *s; | |
char buf[256]; | |
int c, n; | |
m = Py_InitModule4("strop", strop_methods, strop_module__doc__, | |
(PyObject*)NULL, PYTHON_API_VERSION); | |
if (m == NULL) | |
return; | |
/* Create 'whitespace' object */ | |
n = 0; | |
for (c = 0; c < 256; c++) { | |
if (isspace(c)) | |
buf[n++] = c; | |
} | |
s = PyString_FromStringAndSize(buf, n); | |
if (s) | |
PyModule_AddObject(m, "whitespace", s); | |
/* Create 'lowercase' object */ | |
n = 0; | |
for (c = 0; c < 256; c++) { | |
if (islower(c)) | |
buf[n++] = c; | |
} | |
s = PyString_FromStringAndSize(buf, n); | |
if (s) | |
PyModule_AddObject(m, "lowercase", s); | |
/* Create 'uppercase' object */ | |
n = 0; | |
for (c = 0; c < 256; c++) { | |
if (isupper(c)) | |
buf[n++] = c; | |
} | |
s = PyString_FromStringAndSize(buf, n); | |
if (s) | |
PyModule_AddObject(m, "uppercase", s); | |
} |