| /*====================================================================* |
| - Copyright (C) 2001 Leptonica. All rights reserved. |
| - This software is distributed in the hope that it will be |
| - useful, but with NO WARRANTY OF ANY KIND. |
| - No author or distributor accepts responsibility to anyone for the |
| - consequences of using this software, or for whether it serves any |
| - particular purpose or works at all, unless he or she says so in |
| - writing. Everyone is granted permission to copy, modify and |
| - redistribute this source code, for commercial or non-commercial |
| - purposes, with the following restrictions: (1) the origin of this |
| - source code must not be misrepresented; (2) modified versions must |
| - be plainly marked as such; and (3) this notice may not be removed |
| - or altered from any source or modified source distribution. |
| *====================================================================*/ |
| |
| |
| /* |
| * sarray.c |
| * |
| * Create/Destroy/Copy |
| * SARRAY *sarrayCreate() |
| * SARRAY *sarrayCreateWordsFromString() |
| * SARRAY *sarrayCreateLinesFromString() |
| * void *sarrayDestroy() |
| * SARRAY *sarrayCopy() |
| * SARRAY *sarrayClone() |
| * |
| * Add/Remove string |
| * l_int32 sarrayAddString() |
| * l_int32 sarrayExtendArray() |
| * char *sarrayRemoveString() |
| * l_int32 sarrayClear() |
| * |
| * Accessors |
| * l_int32 sarrayGetCount() |
| * char **sarrayGetArray() |
| * char *sarrayGetString() |
| * l_int32 sarrayGetRefcount() |
| * l_int32 sarrayChangeRefcount() |
| * |
| * Conversion back to string |
| * char *sarrayToString() |
| * char *sarrayToStringRange() |
| * |
| * Concatenate 2 sarrays |
| * l_int32 sarrayConcatenate() |
| * l_int32 sarrayAppendRange() |
| * |
| * Convert word sarray to (formatted) line sarray |
| * SARRAY *sarrayConvertWordsToLines() |
| * |
| * Split string on separator list |
| * SARRAY *sarraySplitString() |
| * |
| * Filter sarray |
| * SARRAY *sarraySelectBySubstring() |
| * l_int32 sarrayParseRange() |
| * |
| * Sort |
| * SARRAY *sarraySort() |
| * l_int32 stringCompareLexical() |
| * |
| * Serialize for I/O |
| * SARRAY *sarrayRead() |
| * SARRAY *sarrayReadStream() |
| * l_int32 sarrayWrite() |
| * l_int32 sarrayWriteStream() |
| * l_int32 sarrayAppend() |
| * |
| * Directory filenames |
| * SARRAY *getSortedPathnamesInDirectory() |
| * SARRAY *getFilenamesInDirectory() |
| * |
| * |
| * Comments on usage: |
| * |
| * These functions are important for efficient manipulation |
| * of string data. They have been used in leptonica for |
| * generating and parsing text files, and for generating |
| * code for compilation. The user is responsible for |
| * correctly disposing of strings that have been extracted |
| * from sarrays. |
| * |
| * - When you want a string from an Sarray to inspect it, or |
| * plan to make a copy of it later, use sarrayGetString() |
| * with copyflag = 0. In this case, you must neither free |
| * the string nor put it directly in another array. |
| * We provide the copyflag constant L_NOCOPY, which is 0, |
| * for this purpose: |
| * str-not-owned = sarrayGetString(sa, index, L_NOCOPY); |
| * To extract a copy of a string, use: |
| * str-owned = sarrayGetString(sa, index, L_COPY); |
| * |
| * - When you want to insert a string that is in one |
| * array into another array (always leaving the first |
| * array intact), you have two options: |
| * (1) use copyflag = L_COPY to make an immediate copy, |
| * which you must then add to the second array |
| * by insertion; namely, |
| * str-owned = sarrayGetString(sa, index, L_COPY); |
| * sarrayAddString(sa, str-owned, L_INSERT); |
| * (2) use copyflag = L_NOCOPY to get another handle to |
| * the string, in which case you must add |
| * a copy of it to the second string array: |
| * str-not-owned = sarrayGetString(sa, index, L_NOCOPY); |
| * sarrayAddString(sa, str-not-owned, L_COPY). |
| * |
| * In all cases, when you use copyflag = L_COPY to extract |
| * a string from an array, you must either free it |
| * or insert it in an array that will be freed later. |
| */ |
| |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #ifndef COMPILER_MSVC |
| #include <dirent.h> /* unix only */ |
| #endif /* !COMPILER_MSVC */ |
| #include "allheaders.h" |
| |
| static const l_int32 INITIAL_PTR_ARRAYSIZE = 50; /* n'importe quoi */ |
| static const l_int32 L_BUF_SIZE = 512; |
| |
| |
| /*--------------------------------------------------------------------------* |
| * String array create/destroy/copy/extend * |
| *--------------------------------------------------------------------------*/ |
| /*! |
| * sarrayCreate() |
| * |
| * Input: size of string ptr array to be alloc'd |
| * (use 0 for default) |
| * Return: sarray, or null on error |
| */ |
| SARRAY * |
| sarrayCreate(l_int32 n) |
| { |
| SARRAY *sa; |
| |
| PROCNAME("sarrayCreate"); |
| |
| if (n <= 0) |
| n = INITIAL_PTR_ARRAYSIZE; |
| |
| if ((sa = (SARRAY *)CALLOC(1, sizeof(SARRAY))) == NULL) |
| return (SARRAY *)ERROR_PTR("sa not made", procName, NULL); |
| if ((sa->array = (char **)CALLOC(n, sizeof(char *))) == NULL) |
| return (SARRAY *)ERROR_PTR("ptr array not made", procName, NULL); |
| |
| sa->nalloc = n; |
| sa->n = 0; |
| sa->refcount = 1; |
| return sa; |
| } |
| |
| |
| /*! |
| * sarrayCreateWordsFromString() |
| * |
| * Input: string |
| * Return: sarray, or null on error |
| * |
| * Notes: |
| * (1) This finds the number of word substrings, creates an sarray |
| * of this size, and puts copies of each substring into the sarray. |
| */ |
| SARRAY * |
| sarrayCreateWordsFromString(const char *string) |
| { |
| char separators[] = " \n\t"; |
| l_int32 i, nsub, size, inword; |
| SARRAY *sa; |
| |
| PROCNAME("sarrayCreateWordsFromString"); |
| |
| if (!string) |
| return (SARRAY *)ERROR_PTR("textstr not defined", procName, NULL); |
| |
| /* Find the number of words */ |
| size = strlen(string); |
| nsub = 0; |
| inword = FALSE; |
| for (i = 0; i < size; i++) { |
| if (inword == FALSE && |
| (string[i] != ' ' && string[i] != '\t' && string[i] != '\n')) { |
| inword = TRUE; |
| nsub++; |
| } |
| else if (inword == TRUE && |
| (string[i] == ' ' || string[i] == '\t' || string[i] == '\n')) { |
| inword = FALSE; |
| } |
| } |
| |
| if ((sa = sarrayCreate(nsub)) == NULL) |
| return (SARRAY *)ERROR_PTR("sa not made", procName, NULL); |
| sarraySplitString(sa, string, separators); |
| |
| return sa; |
| } |
| |
| |
| /*! |
| * sarrayCreateLinesFromString() |
| * |
| * Input: string |
| * blankflag (0 to exclude blank lines; 1 to include) |
| * Return: sarray, or null on error |
| * |
| * Notes: |
| * (1) This finds the number of line substrings, creates an sarray of |
| * this size, and puts copies of each substring into the sarray. |
| */ |
| SARRAY * |
| sarrayCreateLinesFromString(char *string, |
| l_int32 blankflag) |
| { |
| l_int32 i, nsub, size, startptr; |
| char *cstring, *substring; |
| SARRAY *sa; |
| |
| PROCNAME("sarrayCreateLinesFromString"); |
| |
| if (!string) |
| return (SARRAY *)ERROR_PTR("textstr not defined", procName, NULL); |
| |
| /* find the number of lines */ |
| size = strlen(string); |
| nsub = 0; |
| for (i = 0; i < size; i++) { |
| if (string[i] == '\n') |
| nsub++; |
| } |
| |
| if ((sa = sarrayCreate(nsub)) == NULL) |
| return (SARRAY *)ERROR_PTR("sa not made", procName, NULL); |
| |
| if (blankflag) { /* keep blank lines as null strings */ |
| /* Make a copy for munging */ |
| if ((cstring = stringNew(string)) == NULL) |
| return (SARRAY *)ERROR_PTR("cstring not made", procName, NULL); |
| /* We'll insert nulls like strtok */ |
| startptr = 0; |
| for (i = 0; i < size; i++) { |
| if (cstring[i] == '\n') { |
| cstring[i] = '\0'; |
| if ((substring = stringNew(cstring + startptr)) == NULL) |
| return (SARRAY *)ERROR_PTR("substring not made", |
| procName, NULL); |
| sarrayAddString(sa, substring, L_INSERT); |
| /* fprintf(stderr, "substring = %s\n", substring); */ |
| startptr = i + 1; |
| } |
| } |
| if (startptr < size) { /* no newline at end of last line */ |
| if ((substring = stringNew(cstring + startptr)) == NULL) |
| return (SARRAY *)ERROR_PTR("substring not made", |
| procName, NULL); |
| sarrayAddString(sa, substring, L_INSERT); |
| /* fprintf(stderr, "substring = %s\n", substring); */ |
| } |
| FREE(cstring); |
| } |
| else { /* remove blank lines; use strtok */ |
| sarraySplitString(sa, string, "\n"); |
| } |
| |
| return sa; |
| } |
| |
| |
| /*! |
| * sarrayDestroy() |
| * |
| * Input: &sarray <to be nulled> |
| * Return: void |
| * |
| * Notes: |
| * (1) Decrements the ref count and, if 0, destroys the sarray. |
| * (2) Always nulls the input ptr. |
| */ |
| void |
| sarrayDestroy(SARRAY **psa) |
| { |
| l_int32 i; |
| SARRAY *sa; |
| |
| PROCNAME("sarrayDestroy"); |
| |
| if (psa == NULL) { |
| L_WARNING("ptr address is NULL!", procName); |
| return; |
| } |
| if ((sa = *psa) == NULL) |
| return; |
| |
| sarrayChangeRefcount(sa, -1); |
| if (sarrayGetRefcount(sa) <= 0) { |
| if (sa->array) { |
| for (i = 0; i < sa->n; i++) |
| FREE(sa->array[i]); |
| FREE(sa->array); |
| } |
| FREE(sa); |
| } |
| |
| *psa = NULL; |
| return; |
| } |
| |
| |
| /*! |
| * sarrayCopy() |
| * |
| * Input: sarray |
| * Return: copy of sarray, or null on error |
| */ |
| SARRAY * |
| sarrayCopy(SARRAY *sa) |
| { |
| l_int32 i; |
| SARRAY *csa; |
| |
| PROCNAME("sarrayCopy"); |
| |
| if (!sa) |
| return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL); |
| |
| if ((csa = sarrayCreate(sa->nalloc)) == NULL) |
| return (SARRAY *)ERROR_PTR("csa not made", procName, NULL); |
| |
| for (i = 0; i < sa->n; i++) |
| sarrayAddString(csa, sa->array[i], L_COPY); |
| |
| return csa; |
| } |
| |
| |
| /*! |
| * sarrayClone() |
| * |
| * Input: sarray |
| * Return: ptr to same sarray, or null on error |
| */ |
| SARRAY * |
| sarrayClone(SARRAY *sa) |
| { |
| PROCNAME("sarrayClone"); |
| |
| if (!sa) |
| return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL); |
| sarrayChangeRefcount(sa, 1); |
| return sa; |
| } |
| |
| |
| /*! |
| * sarrayAddString() |
| * |
| * Input: sarray |
| * string (string to be added) |
| * copyflag (L_INSERT, L_COPY) |
| * Return: 0 if OK, 1 on error |
| * |
| * Notes: |
| * (1) Legacy usage decrees that we always use 0 to insert a string |
| * directly and 1 to insert a copy of the string. The |
| * enums for L_INSERT and L_COPY agree with this convention, |
| * and will not change in the future. |
| * (2) See usage comments at the top of this file. |
| */ |
| l_int32 |
| sarrayAddString(SARRAY *sa, |
| char *string, |
| l_int32 copyflag) |
| { |
| l_int32 n; |
| |
| PROCNAME("sarrayAddString"); |
| |
| if (!sa) |
| return ERROR_INT("sa not defined", procName, 1); |
| if (!string) |
| return ERROR_INT("string not defined", procName, 1); |
| if (copyflag != L_INSERT && copyflag != L_COPY) |
| return ERROR_INT("invalid copyflag", procName, 1); |
| |
| n = sarrayGetCount(sa); |
| if (n >= sa->nalloc) |
| sarrayExtendArray(sa); |
| |
| if (copyflag == L_INSERT) |
| sa->array[n] = string; |
| else /* L_COPY */ |
| sa->array[n] = stringNew(string); |
| sa->n++; |
| |
| return 0; |
| } |
| |
| |
| /*! |
| * sarrayExtendArray() |
| * |
| * Input: sarray |
| * Return: 0 if OK, 1 on error |
| */ |
| l_int32 |
| sarrayExtendArray(SARRAY *sa) |
| { |
| PROCNAME("sarrayExtendArray"); |
| |
| if (!sa) |
| return ERROR_INT("sa not defined", procName, 1); |
| |
| if ((sa->array = (char **)reallocNew((void **)&sa->array, |
| sizeof(char *) * sa->nalloc, |
| 2 * sizeof(char *) * sa->nalloc)) == NULL) |
| return ERROR_INT("new ptr array not returned", procName, 1); |
| |
| sa->nalloc *= 2; |
| return 0; |
| } |
| |
| |
| /*! |
| * sarrayRemoveString() |
| * |
| * Input: sarray |
| * index (of string within sarray) |
| * Return: removed string, or null on error |
| */ |
| char * |
| sarrayRemoveString(SARRAY *sa, |
| l_int32 index) |
| { |
| char *string; |
| char **array; |
| l_int32 i, n, nalloc; |
| |
| PROCNAME("sarrayRemoveString"); |
| |
| if (!sa) |
| return (char *)ERROR_PTR("sa not defined", procName, NULL); |
| |
| if ((array = sarrayGetArray(sa, &nalloc, &n)) == NULL) |
| return (char *)ERROR_PTR("array not returned", procName, NULL); |
| |
| if (index < 0 || index >= n) |
| return (char *)ERROR_PTR("array index out of bounds", procName, NULL); |
| |
| string = array[index]; |
| |
| /* If removed string is not at end of array, shift |
| * to fill in, maintaining original ordering. |
| * Note: if we didn't care about the order, we could |
| * put the last string array[n - 1] directly into the hole. */ |
| for (i = index; i < n - 1; i++) |
| array[i] = array[i + 1]; |
| |
| sa->n--; |
| return string; |
| } |
| |
| |
| /*! |
| * sarrayClear() |
| * |
| * Input: sarray |
| * Return: 0 if OK; 1 on error |
| */ |
| l_int32 |
| sarrayClear(SARRAY *sa) |
| { |
| l_int32 i; |
| |
| PROCNAME("sarrayClear"); |
| |
| if (!sa) |
| return ERROR_INT("sa not defined", procName, 1); |
| for (i = 0; i < sa->n; i++) { /* free strings and null ptrs */ |
| FREE(sa->array[i]); |
| sa->array[i] = NULL; |
| } |
| sa->n = 0; |
| return 0; |
| } |
| |
| |
| /*----------------------------------------------------------------------* |
| * Accessors * |
| *----------------------------------------------------------------------*/ |
| /*! |
| * sarrayGetCount() |
| * |
| * Input: sarray |
| * Return: count, or 0 if no strings or on error |
| */ |
| l_int32 |
| sarrayGetCount(SARRAY *sa) |
| { |
| PROCNAME("sarrayGetCount"); |
| |
| if (!sa) |
| return ERROR_INT("sa not defined", procName, 0); |
| return sa->n; |
| } |
| |
| |
| /*! |
| * sarrayGetArray() |
| * |
| * Input: sarray |
| * &nalloc (<optional return> number allocated string ptrs) |
| * &n (<optional return> number allocated strings) |
| * Return: ptr to string array, or null on error |
| * |
| * Notes: |
| * (1) Caution: the returned array is not a copy, so caller |
| * must not destroy it! |
| */ |
| char ** |
| sarrayGetArray(SARRAY *sa, |
| l_int32 *pnalloc, |
| l_int32 *pn) |
| { |
| char **array; |
| |
| PROCNAME("sarrayGetArray"); |
| |
| if (!sa) |
| return (char **)ERROR_PTR("sa not defined", procName, NULL); |
| |
| array = sa->array; |
| if (pnalloc) *pnalloc = sa->nalloc; |
| if (pn) *pn = sa->n; |
| |
| return array; |
| } |
| |
| |
| /*! |
| * sarrayGetString() |
| * |
| * Input: sarray |
| * index (to the index-th string) |
| * copyflag (L_NOCOPY or L_COPY) |
| * Return: string, or null on error |
| * |
| * Notes: |
| * (1) Legacy usage decrees that we always use 0 to get the |
| * pointer to the string itself, and 1 to get a copy of |
| * the string. |
| * (2) See usage comments at the top of this file. |
| * (3) To get a pointer to the string itself, use for copyflag: |
| * L_NOCOPY or 0 or FALSE |
| * To get a copy of the string, use for copyflag: |
| * L_COPY or 1 or TRUE |
| * The const values of L_NOCOPY and L_COPY are guaranteed not |
| * to change. |
| */ |
| char * |
| sarrayGetString(SARRAY *sa, |
| l_int32 index, |
| l_int32 copyflag) |
| { |
| PROCNAME("sarrayGetString"); |
| |
| if (!sa) |
| return (char *)ERROR_PTR("sa not defined", procName, NULL); |
| if (index < 0 || index >= sa->n) |
| return (char *)ERROR_PTR("index not valid", procName, NULL); |
| if (copyflag != L_NOCOPY && copyflag != L_COPY) |
| return (char *)ERROR_PTR("invalid copyflag", procName, NULL); |
| |
| if (copyflag == L_NOCOPY) |
| return sa->array[index]; |
| else /* L_COPY */ |
| return stringNew(sa->array[index]); |
| } |
| |
| |
| /*! |
| * sarrayGetRefCount() |
| * |
| * Input: sarray |
| * Return: refcount, or UNDEF on error |
| */ |
| l_int32 |
| sarrayGetRefcount(SARRAY *sa) |
| { |
| PROCNAME("sarrayGetRefcount"); |
| |
| if (!sa) |
| return ERROR_INT("sa not defined", procName, UNDEF); |
| return sa->refcount; |
| } |
| |
| |
| /*! |
| * sarrayChangeRefCount() |
| * |
| * Input: sarray |
| * delta (change to be applied) |
| * Return: 0 if OK, 1 on error |
| */ |
| l_int32 |
| sarrayChangeRefcount(SARRAY *sa, |
| l_int32 delta) |
| { |
| PROCNAME("sarrayChangeRefcount"); |
| |
| if (!sa) |
| return ERROR_INT("sa not defined", procName, UNDEF); |
| sa->refcount += delta; |
| return 0; |
| } |
| |
| |
| /*----------------------------------------------------------------------* |
| * Conversion to string * |
| *----------------------------------------------------------------------*/ |
| /*! |
| * sarrayToString() |
| * |
| * Input: sarray |
| * addnlflag (flag: 0 adds nothing to each substring |
| * 1 adds '\n' to each substring |
| * 2 adds ' ' to each substring) |
| * Return: dest string, or null on error |
| * |
| * Notes: |
| * (1) Concatenates all the strings in the sarray, preserving |
| * all white space. |
| * (2) If addnlflag != 0, adds either a '\n' or a ' ' after |
| * each substring. |
| * (3) This function was NOT implemented as: |
| * for (i = 0; i < n; i++) |
| * strcat(dest, sarrayGetString(sa, i, L_NOCOPY)); |
| * Do you see why? |
| */ |
| char * |
| sarrayToString(SARRAY *sa, |
| l_int32 addnlflag) |
| { |
| PROCNAME("sarrayToString"); |
| |
| if (!sa) |
| return (char *)ERROR_PTR("sa not defined", procName, NULL); |
| |
| return sarrayToStringRange(sa, 0, 0, addnlflag); |
| } |
| |
| |
| /*! |
| * sarrayToStringRange() |
| * |
| * Input: sarray |
| * first (index of first string to use; starts with 0) |
| * nstrings (number of strings to append into the result; use |
| * 0 to append to the end of the sarray) |
| * addnlflag (flag: 0 adds nothing to each substring |
| * 1 adds '\n' to each substring |
| * 2 adds ' ' to each substring) |
| * Return: dest string, or null on error |
| * |
| * Notes: |
| * (1) Concatenates the specified strings inthe sarray, preserving |
| * all white space. |
| * (2) If addnlflag != 0, adds either a '\n' or a ' ' after |
| * each substring. |
| * (3) If the sarray is empty, this returns a string with just |
| * the character corresponding to @addnlflag. |
| */ |
| char * |
| sarrayToStringRange(SARRAY *sa, |
| l_int32 first, |
| l_int32 nstrings, |
| l_int32 addnlflag) |
| { |
| char *dest, *src; |
| l_int32 n, i, last, size, index, len; |
| |
| PROCNAME("sarrayToStringRange"); |
| |
| if (!sa) |
| return (char *)ERROR_PTR("sa not defined", procName, NULL); |
| if (addnlflag != 0 && addnlflag != 1 && addnlflag != 2) |
| return (char *)ERROR_PTR("invalid addnlflag", procName, NULL); |
| |
| n = sarrayGetCount(sa); |
| |
| /* Empty sa; return char corresponding to addnlflag only */ |
| if (n == 0) { |
| if (first == 0) { |
| if (addnlflag == 0) |
| return stringNew(""); |
| if (addnlflag == 1) |
| return stringNew("\n"); |
| else /* addnlflag == 2) */ |
| return stringNew(" "); |
| } |
| else |
| return (char *)ERROR_PTR("first not valid", procName, NULL); |
| } |
| |
| if (first < 0 || first >= n) |
| return (char *)ERROR_PTR("first not valid", procName, NULL); |
| if (nstrings == 0 || (nstrings > n - first)) |
| nstrings = n - first; /* no overflow */ |
| last = first + nstrings - 1; |
| |
| size = 0; |
| for (i = first; i <= last; i++) |
| size += strlen(sarrayGetString(sa, i, L_NOCOPY)) + 2; |
| |
| if ((dest = (char *)CALLOC(size + 1, sizeof(char))) == NULL) |
| return (char *)ERROR_PTR("dest not made", procName, NULL); |
| |
| index = 0; |
| for (i = first; i <= last; i++) { |
| src = sa->array[i]; |
| len = strlen(src); |
| memcpy(dest + index, src, len); |
| index += len; |
| if (addnlflag == 1) { |
| dest[index] = '\n'; |
| index++; |
| } |
| else if (addnlflag == 2) { |
| dest[index] = ' '; |
| index++; |
| } |
| } |
| |
| return dest; |
| } |
| |
| |
| /*----------------------------------------------------------------------* |
| * Concatenate 2 sarrays * |
| *----------------------------------------------------------------------*/ |
| /*! |
| * sarrayConcatenate() |
| * |
| * Input: sa1 (to be added to) |
| * sa2 (append to sa1) |
| * Return: 0 if OK, 1 on error |
| * |
| * Notes: |
| * (1) Copies of the strings in sarray2 are added to sarray1. |
| */ |
| l_int32 |
| sarrayConcatenate(SARRAY *sa1, |
| SARRAY *sa2) |
| { |
| char *str; |
| l_int32 n, i; |
| |
| PROCNAME("sarrayConcatenate"); |
| |
| if (!sa1) |
| return ERROR_INT("sa1 not defined", procName, 1); |
| if (!sa2) |
| return ERROR_INT("sa2 not defined", procName, 1); |
| |
| n = sarrayGetCount(sa2); |
| for (i = 0; i < n; i++) { |
| str = sarrayGetString(sa2, i, L_NOCOPY); |
| sarrayAddString(sa1, str, L_COPY); |
| } |
| |
| return 0; |
| } |
| |
| |
| /*! |
| * sarrayAppendRange() |
| * |
| * Input: sa1 (to be added to) |
| * sa2 (append specified range of strings in sa2 to sa1) |
| * start (index of first string of sa2 to append) |
| * end (index of last string of sa2 to append) |
| * Return: 0 if OK, 1 on error |
| * |
| * Notes: |
| * (1) Copies of the strings in sarray2 are added to sarray1. |
| * (2) The [start ... end] range is truncated if necessary. |
| */ |
| l_int32 |
| sarrayAppendRange(SARRAY *sa1, |
| SARRAY *sa2, |
| l_int32 start, |
| l_int32 end) |
| { |
| char *str; |
| l_int32 n, i; |
| |
| PROCNAME("sarrayAppendRange"); |
| |
| if (!sa1) |
| return ERROR_INT("sa1 not defined", procName, 1); |
| if (!sa2) |
| return ERROR_INT("sa2 not defined", procName, 1); |
| if (start < 0) |
| start = 0; |
| n = sarrayGetCount(sa2); |
| if (end >= n) |
| end = n - 1; |
| if (start > end) |
| return ERROR_INT("start > end", procName, 1); |
| |
| for (i = start; i <= end; i++) { |
| str = sarrayGetString(sa2, i, L_NOCOPY); |
| sarrayAddString(sa1, str, L_COPY); |
| } |
| |
| return 0; |
| } |
| |
| |
| /*----------------------------------------------------------------------* |
| * Convert word sarray to line sarray * |
| *----------------------------------------------------------------------*/ |
| /*! |
| * sarrayConvertWordsToLines() |
| * |
| * Input: sa (sa of individual words) |
| * linesize (max num of chars in each line) |
| * Return: saout (sa of formatted lines), or null on error |
| * |
| * This is useful for re-typesetting text to a specific maximum |
| * line length. The individual words in the input sarray |
| * are concatenated into textlines. An input word string of zero |
| * length is taken to be a paragraph separator. Each time |
| * such a string is found, the current line is ended and |
| * a new line is also produced that contains just the |
| * string of zero length (""). When the output sarray |
| * of lines is eventually converted to a string with newlines |
| * (typically) appended to each line string, the empty |
| * strings are just converted to newlines, producing the visible |
| * paragraph separation. |
| * |
| * What happens when a word is larger than linesize? |
| * We write it out as a single line anyway! Words preceding |
| * or following this long word are placed on lines preceding |
| * or following the line with the long word. Why this choice? |
| * Long "words" found in text documents are typically URLs, and |
| * it's often desirable not to put newlines in the middle of a URL. |
| * The text display program (e.g., text editor) will typically |
| * wrap the long "word" to fit in the window. |
| */ |
| SARRAY * |
| sarrayConvertWordsToLines(SARRAY *sa, |
| l_int32 linesize) |
| { |
| char *wd, *strl; |
| char emptystring[] = ""; |
| l_int32 n, i, len, totlen; |
| SARRAY *sal, *saout; |
| |
| PROCNAME("sarrayConvertWordsToLines"); |
| |
| if (!sa) |
| return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL); |
| |
| if ((saout = sarrayCreate(0)) == NULL) |
| return (SARRAY *)ERROR_PTR("saout not defined", procName, NULL); |
| |
| n = sarrayGetCount(sa); |
| totlen = 0; |
| sal = NULL; |
| for (i = 0; i < n; i++) { |
| if (!sal) { |
| if ((sal = sarrayCreate(0)) == NULL) |
| return (SARRAY *)ERROR_PTR("sal not made", procName, NULL); |
| } |
| wd = sarrayGetString(sa, i, L_NOCOPY); |
| len = strlen(wd); |
| if (len == 0) { /* end of paragraph: end line & insert blank line */ |
| if (totlen > 0) { |
| strl = sarrayToString(sal, 2); |
| sarrayAddString(saout, strl, L_INSERT); |
| } |
| sarrayAddString(saout, emptystring, L_COPY); |
| sarrayDestroy(&sal); |
| totlen = 0; |
| } |
| else if (totlen == 0 && len + 1 > linesize) { /* long word! */ |
| sarrayAddString(saout, wd, L_COPY); /* copy to one line */ |
| } |
| else if (totlen + len + 1 > linesize) { /* end line & start new one */ |
| strl = sarrayToString(sal, 2); |
| sarrayAddString(saout, strl, L_INSERT); |
| sarrayDestroy(&sal); |
| if ((sal = sarrayCreate(0)) == NULL) |
| return (SARRAY *)ERROR_PTR("sal not made", procName, NULL); |
| sarrayAddString(sal, wd, L_COPY); |
| totlen = len + 1; |
| } |
| else { /* add to current line */ |
| sarrayAddString(sal, wd, L_COPY); |
| totlen += len + 1; |
| } |
| } |
| if (totlen > 0) { /* didn't end with blank line; output last line */ |
| strl = sarrayToString(sal, 2); |
| sarrayAddString(saout, strl, L_INSERT); |
| sarrayDestroy(&sal); |
| } |
| |
| return saout; |
| |
| } |
| |
| |
| /*----------------------------------------------------------------------* |
| * Split string on separator list * |
| *----------------------------------------------------------------------*/ |
| /* |
| * sarraySplitString() |
| * |
| * Input: sa (to append to; typically empty initially) |
| * str (string to split; not changed) |
| * separators (characters that split input string) |
| * Return: 0 if OK, 1 on error. |
| * |
| * Notes: |
| * (1) This uses strtokSafe(). See the notes there in utils.c. |
| */ |
| l_int32 |
| sarraySplitString(SARRAY *sa, |
| const char *str, |
| const char *separators) |
| { |
| char *cstr, *substr, *saveptr; |
| |
| PROCNAME("sarraySplitString"); |
| |
| if (!sa) |
| return ERROR_INT("sa not defined", procName, 1); |
| if (!str) |
| return ERROR_INT("str not defined", procName, 1); |
| if (!separators) |
| return ERROR_INT("separators not defined", procName, 1); |
| |
| cstr = stringNew(str); /* preserves const-ness of input str */ |
| substr = strtokSafe(cstr, separators, &saveptr); |
| if (substr) |
| sarrayAddString(sa, substr, L_INSERT); |
| while ((substr = strtokSafe(NULL, separators, &saveptr))) |
| sarrayAddString(sa, substr, L_INSERT); |
| FREE(cstr); |
| |
| return 0; |
| } |
| |
| |
| /*----------------------------------------------------------------------* |
| * Filter sarray * |
| *----------------------------------------------------------------------*/ |
| /*! |
| * sarraySelectBySubstring() |
| * |
| * Input: sain (input sarray) |
| * substr (<optional> substring for matching; can be NULL) |
| * Return: saout (output sarray, filtered with substring) or null on error |
| * |
| * Notes: |
| * (1) This selects all strings in sain that have substr as a substring. |
| * Note that we can't use strncmp() because we're looking for |
| * a match to the substring anywhere within each filename. |
| * (2) If substr == NULL, returns a copy of the sarray. |
| */ |
| SARRAY * |
| sarraySelectBySubstring(SARRAY *sain, |
| const char *substr) |
| { |
| char *str; |
| l_int32 n, i, offset, found; |
| SARRAY *saout; |
| |
| PROCNAME("sarraySelectBySubstring"); |
| |
| if (!sain) |
| return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL); |
| |
| n = sarrayGetCount(sain); |
| if (!substr || n == 0) |
| return sarrayCopy(sain); |
| |
| saout = sarrayCreate(n); |
| for (i = 0; i < n; i++) { |
| str = sarrayGetString(sain, i, L_NOCOPY); |
| arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr, |
| strlen(substr), &offset, &found); |
| if (found) |
| sarrayAddString(saout, str, L_COPY); |
| } |
| |
| return saout; |
| } |
| |
| |
| /*! |
| * sarrayParseRange() |
| * |
| * Input: sa (input sarray) |
| * start (index to start range search) |
| * &actualstart (<return> index of actual start; may be > 'start') |
| * &end (<return> index of end) |
| * &newstart (<return> index of start of next range) |
| * substr (substring for matching at beginning of string) |
| * loc (byte offset within the string for the pattern; use |
| * -1 if the location does not matter); |
| * Return: 0 if valid range found; 1 otherwise |
| * |
| * Notes: |
| * (1) This finds the range of the next set of strings in SA, |
| * beginning the search at 'start', that does NOT have |
| * the substring 'substr' either at the indicated location |
| * in the string or anywhere in the string. The input |
| * variable 'loc' is the specified offset within the string; |
| * use -1 to indicate 'anywhere in the string'. |
| * (2) Always check the return value to verify that a valid range |
| * was found. |
| * (3) If a valid range is not found, the values of actstart, |
| * end and newstart are all set to the size of sa. |
| * (4) If this is the last valid range, newstart returns the value n. |
| * In use, this should be tested before calling the function. |
| * (5) Usage example. To find all the valid ranges in a file |
| * where the invalid lines begin with two dashes, copy each |
| * line in the file to a string in an sarray, and do: |
| * start = 0; |
| * while (!sarrayParseRange(sa, start, &actstart, &end, &start, |
| * "--", 0)) |
| * fprintf(stderr, "start = %d, end = %d\n", actstart, end); |
| */ |
| l_int32 |
| sarrayParseRange(SARRAY *sa, |
| l_int32 start, |
| l_int32 *pactualstart, |
| l_int32 *pend, |
| l_int32 *pnewstart, |
| const char *substr, |
| l_int32 loc) |
| { |
| char *str; |
| l_int32 n, i, offset, found; |
| |
| PROCNAME("sarrayParseRange"); |
| |
| if (!sa) |
| return ERROR_INT("sa not defined", procName, 1); |
| if (!pactualstart || !pend || !pnewstart) |
| return ERROR_INT("not all range addresses defined", procName, 1); |
| n = sarrayGetCount(sa); |
| *pactualstart = *pend = *pnewstart = n; |
| if (!substr) |
| return ERROR_INT("substr not defined", procName, 1); |
| |
| /* Look for the first string without the marker */ |
| if (start < 0 || start >= n) |
| return 1; |
| for (i = start; i < n; i++) { |
| str = sarrayGetString(sa, i, L_NOCOPY); |
| arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr, |
| strlen(substr), &offset, &found); |
| if (loc < 0) { |
| if (!found) break; |
| } else { |
| if (!found || offset != loc) break; |
| } |
| } |
| start = i; |
| if (i == n) /* couldn't get started */ |
| return 1; |
| |
| /* Look for the last string without the marker */ |
| *pactualstart = start; |
| for (i = start + 1; i < n; i++) { |
| str = sarrayGetString(sa, i, L_NOCOPY); |
| arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr, |
| strlen(substr), &offset, &found); |
| if (loc < 0) { |
| if (found) break; |
| } else { |
| if (found && offset == loc) break; |
| } |
| } |
| *pend = i - 1; |
| start = i; |
| if (i == n) /* no further range */ |
| return 0; |
| |
| /* Look for the first string after *pend without the marker. |
| * This will start the next run of strings, if it exists. */ |
| for (i = start; i < n; i++) { |
| str = sarrayGetString(sa, i, L_NOCOPY); |
| arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr, |
| strlen(substr), &offset, &found); |
| if (loc < 0) { |
| if (!found) break; |
| } else { |
| if (!found || offset != loc) break; |
| } |
| } |
| if (i < n) |
| *pnewstart = i; |
| |
| return 0; |
| } |
| |
| |
| /*----------------------------------------------------------------------* |
| * Sort * |
| *----------------------------------------------------------------------*/ |
| /*! |
| * sarraySort() |
| * |
| * Input: saout (output sarray; can be NULL or equal to sain) |
| * sain (input sarray) |
| * sortorder (L_SORT_INCREASING or L_SORT_DECREASING) |
| * Return: saout (output sarray, sorted by ascii value), or null on error |
| * |
| * Notes: |
| * (1) Set saout = sain for in-place; otherwise, set naout = NULL. |
| * (2) Shell sort, modified from K&R, 2nd edition, p.62. |
| * Slow but simple O(n logn) sort. |
| */ |
| SARRAY * |
| sarraySort(SARRAY *saout, |
| SARRAY *sain, |
| l_int32 sortorder) |
| { |
| char **array; |
| char *tmp; |
| l_int32 n, i, j, gap; |
| |
| PROCNAME("sarraySort"); |
| |
| if (!sain) |
| return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL); |
| |
| /* Make saout if necessary; otherwise do in-place */ |
| if (!saout) |
| saout = sarrayCopy(sain); |
| else if (sain != saout) |
| return (SARRAY *)ERROR_PTR("invalid: not in-place", procName, NULL); |
| array = saout->array; /* operate directly on the array */ |
| n = sarrayGetCount(saout); |
| |
| /* Shell sort */ |
| for (gap = n/2; gap > 0; gap = gap / 2) { |
| for (i = gap; i < n; i++) { |
| for (j = i - gap; j >= 0; j -= gap) { |
| if ((sortorder == L_SORT_INCREASING && |
| stringCompareLexical(array[j], array[j + gap])) || |
| (sortorder == L_SORT_DECREASING && |
| stringCompareLexical(array[j + gap], array[j]))) |
| { |
| tmp = array[j]; |
| array[j] = array[j + gap]; |
| array[j + gap] = tmp; |
| } |
| } |
| } |
| } |
| |
| return saout; |
| } |
| |
| |
| /*! |
| * stringCompareLexical() |
| * |
| * Input: str1 |
| * str2 |
| * Return: 1 if str1 > str2 (lexically); 0 otherwise |
| * |
| * Notes: |
| * (1) If the lexical values are identical, return a 0, to |
| * indicate that no swapping is required to sort the strings. |
| */ |
| l_int32 |
| stringCompareLexical(const char *str1, |
| const char *str2) |
| { |
| l_int32 i, len1, len2, len; |
| |
| PROCNAME("sarrayCompareLexical"); |
| |
| if (!str1) |
| return ERROR_INT("str1 not defined", procName, 1); |
| if (!str2) |
| return ERROR_INT("str2 not defined", procName, 1); |
| |
| len1 = strlen(str1); |
| len2 = strlen(str2); |
| len = L_MIN(len1, len2); |
| |
| for (i = 0; i < len; i++) { |
| if (str1[i] == str2[i]) |
| continue; |
| if (str1[i] > str2[i]) |
| return 1; |
| else |
| return 0; |
| } |
| |
| if (len1 > len2) |
| return 1; |
| else |
| return 0; |
| } |
| |
| |
| /*----------------------------------------------------------------------* |
| * Serialize for I/O * |
| *----------------------------------------------------------------------*/ |
| /*! |
| * sarrayRead() |
| * |
| * Input: filename |
| * Return: sarray, or null on error |
| */ |
| SARRAY * |
| sarrayRead(const char *filename) |
| { |
| FILE *fp; |
| SARRAY *sa; |
| |
| PROCNAME("sarrayRead"); |
| |
| if (!filename) |
| return (SARRAY *)ERROR_PTR("filename not defined", procName, NULL); |
| |
| if ((fp = fopenReadStream(filename)) == NULL) |
| return (SARRAY *)ERROR_PTR("stream not opened", procName, NULL); |
| |
| if ((sa = sarrayReadStream(fp)) == NULL) { |
| fclose(fp); |
| return (SARRAY *)ERROR_PTR("sa not read", procName, NULL); |
| } |
| |
| fclose(fp); |
| return sa; |
| } |
| |
| |
| /*! |
| * sarrayReadStream() |
| * |
| * Input: stream |
| * Return: sarray, or null on error |
| * |
| * Notes: |
| * (1) We store the size of each string along with the string. |
| * (2) This allows a string to have embedded newlines. By reading |
| * the entire string, as determined by its size, we are |
| * not affected by any number of embedded newlines. |
| */ |
| SARRAY * |
| sarrayReadStream(FILE *fp) |
| { |
| char *stringbuf; |
| l_int32 i, n, size, index, bufsize, ret, version; |
| SARRAY *sa; |
| |
| PROCNAME("sarrayReadStream"); |
| |
| if (!fp) |
| return (SARRAY *)ERROR_PTR("stream not defined", procName, NULL); |
| |
| ret = fscanf(fp, "\nSarray Version %d\n", &version); |
| if (ret != 1) |
| return (SARRAY *)ERROR_PTR("not an sarray file", procName, NULL); |
| if (version != SARRAY_VERSION_NUMBER) |
| return (SARRAY *)ERROR_PTR("invalid sarray version", procName, NULL); |
| fscanf(fp, "Number of strings = %d\n", &n); |
| |
| if ((sa = sarrayCreate(n)) == NULL) |
| return (SARRAY *)ERROR_PTR("sa not made", procName, NULL); |
| bufsize = L_BUF_SIZE + 1; |
| if ((stringbuf = (char *)CALLOC(bufsize, sizeof(char))) == NULL) |
| return (SARRAY *)ERROR_PTR("stringbuf not made", procName, NULL); |
| |
| for (i = 0; i < n; i++) { |
| /* Get the size of the stored string */ |
| fscanf(fp, "%d[%d]:", &index, &size); |
| /* Expand the string buffer if necessary */ |
| if (size > bufsize - 5) { |
| FREE(stringbuf); |
| bufsize = (l_int32)(1.5 * size); |
| stringbuf = (char *)CALLOC(bufsize, sizeof(char)); |
| } |
| /* Read the stored string, plus leading spaces and trailing \n */ |
| fread(stringbuf, 1, size + 3, fp); |
| /* Remove the \n that was added by sarrayWriteStream() */ |
| stringbuf[size + 2] = '\0'; |
| /* Copy it in, skipping the 2 leading spaces */ |
| sarrayAddString(sa, stringbuf + 2, L_COPY); |
| } |
| fscanf(fp, "\n"); |
| |
| FREE(stringbuf); |
| return sa; |
| } |
| |
| |
| /*! |
| * sarrayWrite() |
| * |
| * Input: filename |
| * sarray |
| * Return: 0 if OK; 1 on error |
| */ |
| l_int32 |
| sarrayWrite(const char *filename, |
| SARRAY *sa) |
| { |
| FILE *fp; |
| |
| PROCNAME("sarrayWrite"); |
| |
| if (!filename) |
| return ERROR_INT("filename not defined", procName, 1); |
| if (!sa) |
| return ERROR_INT("sa not defined", procName, 1); |
| |
| if ((fp = fopen(filename, "w")) == NULL) |
| return ERROR_INT("stream not opened", procName, 1); |
| |
| if (sarrayWriteStream(fp, sa)) |
| return ERROR_INT("sa not written to stream", procName, 1); |
| |
| fclose(fp); |
| return 0; |
| } |
| |
| |
| /*! |
| * sarrayWriteStream() |
| * |
| * Input: stream |
| * sarray |
| * Returns 0 if OK; 1 on error |
| * |
| * Notes: |
| * (1) This appends a '\n' to each string, which is stripped |
| * off by sarrayReadStream(). |
| */ |
| l_int32 |
| sarrayWriteStream(FILE *fp, |
| SARRAY *sa) |
| { |
| l_int32 i, n, len; |
| |
| PROCNAME("sarrayWriteStream"); |
| |
| if (!fp) |
| return ERROR_INT("stream not defined", procName, 1); |
| if (!sa) |
| return ERROR_INT("sa not defined", procName, 1); |
| |
| n = sarrayGetCount(sa); |
| fprintf(fp, "\nSarray Version %d\n", SARRAY_VERSION_NUMBER); |
| fprintf(fp, "Number of strings = %d\n", n); |
| for (i = 0; i < n; i++) { |
| len = strlen(sa->array[i]); |
| fprintf(fp, " %d[%d]: %s\n", i, len, sa->array[i]); |
| } |
| fprintf(fp, "\n"); |
| |
| return 0; |
| } |
| |
| |
| /*! |
| * sarrayAppend() |
| * |
| * Input: filename |
| * sarray |
| * Return: 0 if OK; 1 on error |
| */ |
| l_int32 |
| sarrayAppend(const char *filename, |
| SARRAY *sa) |
| { |
| FILE *fp; |
| |
| PROCNAME("sarrayAppend"); |
| |
| if (!filename) |
| return ERROR_INT("filename not defined", procName, 1); |
| if (!sa) |
| return ERROR_INT("sa not defined", procName, 1); |
| |
| if ((fp = fopen(filename, "a")) == NULL) |
| return ERROR_INT("stream not opened", procName, 1); |
| |
| if (sarrayWriteStream(fp, sa)) |
| return ERROR_INT("sa not appended to stream", procName, 1); |
| |
| fclose(fp); |
| return 0; |
| } |
| |
| |
| /*---------------------------------------------------------------------* |
| * Directory filenames * |
| *---------------------------------------------------------------------*/ |
| /*! |
| * getSortedPathnamesInDirectory() |
| * |
| * Input: directory name |
| * substr (<optional> substring filter on filenames; can be NULL) |
| * firstpage (0-based) |
| * npages (use 0 for all to the end) |
| * Return: sarray of sorted pathnames, or NULL on error |
| * |
| * Notes: |
| * (1) If 'substr' is not NULL, only filenames that contain |
| * the substring can be returned. If 'substr' is NULL, |
| * none of the filenames are filtered out. |
| * (2) The files in the directory, after optional filtering by |
| * the substring, are lexically sorted in increasing order. |
| * The full pathnames are returned for the requested sequence. |
| * If no files are found after filtering, returns an empty sarray. |
| */ |
| SARRAY * |
| getSortedPathnamesInDirectory(const char *dirname, |
| const char *substr, |
| l_int32 firstpage, |
| l_int32 npages) |
| { |
| char *fname, *fullname; |
| l_int32 i, nfiles, lastpage; |
| SARRAY *sa, *safiles, *saout; |
| |
| PROCNAME("getSortedPathnamesInDirectory"); |
| |
| if (!dirname) |
| return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL); |
| |
| if ((sa = getFilenamesInDirectory(dirname)) == NULL) |
| return (SARRAY *)ERROR_PTR("sa not made", procName, NULL); |
| safiles = sarraySelectBySubstring(sa, substr); |
| sarrayDestroy(&sa); |
| nfiles = sarrayGetCount(safiles); |
| if (nfiles == 0) { |
| L_WARNING("no files found", procName); |
| return safiles; |
| } |
| |
| sarraySort(safiles, safiles, L_SORT_INCREASING); |
| |
| firstpage = L_MIN(L_MAX(firstpage, 0), nfiles - 1); |
| if (npages == 0) |
| npages = nfiles - firstpage; |
| lastpage = L_MIN(firstpage + npages - 1, nfiles - 1); |
| |
| saout = sarrayCreate(lastpage - firstpage + 1); |
| for (i = firstpage; i <= lastpage; i++) { |
| fname = sarrayGetString(safiles, i, L_NOCOPY); |
| fullname = genPathname(dirname, fname); |
| sarrayAddString(saout, fullname, L_INSERT); |
| } |
| |
| sarrayDestroy(&safiles); |
| return saout; |
| } |
| |
| |
| /*! |
| * getFilenamesInDirectory() |
| * |
| * Input: directory name |
| * Return: sarray of file names, or NULL on error |
| * |
| * Notes: |
| * (1) The versions compiled under unix and cygwin use the POSIX C |
| * library commands for handling directories. For windows, |
| * there is a separate implementation. |
| * (2) It returns an array of filename tails; i.e., only the part of |
| * the path after the last slash. |
| * (3) Use of the d_type field of dirent is not portable: |
| * "According to POSIX, the dirent structure contains a field |
| * char d_name[] of unspecified size, with at most NAME_MAX |
| * characters preceding the terminating null character. Use |
| * of other fields will harm the portability of your programs." |
| * (4) As a consequence of (3), we note several things: |
| * - MINGW doesn't have a d_type member. |
| * - Older versions of gcc (e.g., 2.95.3) return DT_UNKNOWN |
| * for d_type from all files. |
| * On these systems, this function will return directories |
| * (except for '.' and '..', which are eliminated using |
| * the d_name field). |
| */ |
| #ifndef COMPILER_MSVC |
| |
| SARRAY * |
| getFilenamesInDirectory(const char *dirname) |
| { |
| char *name; |
| l_int32 len; |
| SARRAY *safiles; |
| DIR *pdir; |
| struct dirent *pdirentry; |
| |
| PROCNAME("getFilenamesInDirectory"); |
| |
| if (!dirname) |
| return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL); |
| |
| if ((safiles = sarrayCreate(0)) == NULL) |
| return (SARRAY *)ERROR_PTR("safiles not made", procName, NULL); |
| if ((pdir = opendir(dirname)) == NULL) |
| return (SARRAY *)ERROR_PTR("pdir not opened", procName, NULL); |
| while ((pdirentry = readdir(pdir))) { |
| |
| /* It's nice to ignore directories. For this it is necessary to |
| * define _BSD_SOURCE in the CC command, because the DT_DIR |
| * flag is non-standard. */ |
| #if !defined(__MINGW32__) && !defined(_CYGWIN_ENVIRON) && !defined(__SOLARIS__) |
| if (pdirentry->d_type == DT_DIR) |
| continue; |
| #endif |
| |
| /* Filter out "." and ".." if they're passed through */ |
| name = pdirentry->d_name; |
| len = strlen(name); |
| if (len == 1 && name[len - 1] == '.') continue; |
| if (len == 2 && name[len - 1] == '.' && name[len - 2] == '.') continue; |
| sarrayAddString(safiles, name, L_COPY); |
| } |
| closedir(pdir); |
| |
| return safiles; |
| } |
| |
| #else /* COMPILER_MSVC */ |
| |
| /* http://msdn2.microsoft.com/en-us/library/aa365200(VS.85).aspx */ |
| #include <windows.h> |
| #include <tchar.h> |
| SARRAY * |
| getFilenamesInDirectory(const char *dirname) |
| { |
| SARRAY *safiles; |
| WIN32_FIND_DATAA ffd; |
| size_t length_of_path; |
| CHAR szDir[MAX_PATH]; /* MAX_PATH is defined in stdlib.h */ |
| HANDLE hFind = INVALID_HANDLE_VALUE; |
| |
| PROCNAME("getFilenamesInDirectory"); |
| |
| if (!dirname) |
| return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL); |
| |
| length_of_path = strlen(dirname); |
| if (length_of_path > (MAX_PATH - 2)) |
| return (SARRAY *)ERROR_PTR("dirname is to long", procName, NULL); |
| |
| strncpy(szDir, dirname, MAX_PATH); |
| szDir[MAX_PATH - 1] = '\0'; |
| strncat(szDir, TEXT("\\*"), MAX_PATH - strlen(szDir)); |
| |
| if ((safiles = sarrayCreate(0)) == NULL) |
| return (SARRAY *)ERROR_PTR("safiles not made", procName, NULL); |
| hFind = FindFirstFileA(szDir, &ffd); |
| if (INVALID_HANDLE_VALUE == hFind) { |
| sarrayDestroy(&safiles); |
| return (SARRAY *)ERROR_PTR("hFind not opened", procName, NULL); |
| } |
| |
| while (FindNextFileA(hFind, &ffd) != 0) { |
| if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) /* skip dirs */ |
| continue; |
| sarrayAddString(safiles, ffd.cFileName, L_COPY); |
| } |
| |
| FindClose(hFind); |
| return safiles; |
| } |
| |
| #endif /* COMPILER_MSVC */ |
| |