gnulib/lib/uniname/gen-uninames.lisp - toolchain/make - Git at Google

 #!/usr/local/bin/clisp -C

 ;;; Creation of gnulib's uninames.h from the UnicodeData.txt table.

 ;;; Copyright (C) 2000-2020 Free Software Foundation, Inc.
 ;;; Written by Bruno Haible <bruno@clisp.org>, 2000-12-28.
 ;;;
 ;;; This program is free software: you can redistribute it and/or modify
 ;;; it under the terms of the GNU General Public License as published by
 ;;; the Free Software Foundation; either version 3 of the License, or
 ;;; (at your option) any later version.
 ;;;
 ;;; This program is distributed in the hope that it will be useful,
 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ;;; GNU General Public License for more details.
 ;;;
 ;;; You should have received a copy of the GNU General Public License
 ;;; along with this program.  If not, see <https://www.gnu.org/licenses/>.

 (defparameter add-comments nil)

 (defstruct unicode-char
   (index nil :type integer)
   (name nil :type string)
   word-indices
   word-indices-index
 )

 (defstruct range
   (index nil :type integer)
   (start-code nil :type integer)
   (end-code nil :type integer)
 )

 (defstruct word-list
   (hashed nil :type hash-table)
   (sorted nil :type list)
   size                          ; number of characters total
   length                        ; number of words
 )

 (defun main (inputfile outputfile aliasfile)
   (declare (type string inputfile outputfile aliasfile))
   #+UNICODE (setq *default-file-encoding* charset:utf-8)
   (let ((all-chars '())
         (all-chars-hashed (make-hash-table :test #'equal))
         (all-aliases '())
         all-chars-and-aliases
         (all-ranges '())
         (name-index 0)
         range)
     ;; Read all characters and names from the input file.
     (with-open-file (istream inputfile :direction :input)
       (loop
         (let ((line (read-line istream nil nil)))
           (unless line (return))
           (let* ((i1 (position #\; line))
                  (i2 (position #\; line :start (1+ i1)))
                  (code-string (subseq line 0 i1))
                  (code (parse-integer code-string :radix 16))
                  (name-string (subseq line (1+ i1) i2)))
             ; Ignore characters whose name starts with "<".
             (unless (eql (char name-string 0) #\<)
               ; Also ignore Hangul syllables; they are treated specially.
               (unless (<= #xAC00 code #xD7A3)
                 ; Also ignore CJK compatibility ideographs; they are treated
                 ; specially as well.
                 (unless (or (<= #xF900 code #xFA2D) (<= #xFA30 code #xFA6A)
                             (<= #xFA70 code #xFAD9) (<= #x2F800 code #x2FA1D))
                   ;; Also ignore variationselectors; they are treated
                   ;; specially as well.
                   (unless (or (<= #xFE00 code #xFE0F) (<= #xE0100 code #xE01EF))
                     (push (make-unicode-char :index name-index
                                              :name name-string)
                           all-chars)
                     (setf (gethash code all-chars-hashed) (car all-chars))
                     ;; Update the contiguous range, or start a new range.
                     (if (and range (= (1+ (range-end-code range)) code))
                         (setf (range-end-code range) code)
                       (progn
                         (when range
                           (push range all-ranges))
                         (setq range (make-range :index name-index
                                                 :start-code code
                                                 :end-code code))))
                     (incf name-index)
                     (setq last-code code)
                   ) ) ) )
     ) ) ) )
     (setq all-chars (nreverse all-chars))
     (if range
         (push range all-ranges))
     (setq all-ranges (nreverse all-ranges))
     (when aliasfile
       ;; Read all characters and names from the alias file.
       (with-open-file (istream aliasfile :direction :input)
         (loop
          (let ((line (read-line istream nil nil)))
            (unless line (return))
            (let* ((i1 (position #\; line))
                   (i2 (position #\; line :start (1+ i1)))
                   (code-string (subseq line 0 i1))
                   (code (parse-integer code-string :radix 16))
                   (name-string (subseq line (1+ i1) i2))
                   (uc (gethash code all-chars-hashed)))
              (when uc
                (push (make-unicode-char :index (unicode-char-index uc)
                                         :name name-string)
                      all-aliases)
              ) ) ) ) ) )
     (setq all-aliases (nreverse all-aliases)
           all-chars-and-aliases (append all-chars all-aliases))
     ;; Split into words.
     (let ((words-by-length (make-array 0 :adjustable t)))
       (dolist (name (list* "HANGUL SYLLABLE" "CJK COMPATIBILITY" "VARIATION"
                            (mapcar #'unicode-char-name all-chars-and-aliases)))
         (let ((i1 0))
           (loop
             (when (>= i1 (length name)) (return))
             (let ((i2 (or (position #\Space name :start i1) (length name))))
               (let* ((word (subseq name i1 i2))
                      (len (length word)))
                 (when (>= len (length words-by-length))
                   (adjust-array words-by-length (1+ len))
                 )
                 (unless (aref words-by-length len)
                   (setf (aref words-by-length len)
                         (make-word-list
                           :hashed (make-hash-table :test #'equal)
                           :sorted '()
                 ) )     )
                 (let ((word-list (aref words-by-length len)))
                   (unless (gethash word (word-list-hashed word-list))
                     (setf (gethash word (word-list-hashed word-list)) t)
                     (push word (word-list-sorted word-list))
                 ) )
               )
               (setq i1 (1+ i2))
       ) ) ) )
       ;; Sort the word lists.
       (dotimes (len (length words-by-length))
         (unless (aref words-by-length len)
           (setf (aref words-by-length len)
                 (make-word-list
                   :hashed (make-hash-table :test #'equal)
                   :sorted '()
         ) )     )
         (let ((word-list (aref words-by-length len)))
           (setf (word-list-sorted word-list)
                 (sort (word-list-sorted word-list) #'string<)
           )
           (setf (word-list-size word-list)
                 (reduce #'+ (mapcar #'length (word-list-sorted word-list)))
           )
           (setf (word-list-length word-list)
                 (length (word-list-sorted word-list))
       ) ) )
       ;; Output the tables.
       (with-open-file (ostream outputfile :direction :output
                        #+UNICODE :external-format #+UNICODE charset:ascii)
         (format ostream "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */~%")
         (format ostream "/*~%")
         (format ostream " * ~A~%" (file-namestring outputfile))
         (format ostream " *~%")
         (format ostream " * Unicode character name table.~%")
         (format ostream " * Generated automatically by the gen-uninames utility.~%")
         (format ostream " */~%")
         (format ostream "~%")
         (format ostream "static const char unicode_name_words[~D] = {~%"
                         (let ((sum 0))
                           (dotimes (len (length words-by-length))
                             (let ((word-list (aref words-by-length len)))
                               (incf sum (word-list-size word-list))
                           ) )
                           sum
         )               )
         (dotimes (len (length words-by-length))
           (let ((word-list (aref words-by-length len)))
             (dolist (word (word-list-sorted word-list))
               (format ostream " ~{ '~C',~}~%" (coerce word 'list))
         ) ) )
         (format ostream "};~%")
         (format ostream "#define UNICODE_CHARNAME_NUM_WORDS ~D~%"
                         (let ((sum 0))
                           (dotimes (len (length words-by-length))
                             (let ((word-list (aref words-by-length len)))
                               (incf sum (word-list-length word-list))
                           ) )
                           sum
         )               )
         #| ; Redundant data
         (format ostream "static const uint16_t unicode_name_word_offsets[~D] = {~%"
                         (let ((sum 0))
                           (dotimes (len (length words-by-length))
                             (let ((word-list (aref words-by-length len)))
                               (incf sum (word-list-length word-list))
                           ) )
                           sum
         )               )
         (dotimes (len (length words-by-length))
           (let ((word-list (aref words-by-length len)))
             (when (word-list-sorted word-list)
               (format ostream " ")
               (do ((l (word-list-sorted word-list) (cdr l))
                    (offset 0 (+ offset (length (car l)))))
                   ((endp l))
                 (format ostream "~<~% ~0,79:; ~D,~>" offset)
               )
               (format ostream "~%")
         ) ) )
         (format ostream "};~%")
         |#
         (format ostream "static const struct { uint32_t extra_offset; uint16_t ind_offset; } unicode_name_by_length[~D] = {~%"
                         (1+ (length words-by-length))
         )
         (let ((extra-offset 0)
               (ind-offset 0))
           (dotimes (len (length words-by-length))
             (let ((word-list (aref words-by-length len)))
               (format ostream "  { ~D, ~D },~%" extra-offset ind-offset)
               (incf extra-offset (word-list-size word-list))
               (incf ind-offset (word-list-length word-list))
           ) )
           (format ostream "  { ~D, ~D }~%" extra-offset ind-offset)
         )
         (format ostream "};~%")
         (let ((ind-offset 0))
           (dotimes (len (length words-by-length))
             (let ((word-list (aref words-by-length len)))
               (dolist (word (word-list-sorted word-list))
                 (setf (gethash word (word-list-hashed word-list)) ind-offset)
                 (incf ind-offset)
         ) ) ) )
         (dolist (word '("HANGUL" "SYLLABLE" "CJK" "COMPATIBILITY" "VARIATION"))
           (format ostream "#define UNICODE_CHARNAME_WORD_~A ~D~%" word
                           (gethash word (word-list-hashed (aref words-by-length (length word))))
         ) )
         ;; Compute the word-indices for every unicode-char.
         (dolist (uc all-chars-and-aliases)
           (let ((name (unicode-char-name uc))
                 (indices '()))
             (let ((i1 0))
               (loop
                 (when (>= i1 (length name)) (return))
                 (let ((i2 (or (position #\Space name :start i1) (length name))))
                   (let* ((word (subseq name i1 i2))
                          (len (length word)))
                     (push (gethash word (word-list-hashed (aref words-by-length len)))
                           indices
                     )
                   )
                   (setq i1 (1+ i2))
             ) ) )
             (setf (unicode-char-word-indices uc)
                   (coerce (nreverse indices) 'vector)
             )
         ) )
         ;; Sort the list of unicode-chars by word-indices.
         (setq all-chars-and-aliases
               (sort all-chars-and-aliases
                     (lambda (vec1 vec2)
                       (let ((len1 (length vec1))
                             (len2 (length vec2)))
                         (do ((i 0 (1+ i)))
                             (nil)
                           (if (< i len2)
                             (if (< i len1)
                               (cond ((< (aref vec1 i) (aref vec2 i)) (return t))
                                     ((> (aref vec1 i) (aref vec2 i)) (return nil))
                               )
                               (return t)
                             )
                             (return nil)
                     ) ) ) )
                     :key #'unicode-char-word-indices
         )     )
         ;; Output the word-indices.
         (format ostream "static const uint16_t unicode_names[~D] = {~%"
                         (reduce #'+ (mapcar (lambda (uc) (length (unicode-char-word-indices uc))) all-chars-and-aliases))
         )
         (let ((i 0))
           (dolist (uc all-chars-and-aliases)
             (format ostream " ~{ ~D,~}"
                             (maplist (lambda (r) (+ (* 2 (car r)) (if (cdr r) 1 0)))
                                      (coerce (unicode-char-word-indices uc) 'list)
                             )
             )
             (when add-comments
               (format ostream "~40T/* ~A */" (unicode-char-name uc))
             )
             (format ostream "~%")
             (setf (unicode-char-word-indices-index uc) i)
             (incf i (length (unicode-char-word-indices uc)))
         ) )
         (format ostream "};~%")
         (format ostream "static const struct { uint16_t index; uint32_t name:24; }~%")
         (format ostream "#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7)~%__attribute__((__packed__))~%#endif~%")
         (format ostream "unicode_name_to_index[~D] = {~%"
                         (length all-chars-and-aliases)
         )
         (dolist (uc all-chars-and-aliases)
           (format ostream "  { 0x~4,'0X, ~D },"
                           (unicode-char-index uc)
                           (unicode-char-word-indices-index uc)
           )
           (when add-comments
             (format ostream "~21T/* ~A */" (unicode-char-name uc))
           )
           (format ostream "~%")
         )
         (format ostream "};~%")
         (format ostream "static const struct { uint16_t index; uint32_t name:24; }~%")
         (format ostream "#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7)~%__attribute__((__packed__))~%#endif~%")
         (format ostream "unicode_index_to_name[~D] = {~%"
                         (length all-chars)
         )
         (dolist (uc (sort (copy-list all-chars) #'< :key #'unicode-char-index))
           (format ostream "  { 0x~4,'0X, ~D },"
                           (unicode-char-index uc)
                           (unicode-char-word-indices-index uc)
           )
           (when add-comments
             (format ostream "~21T/* ~A */" (unicode-char-name uc))
           )
           (format ostream "~%")
         )
         (format ostream "};~%")
         (format ostream "#define UNICODE_CHARNAME_MAX_LENGTH ~D~%"
                         (reduce #'max (mapcar (lambda (uc) (length (unicode-char-name uc))) all-chars-and-aliases))
         )
         (format ostream "#define UNICODE_CHARNAME_MAX_WORDS ~D~%"
                         (reduce #'max (mapcar (lambda (uc) (length (unicode-char-word-indices uc))) all-chars-and-aliases))
         )
         (format ostream "static const struct { uint16_t index; uint32_t gap; uint16_t length; } unicode_ranges[~D] = {~%"
                         (length all-ranges))
         (dolist (range all-ranges)
           (format ostream "  { ~D, ~D, ~D },~%"
                   (range-index range)
                   (- (range-start-code range) (range-index range))
                   (1+ (- (range-end-code range) (range-start-code range))))
         )
         (format ostream "};~%")
       )
 ) ) )

 (main (first *args*) (second *args*) (third *args*))
	#!/usr/local/bin/clisp -C

	;;; Creation of gnulib's uninames.h from the UnicodeData.txt table.

	;;; Copyright (C) 2000-2020 Free Software Foundation, Inc.
	;;; Written by Bruno Haible <bruno@clisp.org>, 2000-12-28.
	;;;
	;;; This program is free software: you can redistribute it and/or modify
	;;; it under the terms of the GNU General Public License as published by
	;;; the Free Software Foundation; either version 3 of the License, or
	;;; (at your option) any later version.
	;;;
	;;; This program is distributed in the hope that it will be useful,
	;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
	;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	;;; GNU General Public License for more details.
	;;;
	;;; You should have received a copy of the GNU General Public License
	;;; along with this program. If not, see <https://www.gnu.org/licenses/>.

	(defparameter add-comments nil)

	(defstruct unicode-char
	(index nil :type integer)
	(name nil :type string)
	word-indices
	word-indices-index
	)

	(defstruct range
	(index nil :type integer)
	(start-code nil :type integer)
	(end-code nil :type integer)
	)

	(defstruct word-list
	(hashed nil :type hash-table)
	(sorted nil :type list)
	size ; number of characters total
	length ; number of words
	)

	(defun main (inputfile outputfile aliasfile)
	(declare (type string inputfile outputfile aliasfile))
	#+UNICODE (setq default-file-encoding charset:utf-8)
	(let ((all-chars '())
	(all-chars-hashed (make-hash-table :test #'equal))
	(all-aliases '())
	all-chars-and-aliases
	(all-ranges '())
	(name-index 0)
	range)
	;; Read all characters and names from the input file.
	(with-open-file (istream inputfile :direction :input)
	(loop
	(let ((line (read-line istream nil nil)))
	(unless line (return))
	(let* ((i1 (position #\; line))
	(i2 (position #\; line :start (1+ i1)))
	(code-string (subseq line 0 i1))
	(code (parse-integer code-string :radix 16))
	(name-string (subseq line (1+ i1) i2)))
	; Ignore characters whose name starts with "<".
	(unless (eql (char name-string 0) #\<)
	; Also ignore Hangul syllables; they are treated specially.
	(unless (<= #xAC00 code #xD7A3)
	; Also ignore CJK compatibility ideographs; they are treated
	; specially as well.
	(unless (or (<= #xF900 code #xFA2D) (<= #xFA30 code #xFA6A)
	(<= #xFA70 code #xFAD9) (<= #x2F800 code #x2FA1D))
	;; Also ignore variationselectors; they are treated
	;; specially as well.
	(unless (or (<= #xFE00 code #xFE0F) (<= #xE0100 code #xE01EF))
	(push (make-unicode-char :index name-index
	:name name-string)
	all-chars)
	(setf (gethash code all-chars-hashed) (car all-chars))
	;; Update the contiguous range, or start a new range.
	(if (and range (= (1+ (range-end-code range)) code))
	(setf (range-end-code range) code)
	(progn
	(when range
	(push range all-ranges))
	(setq range (make-range :index name-index
	:start-code code
	:end-code code))))
	(incf name-index)
	(setq last-code code)
	) ) ) )
	) ) ) )
	(setq all-chars (nreverse all-chars))
	(if range
	(push range all-ranges))
	(setq all-ranges (nreverse all-ranges))
	(when aliasfile
	;; Read all characters and names from the alias file.
	(with-open-file (istream aliasfile :direction :input)
	(loop
	(let ((line (read-line istream nil nil)))
	(unless line (return))
	(let* ((i1 (position #\; line))
	(i2 (position #\; line :start (1+ i1)))
	(code-string (subseq line 0 i1))
	(code (parse-integer code-string :radix 16))
	(name-string (subseq line (1+ i1) i2))
	(uc (gethash code all-chars-hashed)))
	(when uc
	(push (make-unicode-char :index (unicode-char-index uc)
	:name name-string)
	all-aliases)
	) ) ) ) ) )
	(setq all-aliases (nreverse all-aliases)
	all-chars-and-aliases (append all-chars all-aliases))
	;; Split into words.
	(let ((words-by-length (make-array 0 :adjustable t)))
	(dolist (name (list* "HANGUL SYLLABLE" "CJK COMPATIBILITY" "VARIATION"
	(mapcar #'unicode-char-name all-chars-and-aliases)))
	(let ((i1 0))
	(loop
	(when (>= i1 (length name)) (return))
	(let ((i2 (or (position #\Space name :start i1) (length name))))
	(let* ((word (subseq name i1 i2))
	(len (length word)))
	(when (>= len (length words-by-length))
	(adjust-array words-by-length (1+ len))
	)
	(unless (aref words-by-length len)
	(setf (aref words-by-length len)
	(make-word-list
	:hashed (make-hash-table :test #'equal)
	:sorted '()
	) ) )
	(let ((word-list (aref words-by-length len)))
	(unless (gethash word (word-list-hashed word-list))
	(setf (gethash word (word-list-hashed word-list)) t)
	(push word (word-list-sorted word-list))
	) )
	)
	(setq i1 (1+ i2))
	) ) ) )
	;; Sort the word lists.
	(dotimes (len (length words-by-length))
	(unless (aref words-by-length len)
	(setf (aref words-by-length len)
	(make-word-list
	:hashed (make-hash-table :test #'equal)
	:sorted '()
	) ) )
	(let ((word-list (aref words-by-length len)))
	(setf (word-list-sorted word-list)
	(sort (word-list-sorted word-list) #'string<)
	)
	(setf (word-list-size word-list)
	(reduce #'+ (mapcar #'length (word-list-sorted word-list)))
	)
	(setf (word-list-length word-list)
	(length (word-list-sorted word-list))
	) ) )
	;; Output the tables.
	(with-open-file (ostream outputfile :direction :output
	#+UNICODE :external-format #+UNICODE charset:ascii)
	(format ostream "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */~%")
	(format ostream "/*~%")
	(format ostream " * ~A~%" (file-namestring outputfile))
	(format ostream " *~%")
	(format ostream " * Unicode character name table.~%")
	(format ostream " * Generated automatically by the gen-uninames utility.~%")
	(format ostream " */~%")
	(format ostream "~%")
	(format ostream "static const char unicode_name_words[~D] = {~%"
	(let ((sum 0))
	(dotimes (len (length words-by-length))
	(let ((word-list (aref words-by-length len)))
	(incf sum (word-list-size word-list))
	) )
	sum
	) )
	(dotimes (len (length words-by-length))
	(let ((word-list (aref words-by-length len)))
	(dolist (word (word-list-sorted word-list))
	(format ostream " ~{ '~C',~}~%" (coerce word 'list))
	) ) )
	(format ostream "};~%")
	(format ostream "#define UNICODE_CHARNAME_NUM_WORDS ~D~%"
	(let ((sum 0))
	(dotimes (len (length words-by-length))
	(let ((word-list (aref words-by-length len)))
	(incf sum (word-list-length word-list))
	) )
	sum
	) )
	#\| ; Redundant data
	(format ostream "static const uint16_t unicode_name_word_offsets[~D] = {~%"
	(let ((sum 0))
	(dotimes (len (length words-by-length))
	(let ((word-list (aref words-by-length len)))
	(incf sum (word-list-length word-list))
	) )
	sum
	) )
	(dotimes (len (length words-by-length))
	(let ((word-list (aref words-by-length len)))
	(when (word-list-sorted word-list)
	(format ostream " ")
	(do ((l (word-list-sorted word-list) (cdr l))
	(offset 0 (+ offset (length (car l)))))
	((endp l))
	(format ostream "~<~% ~0,79:; ~D,~>" offset)
	)
	(format ostream "~%")
	) ) )
	(format ostream "};~%")
	\|#
	(format ostream "static const struct { uint32_t extra_offset; uint16_t ind_offset; } unicode_name_by_length[~D] = {~%"
	(1+ (length words-by-length))
	)
	(let ((extra-offset 0)
	(ind-offset 0))
	(dotimes (len (length words-by-length))
	(let ((word-list (aref words-by-length len)))
	(format ostream " { ~D, ~D },~%" extra-offset ind-offset)
	(incf extra-offset (word-list-size word-list))
	(incf ind-offset (word-list-length word-list))
	) )
	(format ostream " { ~D, ~D }~%" extra-offset ind-offset)
	)
	(format ostream "};~%")
	(let ((ind-offset 0))
	(dotimes (len (length words-by-length))
	(let ((word-list (aref words-by-length len)))
	(dolist (word (word-list-sorted word-list))
	(setf (gethash word (word-list-hashed word-list)) ind-offset)
	(incf ind-offset)
	) ) ) )
	(dolist (word '("HANGUL" "SYLLABLE" "CJK" "COMPATIBILITY" "VARIATION"))
	(format ostream "#define UNICODE_CHARNAME_WORD_~A ~D~%" word
	(gethash word (word-list-hashed (aref words-by-length (length word))))
	) )
	;; Compute the word-indices for every unicode-char.
	(dolist (uc all-chars-and-aliases)
	(let ((name (unicode-char-name uc))
	(indices '()))
	(let ((i1 0))
	(loop
	(when (>= i1 (length name)) (return))
	(let ((i2 (or (position #\Space name :start i1) (length name))))
	(let* ((word (subseq name i1 i2))
	(len (length word)))
	(push (gethash word (word-list-hashed (aref words-by-length len)))
	indices
	)
	)
	(setq i1 (1+ i2))
	) ) )
	(setf (unicode-char-word-indices uc)
	(coerce (nreverse indices) 'vector)
	)
	) )
	;; Sort the list of unicode-chars by word-indices.
	(setq all-chars-and-aliases
	(sort all-chars-and-aliases
	(lambda (vec1 vec2)
	(let ((len1 (length vec1))
	(len2 (length vec2)))
	(do ((i 0 (1+ i)))
	(nil)
	(if (< i len2)
	(if (< i len1)
	(cond ((< (aref vec1 i) (aref vec2 i)) (return t))
	((> (aref vec1 i) (aref vec2 i)) (return nil))
	)
	(return t)
	)
	(return nil)
	) ) ) )
	:key #'unicode-char-word-indices
	) )
	;; Output the word-indices.
	(format ostream "static const uint16_t unicode_names[~D] = {~%"
	(reduce #'+ (mapcar (lambda (uc) (length (unicode-char-word-indices uc))) all-chars-and-aliases))
	)
	(let ((i 0))
	(dolist (uc all-chars-and-aliases)
	(format ostream " ~{ ~D,~}"
	(maplist (lambda (r) (+ (* 2 (car r)) (if (cdr r) 1 0)))
	(coerce (unicode-char-word-indices uc) 'list)
	)
	)
	(when add-comments
	(format ostream "~40T/* ~A */" (unicode-char-name uc))
	)
	(format ostream "~%")
	(setf (unicode-char-word-indices-index uc) i)
	(incf i (length (unicode-char-word-indices uc)))
	) )
	(format ostream "};~%")
	(format ostream "static const struct { uint16_t index; uint32_t name:24; }~%")
	(format ostream "#if __GNUC__ > 2 \|\| (__GNUC__ == 2 && __GNUC_MINOR__ >= 7)~%__attribute__((__packed__))~%#endif~%")
	(format ostream "unicode_name_to_index[~D] = {~%"
	(length all-chars-and-aliases)
	)
	(dolist (uc all-chars-and-aliases)
	(format ostream " { 0x~4,'0X, ~D },"
	(unicode-char-index uc)
	(unicode-char-word-indices-index uc)
	)
	(when add-comments
	(format ostream "~21T/* ~A */" (unicode-char-name uc))
	)
	(format ostream "~%")
	)
	(format ostream "};~%")
	(format ostream "static const struct { uint16_t index; uint32_t name:24; }~%")
	(format ostream "#if __GNUC__ > 2 \|\| (__GNUC__ == 2 && __GNUC_MINOR__ >= 7)~%__attribute__((__packed__))~%#endif~%")
	(format ostream "unicode_index_to_name[~D] = {~%"
	(length all-chars)
	)
	(dolist (uc (sort (copy-list all-chars) #'< :key #'unicode-char-index))
	(format ostream " { 0x~4,'0X, ~D },"
	(unicode-char-index uc)
	(unicode-char-word-indices-index uc)
	)
	(when add-comments
	(format ostream "~21T/* ~A */" (unicode-char-name uc))
	)
	(format ostream "~%")
	)
	(format ostream "};~%")
	(format ostream "#define UNICODE_CHARNAME_MAX_LENGTH ~D~%"
	(reduce #'max (mapcar (lambda (uc) (length (unicode-char-name uc))) all-chars-and-aliases))
	)
	(format ostream "#define UNICODE_CHARNAME_MAX_WORDS ~D~%"
	(reduce #'max (mapcar (lambda (uc) (length (unicode-char-word-indices uc))) all-chars-and-aliases))
	)
	(format ostream "static const struct { uint16_t index; uint32_t gap; uint16_t length; } unicode_ranges[~D] = {~%"
	(length all-ranges))
	(dolist (range all-ranges)
	(format ostream " { ~D, ~D, ~D },~%"
	(range-index range)
	(- (range-start-code range) (range-index range))
	(1+ (- (range-end-code range) (range-start-code range))))
	)
	(format ostream "};~%")
	)
	) ) )

	(main (first args) (second args) (third args))