| /** |
| * Copyright (c) 2008-2012, http://www.snakeyaml.org |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.yaml.snakeyaml.emitter; |
| |
| import java.util.Formatter; |
| |
/**
 * Static helpers that classify Unicode code points according to the
 * character-set rules of the YAML specification, plus a small hex-dump
 * utility for strings.
 */
public class YamlCharacterEncoding {

    /**
     * Tests whether a code point is printable according to the YAML spec
     * production 'c-printable'.
     *
     * @param c the code point to test
     * @return {@code true} if {@code c} is TAB, LF, CR, printable ASCII
     *         (#x20-#x7E), NEL (#x85), or lies in one of the ranges
     *         #xA0-#xD7FF, #xE000-#xFFFD, #x10000-#x10FFFF
     */
    public static boolean isPrintable(int c) {
        // 8 bit
        if (c == 0x9 || c == 0xA || c == 0xD || (c >= 0x20 && c <= 0x7E)) {
            return true;
        }
        // 16 bit
        if (c == 0x85 || (c >= 0xA0 && c <= 0xD7FF) || (c >= 0xE000 && c <= 0xFFFD)) {
            return true;
        }
        // 32 bit
        return c >= 0x10000 && c <= 0x10FFFF;
    }

    /**
     * Tests whether a code point may appear unescaped in a YAML stream.
     *
     * "On input, a YAML processor must accept all printable ASCII characters,
     * the space, tab, line break, and all Unicode characters beyond #x9F. On
     * output, a YAML processor must only produce these acceptable characters,
     * and should also escape all non-printable Unicode characters. The allowed
     * character range explicitly excludes the surrogate block #xD800-#xDFFF,
     * DEL #x7F, the C0 control block #x0-#x1F (except for #x9, #xA, and #xD),
     * the C1 control block #x80-#x9F, #xFFFE, and #xFFFF."
     *
     * @param c the code point to test
     * @return {@code true} if {@code c} is in the accepted set and not in
     *         the explicitly excluded set quoted above
     */
    public static boolean isAcceptable(int c) {
        boolean printableAscii = c >= 0x20 && c <= 0x7E; // includes the space
        boolean tab = c == 0x09;
        boolean lineBreak = c == 0x0A || c == 0x0D || c == 0x85 || c == 0x2028 || c == 0x2029;
        // #x9F itself is rejected by the C1 exclusion below, so "> 0x9F" and
        // ">= 0x9F" give the same overall result.
        boolean beyondC1 = c > 0x9F && isUnicodeCharacter(c);
        boolean accepted = printableAscii || tab || lineBreak || beyondC1;

        boolean surrogate = c >= 0xD800 && c <= 0xDFFF;
        boolean del = c == 0x7F;
        boolean c0Control = c <= 0x1F && !(c == 0x09 || c == 0x0A || c == 0x0D);
        // NOTE(review): NEL (#x85) is listed as a line break above but falls
        // inside this C1 range, so the exclusion wins and isAcceptable(0x85)
        // is false even though isPrintable(0x85) is true. Left unchanged
        // because callers may rely on it -- confirm whether this is intended.
        boolean c1Control = c >= 0x80 && c <= 0x9F;
        boolean bmpNonCharacter = c == 0xFFFE || c == 0xFFFF;
        boolean excluded = surrogate || del || c0Control || c1Control || bmpNonCharacter;

        return accepted && !excluded;
    }

    /**
     * Tests whether a value is a Unicode code point that is <em>not</em> a
     * designated noncharacter.
     *
     * The designated noncharacters are #xFDD0-#xFDEF and the last two code
     * points (#xFFFE and #xFFFF) of each of the 17 planes. Surrogates are
     * not rejected by this method.
     *
     * @param c the value to test
     * @return {@code true} if {@code c} lies in 0-#x10FFFF and is not one of
     *         the noncharacters listed above; {@code false} otherwise,
     *         including for negative values
     */
    public static boolean isUnicodeCharacter(int c) {
        // Negative values are not code points. Without this guard, values
        // such as -3 would pass the plane and low-16-bit checks below.
        if (c < 0 || c > 0x10FFFF) {
            return false;
        }
        if (c >= 0xFDD0 && c <= 0xFDEF) {
            return false;
        }
        // Masking with 0xFFFE matches both xxFFFE and xxFFFF in every plane.
        return (c & 0xFFFE) != 0xFFFE;
    }

    /**
     * Renders each UTF-16 code unit of a string as lowercase hexadecimal
     * (at least two digits) followed by a single space.
     *
     * @param input the string to dump; must not be {@code null}
     * @return the hex dump, e.g. {@code "41 42 "} for {@code "AB"}; empty
     *         for an empty input
     */
    public static String hexdump(String input) {
        StringBuilder result = new StringBuilder();
        Formatter formatter = new Formatter(result);
        try {
            for (int i = 0; i < input.length(); i++) {
                formatter.format("%02x ", (int) input.charAt(i));
            }
        } finally {
            // Formatter is Closeable; release it even if formatting fails.
            formatter.close();
        }
        return result.toString();
    }
}