blob: 88e78cc5ca9b908cc211d0d9fc354ec94f059b01 [file] [log] [blame]
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2002-2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution, if
* any, must include the following acknowlegement:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowlegement may appear in the software itself,
* if and wherever such third-party acknowlegements normally appear.
*
* 4. The names "The Jakarta Project", "Commons", and "Apache Software
* Foundation" must not be used to endorse or promote products derived
* from this software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache"
* nor may "Apache" appear in their names without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.commons.lang;
import java.io.IOException;
import java.io.Writer;
import org.apache.commons.lang.exception.NestableRuntimeException;
/**
* <p>Escapes and unescapes <code>String</code>s for
* Java, Java Script, HTML, XML, and SQL.</p>
*
* @author Apache Jakarta Turbine
* @author GenerationJavaCore library
* @author Purple Technology
* @author <a href="mailto:bayard@generationjava.com">Henri Yandell</a>
* @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
* @author <a href="mailto:cybertiger@cyberiantiger.org">Antony Riley</a>
* @author Helge Tesgaard
* @author <a href="sean@boohai.com">Sean Brown</a>
* @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
* @author Phil Steitz
* @since 2.0
* @version $Id: StringEscapeUtils.java,v 1.22 2003/07/31 23:45:28 scolebourne Exp $
*/
public class StringEscapeUtils {
/**
* <p><code>StringEscapeUtils</code> instances should NOT be constructed in
* standard programming.</p>
*
* <p>Instead, the class should be used as:
* <pre>StringEscapeUtils.escapeJava("foo");</pre></p>
*
* <p>This constructor is public to permit tools that require a JavaBean
* instance to operate.</p>
*/
public StringEscapeUtils() {
}
// Java and JavaScript
//--------------------------------------------------------------------------
/**
* <p>Escapes the characters in a <code>String</code> using Java String rules.</p>
*
* <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
*
* <p>So a tab becomes the characters <code>'\\'</code> and
* <code>'t'</code>.</p>
*
* <p>The only difference between Java strings and JavaScript strings
* is that in JavaScript, a single quote must be escaped.</p>
*
* <p>Example:
* <pre>
* input string: He didn't say, "Stop!"
* output string: He didn't say, \"Stop!\"
* </pre>
* </p>
*
* @param str String to escape values in, may be null
* @return String with escaped values, <code>null</code> if null string input
*/
public static String escapeJava(String str) {
return escapeJavaStyleString(str, false);
}
/**
* <p>Escapes the characters in a <code>String</code> using Java String rules to
* a <code>Writer</code>.</p>
*
* <p>A <code>null</code> string input has no effect.</p>
*
* @see #escapeJava(java.lang.String)
* @param out Writer to write escaped string into
* @param str String to escape values in, may be null
* @throws IllegalArgumentException if the Writer is <code>null</code>
* @throws IOException if error occurs on undelying Writer
*/
public static void escapeJava(Writer out, String str) throws IOException {
escapeJavaStyleString(out, str, false);
}
/**
* <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p>
* <p>Escapes any values it finds into their JavaScript String form.
* Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
*
* <p>So a tab becomes the characters <code>'\\'</code> and
* <code>'t'</code>.</p>
*
* <p>The only difference between Java strings and JavaScript strings
* is that in JavaScript, a single quote must be escaped.</p>
*
* <p>Example:
* <pre>
* input string: He didn't say, "Stop!"
* output string: He didn\'t say, \"Stop!\"
* </pre>
* </p>
*
* @param str String to escape values in, may be null
* @return String with escaped values, <code>null</code> if null string input
*/
public static String escapeJavaScript(String str) {
return escapeJavaStyleString(str, true);
}
/**
* <p>Escapes the characters in a <code>String</code> using JavaScript String rules
* to a <code>Writer</code>.</p>
*
* <p>A <code>null</code> string input has no effect.</p>
*
* @see #escapeJavaScript(java.lang.String)
* @param out Writer to write escaped string into
* @param str String to escape values in, may be null
* @throws IllegalArgumentException if the Writer is <code>null</code>
* @throws IOException if error occurs on undelying Writer
**/
public static void escapeJavaScript(Writer out, String str) throws IOException {
escapeJavaStyleString(out, str, true);
}
private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes) {
if (str == null) {
return null;
}
try {
StringPrintWriter writer = new StringPrintWriter(str.length() * 2);
escapeJavaStyleString(writer, str, escapeSingleQuotes);
return writer.getString();
} catch (IOException ioe) {
// this should never ever happen while writing to a StringWriter
ioe.printStackTrace();
return null;
}
}
private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote) throws IOException {
if (out == null) {
throw new IllegalArgumentException("The Writer must not be null");
}
if (str == null) {
return;
}
int sz;
sz = str.length();
for (int i = 0; i < sz; i++) {
char ch = str.charAt(i);
// handle unicode
if (ch > 0xfff) {
out.write("\\u" + hex(ch));
} else if (ch > 0xff) {
out.write("\\u0" + hex(ch));
} else if (ch > 0x7f) {
out.write("\\u00" + hex(ch));
} else if (ch < 32) {
switch (ch) {
case '\b':
out.write('\\');
out.write('b');
break;
case '\n':
out.write('\\');
out.write('n');
break;
case '\t':
out.write('\\');
out.write('t');
break;
case '\f':
out.write('\\');
out.write('f');
break;
case '\r':
out.write('\\');
out.write('r');
break;
default :
if (ch > 0xf) {
out.write("\\u00" + hex(ch));
} else {
out.write("\\u000" + hex(ch));
}
break;
}
} else {
switch (ch) {
case '\'':
if (escapeSingleQuote) out.write('\\');
out.write('\'');
break;
case '"':
out.write('\\');
out.write('"');
break;
case '\\':
out.write('\\');
out.write('\\');
break;
default :
out.write(ch);
break;
}
}
}
}
/**
* <p>Returns an upper case hexadecimal <code>String</code> for the given
* character.</p>
*
* @param ch The character to convert.
* @return An upper case hexadecimal <code>String</code>
*/
private static String hex(char ch) {
return Integer.toHexString(ch).toUpperCase();
}
/**
* <p>Unescapes any Java literals found in the <code>String</code>.
* For example, it will turn a sequence of <code>'\'</code> and
* <code>'n'</code> into a newline character, unless the <code>'\'</code>
* is preceded by another <code>'\'</code>.</p>
*
* @param str the <code>String</code> to unescape, may be null
* @return a new unescaped <code>String</code>, <code>null</code> if null string input
*/
public static String unescapeJava(String str) {
if (str == null) {
return null;
}
try {
StringPrintWriter writer = new StringPrintWriter(str.length());
unescapeJava(writer, str);
return writer.getString();
} catch (IOException ioe) {
// this should never ever happen while writing to a StringWriter
ioe.printStackTrace();
return null;
}
}
/**
* <p>Unescapes any Java literals found in the <code>String</code> to a
* <code>Writer</code>.</p>
*
* <p>For example, it will turn a sequence of <code>'\'</code> and
* <code>'n'</code> into a newline character, unless the <code>'\'</code>
* is preceded by another <code>'\'</code>.</p>
*
* <p>A <code>null</code> string input has no effect.</p>
*
* @param out the <code>Writer</code> used to output unescaped characters
* @param str the <code>String</code> to unescape, may be null
* @throws IllegalArgumentException if the Writer is <code>null</code>
*/
public static void unescapeJava(Writer out, String str) throws IOException {
if (out == null) {
throw new IllegalArgumentException("The Writer must not be null");
}
if (str == null) {
return;
}
int sz = str.length();
StringBuffer unicode = new StringBuffer(4);
boolean hadSlash = false;
boolean inUnicode = false;
for (int i = 0; i < sz; i++) {
char ch = str.charAt(i);
if (inUnicode) {
// if in unicode, then we're reading unicode
// values in somehow
unicode.append(ch);
if (unicode.length() == 4) {
// unicode now contains the four hex digits
// which represents our unicode chacater
try {
int value = Integer.parseInt(unicode.toString(), 16);
out.write((char) value);
unicode.setLength(0);
inUnicode = false;
hadSlash = false;
} catch (NumberFormatException nfe) {
throw new NestableRuntimeException("Unable to parse unicode value: " + unicode, nfe);
}
}
continue;
}
if (hadSlash) {
// handle an escaped value
hadSlash = false;
switch (ch) {
case '\\':
out.write('\\');
break;
case '\'':
out.write('\'');
break;
case '\"':
out.write('"');
break;
case 'r':
out.write('\r');
break;
case 'f':
out.write('\f');
break;
case 't':
out.write('\t');
break;
case 'n':
out.write('\n');
break;
case 'b':
out.write('\b');
break;
case 'u':
{
// uh-oh, we're in unicode country....
inUnicode = true;
break;
}
default :
out.write(ch);
break;
}
continue;
} else if (ch == '\\') {
hadSlash = true;
continue;
}
out.write(ch);
}
if (hadSlash) {
// then we're in the weird case of a \ at the end of the
// string, let's output it anyway.
out.write('\\');
}
}
/**
* <p>Unescapes any JavaScript literals found in the <code>String</code>.</p>
*
* <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
* into a newline character, unless the <code>'\'</code> is preceded by another
* <code>'\'</code>.</p>
*
* @see #unescapeJava(String)
* @param str the <code>String</code> to unescape, may be null
* @return A new unescaped <code>String</code>, <code>null</code> if null string input
*/
public static String unescapeJavaScript(String str) {
return unescapeJava(str);
}
/**
* <p>Unescapes any JavaScript literals found in the <code>String</code> to a
* <code>Writer</code>.</p>
*
* <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
* into a newline character, unless the <code>'\'</code> is preceded by another
* <code>'\'</code>.</p>
*
* <p>A <code>null</code> string input has no effect.</p>
*
* @see #unescapeJava(Writer,String)
* @param out the <code>Writer</code> used to output unescaped characters
* @param str the <code>String</code> to unescape, may be null
* @throws IllegalArgumentException if the Writer is <code>null</code>
*/
public static void unescapeJavaScript(Writer out, String str) throws IOException {
unescapeJava(out, str);
}
// HTML and XML
//--------------------------------------------------------------------------
/**
* <p>Escapes the characters in a <code>String</code> using HTML entities.</p>
*
* <p>
* For example: <tt>"bread" & "butter"</tt> => <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
* </p>
*
* <p>Supports all known HTML 4.0 entities, including funky accents.</p>
*
* @param str the <code>String</code> to escape, may be null
* @return a new escaped <code>String</code>, <code>null</code> if null string input
*
* @see #unescapeHtml(String)
* @see </br><a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
* @see </br><a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
* @see </br><a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
* @see </br><a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
* @see </br><a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
**/
public static String escapeHtml(String str) {
if (str == null) {
return null;
}
//todo: add a version that takes a Writer
//todo: rewrite underlying method to use a Writer instead of a StringBuffer
return Entities.HTML40.escape(str);
}
/**
* <p>Unescapes a string containing entity escapes to a string
* containing the actual Unicode characters corresponding to the
* escapes. Supports HTML 4.0 entities.</p>
*
* <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
* will become "&lt;Fran&ccedil;ais&gt;"</p>
*
* <p>If an entity is unrecognized, it is left alone, and inserted
* verbatim into the result string. e.g. "&amp;gt;&amp;zzzz;x" will
* become "&gt;&amp;zzzz;x".</p>
*
* @param str the <code>String</code> to unescape, may be null
* @return a new unescaped <code>String</code>, <code>null</code> if null string input
* @see #escapeHtml(String)
**/
public static String unescapeHtml(String str) {
if (str == null) {
return null;
}
return Entities.HTML40.unescape(str);
}
/**
* <p>Escapes the characters in a <code>String</code> using XML entities.</p>
*
* <p>For example: <tt>"bread" & "butter"</tt> =>
* <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
* </p>
*
* <p>Supports only the four basic XML entities (gt, lt, quot, amp).
* Does not support DTDs or external entities.</p>
*
* @param str the <code>String</code> to escape, may be null
* @return a new escaped <code>String</code>, <code>null</code> if null string input
* @see #unescapeXml(java.lang.String)
**/
public static String escapeXml(String str) {
if (str == null) {
return null;
}
return Entities.XML.escape(str);
}
/**
* <p>Unescapes a string containing XML entity escapes to a string
* containing the actual Unicode characters corresponding to the
* escapes.</p>
*
* <p>Supports only the four basic XML entities (gt, lt, quot, amp).
* Does not support DTDs or external entities.</p>
*
* @param str the <code>String</code> to unescape, may be null
* @return a new unescaped <code>String</code>, <code>null</code> if null string input
* @see #escapeXml(String)
**/
public static String unescapeXml(String str) {
if (str == null) {
return null;
}
return Entities.XML.unescape(str);
}
/**
* <p>Escapes the characters in a <code>String</code> to be suitable to pass to
* an SQL query.</p>
*
* <p>For example,
* <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" +
* StringEscapeUtils.escapeSql("McHale's Navy") +
* "'");</pre>
* </p>
*
* <p>At present, this method only turns single-quotes into doubled single-quotes
* (<code>"McHale's Navy"</code> => <code>"McHale''s Navy"</code>). It does not
* handle the cases of percent (%) or underscore (_) for use in LIKE clauses.</p>
*
* see http://www.jguru.com/faq/view.jsp?EID=8881
* @param str the string to escape, may be null
* @return a new String, escaped for SQL, <code>null</code> if null string input
*/
public static String escapeSql(String str) {
if (str == null) {
return null;
}
return StringUtils.replace(str, "'", "''");
}
}