blob: 6e63cd06196fe64a53292a0f1d6094d18a88dcbd [file] [log] [blame]
/*
* Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package com.sun.xml.internal.ws.encoding;
import com.sun.istack.internal.NotNull;
import com.sun.istack.internal.Nullable;
import com.sun.xml.internal.messaging.saaj.packaging.mime.MessagingException;
import com.sun.xml.internal.messaging.saaj.packaging.mime.internet.ContentType;
import com.sun.xml.internal.messaging.saaj.packaging.mime.internet.InternetHeaders;
import com.sun.xml.internal.messaging.saaj.packaging.mime.internet.ParseException;
import com.sun.xml.internal.ws.message.stream.StreamAttachment;
import com.sun.xml.internal.ws.util.ASCIIUtility;
import com.sun.xml.internal.ws.util.ByteArrayBuffer;
import javax.xml.ws.WebServiceException;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;
/**
* Parses a MIME multipart message into a root (primary) part and attachment
* parts. The stream is parsed lazily, only as far as each request requires.
*
* TODO keep a list of all the attachments so that a part is accounted for
* even when it has no Content-Id
*
* @author Vivek Pandey
* @author Jitendra Kotamraju
*/
public final class MimeMultipartParser {
// Source of the multipart message. The constructor wraps it in a
// BufferedInputStream when the supplied stream does not support mark().
private final InputStream in;
// Value of the Content-Type "start" parameter; may be null (getNextPart
// then treats the first part as the root part).
private final String start;
// Bytes of "--" followed by the Content-Type "boundary" parameter.
private final byte[] boundaryBytes;
// Single-bit flag: bit 0 is turned on by skipLWSPAndCRLF() when a boundary
// is followed by "--", i.e. the closing delimiter has been read.
private final BitSet lastPartFound = new BitSet(1);
// Last byte read by skipLWSPAndCRLF(); findMimeBody() sets it to -1 when the
// end of the stream is reached. Callers loop while b != -1.
private int b = 0;
// Bad-character shift table, filled by compileBoundaryPattern().
private final int[] bcs = new int[256];
// Good-suffix shift table, allocated and filled by compileBoundaryPattern().
private int[] gss;
private static final int BUFFER_SIZE = 4096;
// Window most recently filled by readNext().
private byte[] buffer = new byte[BUFFER_SIZE];
// Previous window; findMimeBody() swaps it with buffer on every iteration.
private byte[] prevBuffer = new byte[BUFFER_SIZE];
// True until getNextPart() has finished parsing its first part.
private boolean firstPart = true;
// Non-root parts, keyed by Content-ID or, when a part has none, by the
// decimal value of cidCounter at the time the part was parsed.
private final Map<String, StreamAttachment> attachments = new HashMap<String, StreamAttachment>();
// Root part; null until getNextPart() identifies it.
private StreamAttachment root;
// Counter used to generate map keys for parts without a Content-ID.
private int cidCounter = 0;
/**
 * Creates a parser over a MIME multipart stream.
 *
 * @param in          stream holding the multipart message; it is wrapped in a
 *                    {@link BufferedInputStream} when it does not support
 *                    {@code mark()}
 * @param contentType Content-Type header value of the message; it must carry
 *                    a non-empty {@code boundary} parameter and may carry a
 *                    {@code start} parameter
 * @throws WebServiceException if {@code contentType} cannot be parsed or has
 *                             no {@code boundary} parameter
 */
public MimeMultipartParser(InputStream in, String contentType) {
    try {
        ContentType ct = new ContentType(contentType);
        String boundary = ct.getParameter("boundary");
        if (boundary == null || boundary.equals("")) {
            throw new WebServiceException("MIME boundary parameter not found: " + contentType);
        }
        // The delimiter that appears in the stream is "--" + boundary.
        boundaryBytes = ASCIIUtility.getBytes("--" + boundary);
        start = ct.getParameter("start");
    } catch (ParseException e) {
        throw new WebServiceException(e);
    }

    // readNext() reads boundaryBytes.length bytes into the scratch buffers, so
    // they must be at least that large; grow them for oversized boundaries
    // instead of failing later with an IndexOutOfBoundsException.
    if (boundaryBytes.length > BUFFER_SIZE) {
        buffer = new byte[boundaryBytes.length];
        prevBuffer = new byte[boundaryBytes.length];
    }

    // The search code relies on mark()/reset().
    this.in = in.markSupported() ? in : new BufferedInputStream(in);
}
/**
 * Returns the root part, parsing further into the stream only if the root
 * has not been identified yet. The root is the part selected by the
 * Content-Type {@code start} parameter when that parameter is present, and
 * the first part otherwise.
 *
 * @return the root part, or {@code null} if the stream is exhausted without
 *         a root part being found
 */
public @Nullable StreamAttachment getRootPart() {
    while (root == null) {
        // Stop once the closing delimiter or the end of the stream was seen.
        if (lastBodyPartFound() || b == -1) {
            break;
        }
        getNextPart();
    }
    return root;
}
/**
 * Parses whatever is left of the stream and returns every MIME part other
 * than the root part.
 *
 * @return the map of attachment parts held by this parser
 */
public @NotNull Map<String, StreamAttachment> getAttachmentParts() {
    for (;;) {
        boolean exhausted = lastBodyPartFound() || b == -1;
        if (exhausted) {
            return attachments;
        }
        getNextPart();
    }
}
/**
 * Looks up the MIME part with the given Content-ID. Parts that were already
 * parsed are served from the attachment map; otherwise the stream is parsed
 * part by part until a part with a matching Content-ID is produced.
 *
 * @param contentId Content-ID to look for
 * @return the matching part, or {@code null} if the stream is exhausted
 *         without a match
 */
public @Nullable StreamAttachment getAttachmentPart(String contentId) throws IOException {
    StreamAttachment cached = attachments.get(contentId);
    if (cached != null) {
        return cached;
    }

    // Not parsed yet: keep reading parts until the wanted one turns up.
    while (!(lastBodyPartFound() || b == -1)) {
        StreamAttachment part = getNextPart();
        String partId = part.getContentId();
        if (partId != null && partId.equals(contentId)) {
            return part;
        }
    }
    return null;
}
/**
 * Parses and returns the next MIME part in the stream. Must not be called
 * once the closing boundary delimiter has been read.
 *
 * On the first call the boundary search tables are built and the preamble
 * is skipped. The part is then classified:
 * <ul>
 *   <li>no {@code start} parameter and this is the first part: root part;</li>
 *   <li>{@code start} equals the part's raw Content-ID header value: root part;</li>
 *   <li>otherwise it is stored in the attachment map, keyed by its Content-ID
 *       (without the enclosing angle brackets) or, when it has none, by a
 *       generated counter value.</li>
 * </ul>
 *
 * @return the part that was just parsed
 * @throws WebServiceException if the opening boundary is missing, or if
 *         reading the headers or body fails
 */
private StreamAttachment getNextPart() {
    assert !lastBodyPartFound();

    try {
        if (firstPart) {
            compileBoundaryPattern();
            // skip the first boundary of the MIME package
            if (!skipPreamble()) {
                throw new WebServiceException("Missing Start Boundary, or boundary does not start on a new line");
            }
        }

        InternetHeaders ih = new InternetHeaders(in);
        String[] contentTypes = ih.getHeader("content-type");
        String contentType = (contentTypes != null) ? contentTypes[0] : "application/octet-stream";
        String[] contentIds = ih.getHeader("content-id");
        String mimeContentId = (contentIds != null) ? contentIds[0] : null;

        // Strip the angle brackets only when BOTH are present; a value such as
        // "<abc" must not lose its last character.
        String contentId = mimeContentId;
        if (mimeContentId != null && mimeContentId.length() > 2
                && mimeContentId.charAt(0) == '<'
                && mimeContentId.charAt(mimeContentId.length() - 1) == '>') {
            contentId = mimeContentId.substring(1, mimeContentId.length() - 1);
        }

        ByteArrayBuffer bos = new ByteArrayBuffer();
        b = readBody(bos);
        StreamAttachment as = new StreamAttachment(bos, contentId, contentType);

        if (start == null && firstPart) {
            root = as;      // Taking first part as root part
        } else if (mimeContentId != null && start != null && start.equals(mimeContentId)) {
            root = as;      // root part as identified by start parameter
        } else if (contentId != null) {
            attachments.put(contentId, as);     // Attachment part
        } else {
            // No Content-ID: file the part under a generated key.
            ++cidCounter;
            attachments.put("" + cidCounter, as);
        }

        firstPart = false;
        return as;
    } catch (IOException ioe) {
        throw new WebServiceException(ioe);
    } catch (MessagingException me) {
        throw new WebServiceException(me);
    }
}
/**
 * Reads the body of the current part into {@code baos}.
 *
 * @param baos buffer receiving the body bytes
 * @return the value of {@code b} after the body has been consumed
 * @throws WebServiceException if {@code findMimeBody} reports that no
 *         boundary delimiter was found
 */
private int readBody(ByteArrayBuffer baos) throws IOException {
    boolean delimited = findMimeBody(baos);
    if (delimited) {
        return b;
    }
    //TODO: i18n
    throw new WebServiceException("Missing boundary delimitier ");
}
/**
 * Copies the body of the current part into {@code out}, stopping at the
 * next boundary delimiter or at the end of the stream.
 *
 * The stream is scanned in windows of {@code boundaryBytes.length} bytes.
 * When a window does not match the boundary, the search advances by a shift
 * {@code s} taken from the {@code bcs}/{@code gss} tables. The {@code s}
 * bytes that were stepped over are written one iteration late, so that a
 * line terminator immediately preceding a boundary can be withheld from the
 * body.
 *
 * @param out receives the body bytes
 * @return always {@code true}; every non-exceptional exit returns it
 * @throws WebServiceException if the boundary is found but the bytes just
 *         before it do not end with LF or CRLF
 */
private boolean findMimeBody(ByteArrayBuffer out) throws IOException {
    int i;
    int l = boundaryBytes.length;
    int lx = l - 1;
    int bufferLength;
    // Number of leading bytes of prevBuffer that were stepped over by the
    // previous iteration and still have to be written to 'out'.
    int s = 0;
    byte[] tmp;
    boolean first = true;
    BitSet eof = new BitSet(1);

    while (true) {
        in.mark(l);
        if (!first) {
            // The window read last time becomes the previous window.
            tmp = prevBuffer;
            prevBuffer = buffer;
            buffer = tmp;
        }
        bufferLength = readNext(in, l, eof);

        if (bufferLength == -1) {
            // Nothing left to read.
            b = -1;
            if (s == l) {
                out.write(prevBuffer, 0, s);
            }
            return true;
        }

        if (bufferLength < l) {
            // Stream ended inside this window. The bytes stepped over by the
            // previous iteration are still pending; write them first so no
            // body data is lost, then write the short tail.
            if (s > 0) {
                out.write(prevBuffer, 0, s);
            }
            out.write(buffer, 0, bufferLength);
            b = -1;
            return true;
        }

        // Compare the window with the boundary, right to left.
        for (i = lx; i >= 0; i--) {
            if (buffer[i] != boundaryBytes[i]) {
                break;
            }
        }

        if (i < 0) {
            // Boundary found. The pending bytes must end with LF or CRLF,
            // which belongs to the delimiter and is not written.
            if (s > 0) {
                if (s <= 2) {
                    // s == 1 : it must be an LF
                    // s == 2 : it must be a CR LF
                    // Compared as bytes so the check does not depend on the
                    // platform default charset.
                    boolean lf = (s == 1) && prevBuffer[0] == '\n';
                    boolean crlf = (s == 2) && prevBuffer[0] == '\r' && prevBuffer[1] == '\n';
                    if (!lf && !crlf) {
                        throw new WebServiceException(
                                "Boundary characters encountered in part Body " +
                                "without a preceeding CRLF");
                    }
                } else if ((prevBuffer[s - 2] == '\r') && (prevBuffer[s - 1] == '\n')) {
                    out.write(prevBuffer, 0, s - 2);
                } else if (prevBuffer[s - 1] == '\n') {
                    out.write(prevBuffer, 0, s - 1);
                } else {
                    throw new WebServiceException(
                            "Boundary characters encountered in part Body " +
                            "without a preceeding CRLF");
                }
            }
            // Consume what follows the boundary (padding, CRLF, or the "--"
            // of a closing delimiter). The result is deliberately ignored:
            // a boundary that is not terminated by CRLF is tolerated here.
            skipLWSPAndCRLF(in);
            return true;
        }

        // No match: write the bytes stepped over last time.
        if (s > 0) {
            if (prevBuffer[s - 1] == (byte) 13) {
                if (buffer[0] == (byte) 10) {
                    // Pending bytes end with CR and this window starts with
                    // LF: look ahead to see whether the boundary follows.
                    int j;
                    for (j = lx - 1; j > 0; j--) {
                        if (buffer[j + 1] != boundaryBytes[j]) {
                            break;
                        }
                    }
                    if (j == 0) {
                        // matched the boundaryBytes excluding the last char of the boundaryBytes
                        // so dont write the CR into stream
                        out.write(prevBuffer, 0, s - 1);
                    } else {
                        out.write(prevBuffer, 0, s);
                    }
                } else {
                    out.write(prevBuffer, 0, s);
                }
            } else {
                out.write(prevBuffer, 0, s);
            }
        }

        // Advance by the larger of the bad-character and good-suffix shifts.
        s = Math.max(i + 1 - bcs[buffer[i] & 0x7f], gss[i]);
        in.reset();
        in.skip(s);
        first = false;
    }
}
/**
 * Tells whether the closing boundary delimiter has already been read.
 *
 * @return the state of bit 0 of {@code lastPartFound}
 */
private boolean lastBodyPartFound() {
    final boolean closingDelimiterSeen = lastPartFound.get(0);
    return closingDelimiterSeen;
}
/**
 * Builds the two shift tables used to search for {@code boundaryBytes}:
 * the bad-character table {@code bcs} and the good-suffix table {@code gss}.
 * The algorithm is the one in J2SE 1.4 java.util.regex.Pattern.
 */
private void compileBoundaryPattern() {
    int l = boundaryBytes.length;
    int i;
    int j;

    // Copied from J2SE 1.4 regex code
    // java.util.regex.Pattern.java

    // Initialise Bad Character Shift table.
    // The index is masked with 0x7f exactly as the lookup sites do
    // (bcs[buffer[i] & 0x7f]). Without the mask a boundary byte >= 0x80 is a
    // negative index and throws ArrayIndexOutOfBoundsException. Aliasing two
    // byte values onto one slot can only make a shift smaller, never larger,
    // so no match is skipped.
    for (i = 0; i < l; i++) {
        bcs[boundaryBytes[i] & 0x7f] = i + 1;
    }

    // Initialise Good Suffix Shift table
    gss = new int[l];
    NEXT:
    for (i = l; i > 0; i--) {
        // j is the beginning index of suffix being considered
        for (j = l - 1; j >= i; j--) {
            // Testing for good suffix
            if (boundaryBytes[j] == boundaryBytes[j - i]) {
                // boundaryBytes[j..len] is a good suffix
                gss[j - 1] = i;
            } else {
                // No match. The array has already been
                // filled up with correct values before.
                continue NEXT;
            }
        }
        while (j > 0) {
            gss[--j] = i;
        }
    }
    // A mismatch on the last byte always shifts by one.
    gss[l - 1] = 1;
}
/**
 * Advances the stream past the first boundary delimiter.
 *
 * @return {@code true} if a boundary was found, {@code false} if the stream
 *         ended first
 * @throws WebServiceException if the boundary that was found is the closing
 *         delimiter
 */
private boolean skipPreamble() throws IOException {
    boolean boundarySeen = findBoundary();
    if (boundarySeen && lastPartFound.get(0)) {
        throw new WebServiceException("Found closing boundary delimiter while trying to skip preamble");
    }
    return boundarySeen;
}
/**
 * Scans forward for the next occurrence of {@code boundaryBytes} and
 * consumes it together with the rest of its line.
 *
 * @return {@code true} once a boundary has been consumed, {@code false} if
 *         {@code readNext} signals end of stream before one is found
 * @throws WebServiceException if the boundary is found but
 *         {@code skipLWSPAndCRLF} reports that it is not properly terminated
 */
private boolean findBoundary() throws IOException {
    final int patternLength = boundaryBytes.length;
    final BitSet endOfStream = new BitSet(1);

    for (;;) {
        in.mark(patternLength);
        readNext(in, patternLength, endOfStream);
        if (endOfStream.get(0)) {
            // End of stream
            return false;
        }

        // Walk the window right to left until a byte differs.
        int pos = patternLength - 1;
        while (pos >= 0 && buffer[pos] == boundaryBytes[pos]) {
            pos--;
        }

        if (pos < 0) {
            // found the boundary, skip *LWSP-char and CRLF
            if (skipLWSPAndCRLF(in)) {
                return true;
            }
            throw new WebServiceException("Boundary does not terminate with CRLF");
        }

        // Mismatch: rewind and step forward by the computed shift.
        int shift = Math.max(pos + 1 - bcs[buffer[pos] & 0x7f], gss[pos]);
        in.reset();
        in.skip(shift);
    }
}
/**
 * Consumes what follows a boundary on the same line: an optional "--"
 * (which marks the closing delimiter), optional spaces/tabs, and the line
 * terminator. The last byte read is left in {@code b}.
 *
 * @param is stream positioned immediately after the boundary bytes
 * @return {@code true} if the line ended with LF or CRLF, or if the stream
 *         ended after the closing delimiter; {@code false} if some other
 *         byte was encountered
 * @throws WebServiceException if a CR directly after the boundary is not
 *         followed by LF, if a single '-' follows the boundary, or if the
 *         stream ends before the closing delimiter has been seen
 */
private boolean skipLWSPAndCRLF(InputStream is) throws IOException {
    b = is.read();
    //looks like old impl allowed just a \n as well
    if (b == '\n') {
        return true;
    }
    if (b == '\r') {
        b = is.read();
        if (b == '\n') {
            return true;
        }
        throw new WebServiceException(
                "transport padding after a Mime Boundary should end in a CRLF, found CR only");
    }

    if (b == '-') {
        b = is.read();
        if (b != '-') {
            throw new WebServiceException(
                    "Unexpected singular '-' character after Mime Boundary");
        }
        // Closing delimiter. set() rather than flip(): the flag must never
        // be cleared again by a second closing delimiter.
        lastPartFound.set(0);
        // read the next char
        b = is.read();
        // The closing delimiter may be terminated directly by LF or CRLF,
        // with no transport padding in between.
        if (b == '\n') {
            return true;
        }
        if (b == '\r') {
            b = is.read();
            if (b == '\n') {
                return true;
            }
        }
    }

    // Transport padding: spaces and tabs, ended by CRLF.
    while (b == ' ' || b == '\t') {
        b = is.read();
        if (b == '\r') {
            b = is.read();
            if (b == '\n') {
                return true;
            }
        }
    }

    if (b == -1) {
        // the last boundary need not have CRLF
        if (!lastPartFound.get(0)) {
            throw new WebServiceException(
                    "End of Multipart Stream before encountering closing boundary delimiter");
        }
        return true;
    }
    return false;
}
/**
 * Fills the first {@code patternLength} bytes of {@code buffer} from
 * {@code is}: one bulk read, then single-byte reads for whatever the bulk
 * read left unfilled.
 *
 * @param is            stream to read from
 * @param patternLength number of bytes wanted
 * @param eof           bit 0 is flipped when the end of the stream is hit
 * @return the number of bytes placed in {@code buffer}, or -1 if the stream
 *         was already at its end
 */
private int readNext(InputStream is, int patternLength, BitSet eof) throws IOException {
    int filled = is.read(buffer, 0, patternLength);
    if (filled == -1) {
        eof.flip(0);
        return filled;
    }

    // Top up one byte at a time until the window is full or the stream ends.
    while (filled < patternLength) {
        int next = is.read();
        if (next == -1) {
            eof.flip(0);
            break;
        }
        buffer[filled++] = (byte) next;
    }
    return filled;
}
}