// blob: 5f0e034aeb257032fd32cb24cc71030a5a18ebf3 [file] [log] [blame]
/*
* Copyright 2006 Sony Computer Entertainment Inc.
*
* Licensed under the MIT Open Source License, for details please see license.txt or the website
* http://www.opensource.org/licenses/mit-license.php
*
*/
#include <algorithm>
#include <dae.h>
#include <dae/daeURI.h>
#include <ctype.h>
#include <dae/daeDocument.h>
#include <dae/daeErrorHandler.h>
#include <dae/daeUtils.h>
#include <pcrecpp.h>
using namespace std;
using namespace cdom;
// Shared constructor helper: detaches the URI from any container element and
// clears every string component via reset().
void daeURI::initialize() {
    container = NULL;
    reset();
}
// The destructor has no work of its own to do.
daeURI::~daeURI()
{
}

// Creates an empty URI bound to the given DAE.
daeURI::daeURI(DAE& dae)
    : dae(&dae)
{
    initialize();
}
// Creates a URI from a string. When nofrag is true, everything from the last
// '#' onwards (the fragment) is dropped before the string is parsed.
daeURI::daeURI(DAE& dae, const string& uriStr, daeBool nofrag) : dae(&dae) {
    initialize();

    // npos means "keep the whole string": either nofrag is off or there is no '#'.
    size_t hashPos = string::npos;
    if (nofrag)
        hashPos = uriStr.find_last_of('#');

    if (hashPos == string::npos)
        set(uriStr);
    else
        set(uriStr.substr(0, hashPos));
}
// Creates a URI from a string, resolving it against an explicit base URI.
// The new URI uses the same DAE as the base.
daeURI::daeURI(const daeURI& baseURI, const string& uriStr)
    : dae(baseURI.getDAE())
{
    initialize();
    const daeURI* base = &baseURI;
    set(uriStr, base);
}
// Copy constructor. The copy starts with no container, so copyFrom() adopts
// the source's container and then re-parses the source's original string.
daeURI::daeURI(const daeURI& copyFrom_)
    : dae(copyFrom_.dae),
      container(NULL)
{
    initialize();
    copyFrom(copyFrom_);
}
// Creates a URI owned by a container element. The DAE is taken from the
// container, and the container is attached before the string is parsed so
// that validate() can use it when picking a base URI.
daeURI::daeURI(daeElement& container_, const std::string& uriStr)
    : dae(container_.getDAE())
{
    initialize();
    container = &container_;
    set(uriStr);
}
// Creates a URI owned by a container element, with the DAE given explicitly
// instead of being read from the container.
daeURI::daeURI(DAE& dae, daeElement& container_, const string& uriStr)
    : dae(&dae)
{
    initialize();
    container = &container_;
    set(uriStr);
}
void
daeURI::copyFrom(const daeURI& copyFrom)
{
if (!container)
container = copyFrom.container;
set(copyFrom.originalStr());
}
// Assignment from another URI: same semantics as copyFrom().
daeURI& daeURI::operator=(const daeURI& other)
{
    copyFrom(other);
    return *this;
}

// Assignment from a string: parses and resolves it with set().
daeURI& daeURI::operator=(const string& uriStr)
{
    set(uriStr);
    return *this;
}
// Empties the resolved string, the original string and all five parsed
// components. The container is deliberately left alone: it doesn't change
// for the lifetime of the daeURI.
void daeURI::reset() {
    uriString.clear();
    originalURIString.clear();

    _scheme.clear();
    _authority.clear();
    _path.clear();
    _query.clear();
    _fragment.clear();
}
// Returns the DAE pointer stored at construction.
DAE* daeURI::getDAE() const
{
    return dae;
}

// Returns the resolved URI string as assembled by validate().
const string& daeURI::str() const
{
    return uriString;
}

// Returns the string that was originally passed to set().
const string& daeURI::originalStr() const
{
    return originalURIString;
}

// C-string view of str().
daeString daeURI::getURI() const
{
    return uriString.c_str();
}

// C-string view of originalStr().
daeString daeURI::getOriginalURI() const
{
    return originalURIString.c_str();
}
namespace {
    // Splits a URI path into directory (including the trailing '/'), file base
    // name, and extension (including the leading '.'). All three outputs are
    // cleared first, so a component that is absent comes back empty.
    //
    // !!!steveT Currently, a file name that begins with a '.', as in ".emacs",
    // is treated as having no base name and an extension of ".emacs". We might
    // want to change this so that the base name is ".emacs" and the extension
    // is empty, which is more in line with path parsers in other
    // libraries/languages.
    void parsePath(const string& path,
                   /* out */ string& dir,
                   /* out */ string& baseName,
                   /* out */ string& extension) {
        // A single pattern, "(.*/)?([^.]*)?(\\..*)?", cannot handle a path such
        // as /tmp/se.3/file (a dot in a directory name), so the split is done
        // in two steps: directory vs. file name, then base name vs. extension.
        static pcrecpp::RE findDir("(.*/)?(.*)?");
        static pcrecpp::RE findExt("([^.]*)?(\\..*)?");

        string fileName;
        dir.clear();
        baseName.clear();
        extension.clear();

        findDir.PartialMatch(path, &dir, &fileName);
        findExt.PartialMatch(fileName, &baseName, &extension);
    }
}
// Replaces the URI with uriStr_, parsing it into components and resolving it
// against baseURI (validate() picks a default base when baseURI is NULL).
// If the string cannot be parsed, the URI is left completely empty.
void daeURI::set(const string& uriStr_, const daeURI* baseURI) {
    // Copy first: uriStr_ may be a reference to originalURIString itself,
    // which reset() is about to clear.
    const string uriCopy(uriStr_);

    reset();
    originalURIString = uriCopy;

    const bool parsed = parseUriRef(uriCopy, _scheme, _authority, _path, _query, _fragment);
    if (parsed)
        validate(baseURI);
    else
        reset();
}
void daeURI::set(const string& scheme_,
const string& authority_,
const string& path_,
const string& query_,
const string& fragment_,
const daeURI* baseURI)
{
set(assembleUri(scheme_, authority_, path_, query_, fragment_), baseURI);
}
// C-string flavour of set(). A NULL pointer is treated as the empty string.
void daeURI::setURI(daeString _URIString, const daeURI* baseURI) {
    string uriStr;
    if (_URIString)
        uriStr = _URIString;
    set(uriStr, baseURI);
}
// Read-only access to the parsed URI components.
const string& daeURI::scheme() const
{
    return _scheme;
}

const string& daeURI::authority() const
{
    return _authority;
}

const string& daeURI::path() const
{
    return _path;
}

const string& daeURI::query() const
{
    return _query;
}

const string& daeURI::fragment() const
{
    return _fragment;
}

// The id is just another name for the fragment.
const string& daeURI::id() const
{
    return _fragment;
}
namespace {
    // Returns s with a '/' appended, unless s is empty or already ends in '/'.
    string addSlashToEnd(const string& s) {
        if (s.empty() || s[s.length() - 1] == '/')
            return s;
        return s + '/';
    }
}
// Splits the path into directory, file base name and extension (see parsePath).
void daeURI::pathComponents(string& dir, string& baseName, string& ext) const
{
    parsePath(_path, dir, baseName, ext);
}

// Directory part of the path.
string daeURI::pathDir() const
{
    string directory, unusedBase, unusedExt;
    pathComponents(directory, unusedBase, unusedExt);
    return directory;
}

// File name without directory and extension.
string daeURI::pathFileBase() const
{
    string unusedDir, fileBase, unusedExt;
    pathComponents(unusedDir, fileBase, unusedExt);
    return fileBase;
}

// Extension part of the file name.
string daeURI::pathExt() const
{
    string unusedDir, unusedBase, extension;
    pathComponents(unusedDir, unusedBase, extension);
    return extension;
}

// File name including extension, without the directory.
string daeURI::pathFile() const
{
    string unusedDir, fileBase, extension;
    pathComponents(unusedDir, fileBase, extension);
    return fileBase + extension;
}
void daeURI::path(const string& dir, const string& baseName, const string& ext) {
path(addSlashToEnd(dir) + baseName + ext);
}
void daeURI::pathDir(const string& dir) {
string tmp, base, ext;
parsePath(_path, tmp, base, ext);
path(addSlashToEnd(dir), base, ext);
}
void daeURI::pathFileBase(const string& baseName) {
string dir, tmp, ext;
parsePath(_path, dir, tmp, ext);
path(dir, baseName, ext);
}
void daeURI::pathExt(const string& ext) {
string dir, base, tmp;
parsePath(_path, dir, base, tmp);
path(dir, base, ext);
}
void daeURI::pathFile(const string& file) {
string dir, base, ext;
parsePath(_path, dir, base, ext);
path(dir, file, "");
}
// C-string views of the parsed components. The pointers come from c_str() on
// the member strings.
daeString daeURI::getScheme() const
{
    return _scheme.c_str();
}

// Protocol is another name for the scheme.
daeString daeURI::getProtocol() const
{
    return _scheme.c_str();
}

daeString daeURI::getAuthority() const
{
    return _authority.c_str();
}

daeString daeURI::getPath() const
{
    return _path.c_str();
}

daeString daeURI::getQuery() const
{
    return _query.c_str();
}

daeString daeURI::getFragment() const
{
    return _fragment.c_str();
}

// ID is another name for the fragment.
daeString daeURI::getID() const
{
    return _fragment.c_str();
}
// Copies the path, including its terminating NUL, into a caller-supplied
// buffer.
//
// dest: destination buffer; may be NULL, in which case nothing is copied.
// size: capacity of dest in characters, including room for the terminator.
// Returns true if the path was copied, false if dest is NULL or the buffer is
// too small (dest is left untouched in that case).
daeBool daeURI::getPath(daeChar *dest, daeInt size) const {
    if (dest == NULL || size <= 0)
        return false;
    // Compare as size_t so a very long path cannot wrap when narrowed to int.
    if (_path.length() >= static_cast<size_t>(size))
        return false;
    strcpy(dest, _path.c_str());
    return true;
}
void daeURI::scheme(const string& scheme_) { set(scheme_, _authority, _path, _query, _fragment); };
void daeURI::authority(const string& authority_) { set(_scheme, authority_, _path, _query, _fragment); }
void daeURI::path(const string& path_) { set(_scheme, _authority, path_, _query, _fragment); }
void daeURI::query(const string& query_) { set(_scheme, _authority, _path, query_, _fragment); }
void daeURI::fragment(const string& fragment_) { set(_scheme, _authority, _path, _query, fragment_); }
void daeURI::id(const string& id) { fragment(id); }
// Debugging aid: dumps the resolved URI, each parsed component and the
// original string to stderr, then flushes the stream.
void daeURI::print()
{
    FILE* const out = stderr;

    fprintf(out, "URI(%s)\n", uriString.c_str());
    fprintf(out, "scheme = %s\n", _scheme.c_str());
    fprintf(out, "authority = %s\n", _authority.c_str());
    fprintf(out, "path = %s\n", _path.c_str());
    fprintf(out, "query = %s\n", _query.c_str());
    fprintf(out, "fragment = %s\n", _fragment.c_str());
    fprintf(out, "URI without base = %s\n", originalURIString.c_str());

    fflush(out);
}
namespace {
    // Normalizes a path held in a std::string by running
    // daeURI::normalizeURIPath over its character buffer and then trimming
    // the string to the new, possibly shorter, NUL-terminated length.
    void normalize(string& path) {
        // Nothing to normalize, and &path[0] below needs at least one character.
        if (path.empty())
            return;
        // Go through the non-const operator[] rather than casting away the
        // constness of c_str(): writing through c_str() is undefined and, on
        // copy-on-write string implementations, would also modify every
        // other string sharing the buffer.
        daeURI::normalizeURIPath(&path[0]);
        path.resize(strlen(path.c_str()));
    }
}
// Resolves the parsed components against a base URI, following the reference
// resolution algorithm of RFC 3986, and stores the assembled result in
// uriString. "R" below is this (relative) reference, "T" the resolved target.
void daeURI::validate(const daeURI* baseURI)
{
    // If no base URI was supplied, use the container's document URI (or the
    // extracted file URI for a ZAE root document). If there's no container
    // or it yields no URI, use the application base URI.
    if (!baseURI) {
        if (container && container->getDocument()) {
            if (container->getDocument()->isZAERootDocument())
                baseURI = &container->getDocument()->getExtractedFileURI();
            else
                baseURI = container->getDocumentURI();
        }
        if (!baseURI)
            baseURI = &dae->getBaseURI();
        // This URI is the base itself: there is nothing to resolve against.
        if (this == baseURI)
            return;
    }

    if (!_scheme.empty()) {
        // R.scheme defined: everything stays as it is, except the path,
        // which gets its dot segments removed.
        normalize(_path);
    }
    else if (!_authority.empty()) {
        // R.authority defined: authority and query stay, path is normalized,
        // scheme comes from the base.
        normalize(_path);
        _scheme = baseURI->_scheme;
    }
    else {
        if (_path.empty()) {
            // Empty R.path: take the base path, and the base query too
            // unless R has one of its own.
            _path = baseURI->_path;
            if (_query.empty())
                _query = baseURI->_query;
        }
        else {
            if (_path[0] != '/') {
                // Relative path: T.path = merge(Base.path, R.path).
                const bool baseIsAuthorityOnly =
                    !baseURI->_authority.empty() && baseURI->_path.empty();
                if (baseIsAuthorityOnly) {
                    _path.insert(0, "/");
                }
                else {
                    string baseDir, baseFile, baseExt;
                    parsePath(baseURI->_path, baseDir, baseFile, baseExt);
                    _path = baseDir + _path;
                }
            }
            // Absolute or merged: T.path = remove_dot_segments(path).
            normalize(_path);
        }
        // T.authority = Base.authority; T.scheme = Base.scheme;
        _authority = baseURI->_authority;
        _scheme = baseURI->_scheme;
    }

    // T.fragment = R.fragment, i.e. unchanged. Reassemble the string form.
    uriString = assembleUri(_scheme, _authority, _path, _query, _fragment);
}
// Returns the element this URI refers to, as found by the DAE's URI resolvers.
daeElementRef daeURI::getElement() const
{
    return internalResolveElement();
}

// Asks the DAE's resolver list for the element. An empty URI resolves to NULL
// without consulting the resolvers.
daeElement* daeURI::internalResolveElement() const
{
    if (!uriString.empty())
        return dae->getURIResolvers().resolveElement(*this);
    return NULL;
}

// Intentionally does nothing; elements are resolved on demand by getElement().
void daeURI::resolveElement()
{
}
// Attaches the URI to a (new) container element and re-resolves the original
// string, since the base URI may have changed with the container.
void daeURI::setContainer(daeElement* cont)
{
    container = cont;
    set(originalURIString);
}
// Returns true when the URI points outside the container's own document,
// i.e. when its scheme, authority or path differs from the document URI.
// An empty URI, a URI without container, or a container without document URI
// is never reported as external.
daeBool daeURI::isExternalReference() const
{
    if (uriString.empty() || !container)
        return false;

    const daeURI* docURI = container->getDocumentURI();
    if (!docURI)
        return false;

    const bool sameDocument = _path == docURI->_path
                           && _scheme == docURI->_scheme
                           && _authority == docURI->_authority;
    return !sameDocument;
}
// Looks up, in the DAE's database, the document this URI points into. The
// lookup key is the URI without its query and fragment.
daeDocument* daeURI::getReferencedDocument() const
{
    const string documentUri = assembleUri(_scheme, _authority, _path, "", "");
    return dae->getDatabase()->getDocument(documentUri.c_str(), true);
}
// Reports uri_empty for an empty URI and uri_loaded for anything else.
daeURI::ResolveState daeURI::getState() const
{
    if (uriString.empty())
        return uri_empty;
    return uri_loaded;
}

// Intentionally does nothing; the state is derived from the URI string.
void daeURI::setState(ResolveState newState)
{
}
// Normalizes a URI path in place: removes "." segments, resolves
// "<segment>/.." pairs, collapses repeated '/' between segments and drops
// leading "/.." segments.
//
// This code is loosely based on the RFC 2396 normalization code from
// libXML. Specifically it does the RFC steps 6.c->6.g from section 5.2.
// The path is modified in place (characters are only ever moved towards the
// front of the buffer); there is no error return. A NULL path is ignored.
void daeURI::normalizeURIPath(char* path)
{
char *cur, // location we are currently processing
*out; // Everything from this back we are done with
// Return if the path pointer is null
if (path == NULL) return;
// Skip any initial / characters to get us to the start of the first segment
for(cur=path; *cur == '/'; cur++);
// Return if we hit the end of the string
if (*cur == 0) return;
// Keep everything we've seen so far.
out = cur;
// Analyze each segment in sequence for cases (c) and (d).
while (*cur != 0)
{
// (c) All occurrences of "./", where "." is a complete path segment, are removed from the buffer string.
// "cur" is always at the start of a segment here, so a leading '.' followed by '/' is a complete "." segment.
if ((*cur == '.') && (*(cur+1) == '/'))
{
cur += 2;
// If there were multiple slashes, skip them too
while (*cur == '/') cur++;
continue;
}
// (d) If the buffer string ends with "." as a complete path segment, that "." is removed.
if ((*cur == '.') && (*(cur+1) == 0))
break;
// If we passed the above tests copy the segment to the output side
while (*cur != '/' && *cur != 0)
{
*(out++) = *(cur++);
}
if(*cur != 0)
{
// Skip any occurrences of // at the end of the segment, so that only one '/' is kept
while ((*cur == '/') && (*(cur+1) == '/')) cur++;
// Copy the '/' that ends the segment into the output
*(out++) = *(cur++);
}
}
// Terminate the compacted result of the first pass.
*out = 0;
// Restart at the beginning of the first segment for the next part
for(cur=path; *cur == '/'; cur++);
if (*cur == 0) return;
// Analyze each segment in sequence for cases (e) and (f).
//
// e) All occurrences of "<segment>/../", where <segment> is a
// complete path segment not equal to "..", are removed from the
// buffer string. Removal of these path segments is performed
// iteratively, removing the leftmost matching pattern on each
// iteration, until no matching pattern remains.
//
// f) If the buffer string ends with "<segment>/..", where <segment>
// is a complete path segment not equal to "..", that
// "<segment>/.." is removed.
//
// To satisfy the "iterative" clause in (e), we need to collapse the
// string every time we find something that needs to be removed. Thus,
// we don't need to keep two pointers into the string: we only need a
// "current position" pointer.
//
while (true)
{
char *segp, *tmp;
// At the beginning of each iteration of this loop, "cur" points to
// the first character of the segment we want to examine.
// Find the end of the current segment.
for(segp = cur;(*segp != '/') && (*segp != 0); ++segp);
// If this is the last segment, we're done (we need at least two
// segments to meet the criteria for the (e) and (f) cases).
if (*segp == 0)
break;
// If the current segment is "..", or if the next segment _isn't_ "..",
// keep this segment and try the next one.
// (After the increment, "segp" points at the first character of the next segment.)
++segp;
if (((*cur == '.') && (cur[1] == '.') && (segp == cur+3))
|| ((*segp != '.') || (segp[1] != '.')
|| ((segp[2] != '/') && (segp[2] != 0))))
{
cur = segp;
continue;
}
// If we get here, remove this segment and the next one and back up
// to the previous segment (if there is one), to implement the
// "iteratively" clause. It's pretty much impossible to back up
// while maintaining two pointers into the buffer, so just compact
// the whole buffer now.
// If this is the end of the buffer (case f), truncate here and we're done.
if (segp[2] == 0)
{
*cur = 0;
break;
}
// Strings overlap during this copy, but not in a bad way (the source is
// always ahead of the destination), just avoid using strcpy
tmp = cur;
segp += 3;
while ((*(tmp++) = *(segp++)) != 0);
// If there are no previous segments, then keep going from here.
segp = cur;
while ((segp > path) && (*(--segp) == '/'));
if (segp == path)
continue;
// "segp" is pointing to the end of a previous segment; find its
// start. We need to back up to the previous segment and start
// over with that to handle things like "foo/bar/../..". If we
// don't do this, then on the first pass we'll remove the "bar/..",
// but be pointing at the second ".." so we won't realize we can also
// remove the "foo/..".
for(cur = segp;(cur > path) && (*(cur-1) != '/'); cur--);
}
// "out" was not moved by the loop above, so this writes a terminator at
// the end of the first-pass result; the string may already end earlier.
*out = 0;
// g) If the resulting buffer string still begins with one or more
// complete path segments of "..", then the reference is
// considered to be in error. Implementations may handle this
// error by retaining these components in the resolved path (i.e.,
// treating them as part of the final URI), by removing them from
// the resolved path (i.e., discarding relative levels above the
// root), or by avoiding traversal of the reference.
//
// We discard them from the final path. This is only done for paths that
// start with '/'.
if (*path == '/')
{
// Advance "cur" past every leading "/.." segment.
for(cur=path; (*cur == '/') && (cur[1] == '.') && (cur[2] == '.') && ((cur[3] == '/') || (cur[3] == 0)); cur += 3);
if (cur != path)
{
// Shift the remainder of the path to the front of the buffer.
for(out=path; *cur != 0; *(out++) = *(cur++));
*out = 0;
}
}
return;
}
// Takes a resolved URI and rewrites it as a reference relative to another
// existing URI. The relative form ends up in the original URI string; the
// URI is then re-resolved against relativeToURI.
//
// relativeToURI: the URI to be relative to; must not be NULL.
// Returns DAE_OK on success, or DAE_ERR_INVALID_CALL if relativeToURI is NULL
// or the two URIs differ in scheme or authority (the URI is left unchanged).
int daeURI::makeRelativeTo(const daeURI* relativeToURI)
{
    if (relativeToURI == NULL)
        return DAE_ERR_INVALID_CALL;

    // Can only do this function if both URIs have the same scheme and authority
    if (_scheme != relativeToURI->_scheme || _authority != relativeToURI->_authority)
        return DAE_ERR_INVALID_CALL;

    // Advance through the common prefix, remembering the last '/' inside it.
    // Until a common '/' is found, the markers stay at the start of each path.
    const char *this_path = getPath();
    const char *relativeTo_path = relativeToURI->getPath();
    const char *this_slash = this_path;
    const char *relativeTo_slash = relativeTo_path;
    while (*this_path && (*this_path == *relativeTo_path))
    {
        if (*this_path == '/')
        {
            this_slash = this_path;
            relativeTo_slash = relativeTo_path;
        }
        this_path++;
        relativeTo_path++;
    }

    // Step past the marker only when it really is a '/'. Advancing
    // unconditionally would run past the terminator of an empty path and
    // would drop the first character of a path that has no leading '/'.
    if (*relativeTo_slash == '/')
        relativeTo_slash++;
    if (*this_slash == '/')
        this_slash++;

    // One "../" is needed for every directory left in the other path
    // (a file path should always end in a /).
    int segment_count = 0;
    for (; *relativeTo_slash != 0; relativeTo_slash++)
    {
        if (*relativeTo_slash == '/')
            segment_count++;
    }

    string newPath;
    for (int i = 0; i < segment_count; i++)
        newPath += "../";
    newPath += this_slash;

    set("", "", newPath, _query, _fragment, relativeToURI);
    return DAE_OK;
}
// Flag shared by all resolvers; starts out true.
daeBool daeURIResolver::_loadExternalDocuments = true;

// A resolver just remembers the DAE it belongs to.
daeURIResolver::daeURIResolver(DAE& dae)
    : dae(&dae)
{
}

daeURIResolver::~daeURIResolver()
{
}

// Sets the shared auto-load flag.
void daeURIResolver::setAutoLoadExternalDocuments(daeBool load)
{
    _loadExternalDocuments = load;
}

// Returns the shared auto-load flag.
daeBool daeURIResolver::getAutoLoadExternalDocuments()
{
    return _loadExternalDocuments;
}
daeURIResolverList::daeURIResolverList()
{
}

// The list deletes every resolver still stored in it.
daeURIResolverList::~daeURIResolverList()
{
    for (size_t idx = 0; idx < resolvers.getCount(); idx++) {
        delete resolvers[idx];
    }
}

// Gives direct access to the underlying array of resolvers.
daeTArray<daeURIResolver*>& daeURIResolverList::list()
{
    return resolvers;
}

// Tries each resolver in order and returns the first element found, or NULL
// if no resolver can resolve the URI.
daeElement* daeURIResolverList::resolveElement(const daeURI& uri)
{
    for (size_t idx = 0; idx < resolvers.getCount(); idx++) {
        daeElement* found = resolvers[idx]->resolveElement(uri);
        if (found)
            return found;
    }
    return NULL;
}
// Returns true if parsing succeeded, false otherwise. Parsing can fail if the uri
// reference isn't properly formed.
bool cdom::parseUriRef(const string& uriRef,
string& scheme,
string& authority,
string& path,
string& query,
string& fragment) {
// This regular expression for parsing URI references comes from the URI spec:
// http://tools.ietf.org/html/rfc3986#appendix-B
static pcrecpp::RE re("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?");
string s1, s3, s6, s8;
if (re.FullMatch(uriRef, &s1, &scheme, &s3, &authority, &path, &s6, &query, &s8, &fragment))
return true;
return false;
}
namespace {
    // Returns exactly `length` characters: the part of s starting at `offset`,
    // padded with '\0' where s is too short. Unlike std::string::substr this
    // never throws: an offset at or beyond the end of s gives a result made
    // up entirely of '\0' characters.
    std::string safeSubstr(const std::string& s, size_t offset, size_t length) {
        std::string result;
        // Only touch s when offset is inside it; otherwise substr would throw
        // and s.length() - offset would wrap around.
        if (offset < s.length())
            result = s.substr(offset, std::min(length, s.length() - offset));
        result.resize(length, '\0');
        return result;
    }
}
// Builds a URI string from its components. Empty components are left out
// together with their delimiter. With forceLibxmlCompatible set, "file"
// scheme URIs are written the way libxml expects them.
string cdom::assembleUri(const string& scheme,
                         const string& authority,
                         const string& path,
                         const string& query,
                         const string& fragment,
                         bool forceLibxmlCompatible) {
    // First three characters of the path, '\0'-padded so they can be indexed
    // without length checks.
    const string head = safeSubstr(path, 0, 3);
    const bool hasAuthority = !authority.empty();
    const bool libxmlMode = forceLibxmlCompatible && scheme == "file";
    const bool pathHasDoubleSlash = (head[0] == '/' && head[1] == '/');
    // A file URI with an authority is a UNC path, file://otherMachine/file.dae.
    const bool uncPath = hasAuthority && libxmlMode;

    string uri;
    if (!scheme.empty()) {
        uri += scheme;
        uri += ":";
    }

    if (hasAuthority || libxmlMode || pathHasDoubleSlash)
        uri += "//";

    if (uncPath) {
        // Convert file://otherMachine/file.dae to
        // file://///otherMachine/file.dae, which is how libxml does UNC paths.
        uri += "///";
        uri += authority;
    }
    else if (hasAuthority) {
        uri += authority;
    }

    if (!uncPath && libxmlMode && getSystemType() == Windows) {
        // We have to be delicate in how we pass absolute path URIs to libxml
        // on Windows. If the path is an absolute path with no drive letter,
        // add an extra slash to appease libxml.
        const bool absoluteWithoutDrive =
            head[0] == '/' && head[1] != '/' && head[2] != ':';
        if (absoluteWithoutDrive)
            uri += "/";
    }

    uri += path;

    if (!query.empty()) {
        uri += "?";
        uri += query;
    }
    if (!fragment.empty()) {
        uri += "#";
        uri += fragment;
    }
    return uri;
}
// Re-writes a URI reference in the libxml-compatible form: the reference is
// split into components and reassembled with forceLibxmlCompatible on.
string cdom::fixUriForLibxml(const string& uriRef)
{
    string scheme;
    string authority;
    string path;
    string query;
    string fragment;
    cdom::parseUriRef(uriRef, scheme, authority, path, query, fragment);

    const bool libxmlCompatible = true;
    return assembleUri(scheme, authority, path, query, fragment, libxmlCompatible);
}
// Converts a native file system path into a URI reference.
//
// For Windows paths a leading drive ("c:\") becomes "/c:/" and backslashes
// become forward slashes. On every system, spaces are written as %20; no
// other characters are escaped.
string cdom::nativePathToUri(const string& nativePath, systemType type) {
    string uri = nativePath;
    if (type == Windows) {
        // Convert "c:\" to "/c:/". The cast matters: isalpha() is only
        // defined for values representable as unsigned char, and a plain
        // char holding a non-ASCII byte may be negative.
        if (uri.length() >= 2 && isalpha(static_cast<unsigned char>(uri[0])) && uri[1] == ':')
            uri.insert(0, "/");
        // Convert backslashes to forward slashes
        uri = replace(uri, "\\", "/");
    }
    // Convert spaces to %20
    uri = replace(uri, " ", "%20");
    return uri;
}

// Same as nativePathToUri() with the default system type.
string cdom::filePathToUri(const string& filePath) {
    return nativePathToUri(filePath);
}
// Converts a URI reference into a native file system path. Only references
// with a "file" scheme or no scheme at all are accepted; anything else
// yields an empty string. Query and fragment are dropped; %20 is turned back
// into a space.
string cdom::uriToNativePath(const string& uriRef, systemType type)
{
    string scheme, authority, path, query, fragment;
    parseUriRef(uriRef, scheme, authority, path, query, fragment);

    // Make sure we have a file scheme URI, or that it doesn't have a scheme
    const bool isFileReference = scheme.empty() || scheme == "file";
    if (!isFileReference)
        return "";

    string nativePath;
    if (type == Windows) {
        // An authority means a UNC path: \\machine\...
        if (!authority.empty()) {
            nativePath += "\\\\";
            nativePath += authority;
        }

        // Replace two leading slashes with one leading slash, so that
        // ///otherComputer/file.dae becomes //otherComputer/file.dae and
        // //folder/file.dae becomes /folder/file.dae
        const bool doubleSlash = path.length() >= 2 && path[0] == '/' && path[1] == '/';
        if (doubleSlash)
            path.erase(0, 1);

        // Convert "/C:/" to "C:/"
        const bool slashBeforeDrive = path.length() >= 3 && path[0] == '/' && path[2] == ':';
        if (slashBeforeDrive)
            path.erase(0, 1);

        // Convert forward slashes to back slashes
        path = replace(path, "/", "\\");
    }
    nativePath += path;

    // Replace %20 with space
    nativePath = replace(nativePath, "%20", " ");
    return nativePath;
}

// Same as uriToNativePath() with the default system type.
string cdom::uriToFilePath(const string& uriRef)
{
    return uriToNativePath(uriRef);
}