blob: 55ce3c375b04884aca6a00b0337eb3aa7a54192e [file] [log] [blame]
/*
* Copyright (C) 2009, 2010, 2011, 2012 Research In Motion Limited. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "RSSAtomParser.h"
#include "BlackBerryPlatformAssert.h"
#include "libxml/parser.h"
#include "libxml/xmlwriter.h"
#include <wtf/text/StringBuilder.h>
namespace WebCore {
// Returns true when |path| neither starts with "/" nor contains ":/" anywhere.
// Callers in this file prefix such values with a base before using them.
static inline bool isRelativePath(const String& path)
{
    if (path.startsWith("/"))
        return false;
    return path.find(":/") == WTF::notFound;
}
// Translates a non-empty "rel" attribute value into the matching link type.
// The value is lower-cased first, so the comparison ignores letter case.
// Values that are not recognized map to TypeUnsupported.
static RSSAtomLink::Type linkTypeFromRel(const String& rel)
{
    const String lowered = rel.lower();
    if (lowered == "alternate")
        return RSSAtomLink::TypeAlternate;
    if (lowered == "enclosure")
        return RSSAtomLink::TypeEnclosure;
    if (lowered == "related")
        return RSSAtomLink::TypeRelated;
    if (lowered == "self")
        return RSSAtomLink::TypeSelf;
    if (lowered == "via")
        return RSSAtomLink::TypeVia;
    return RSSAtomLink::TypeUnsupported;
}

// Returns the link type derived from m_rel. The result is computed on the
// first call and cached in m_typeInEnum; later calls return the cached value.
// An empty m_rel is treated as TypeAlternate.
RSSAtomLink::Type RSSAtomLink::relType()
{
    if (m_typeInEnum == TypeUnknown)
        m_typeInEnum = m_rel.isEmpty() ? TypeAlternate : linkTypeFromRel(m_rel);
    return m_typeInEnum;
}
// Constructs a parser. Nothing is initialized explicitly here; the document
// URL is set later by parseBuffer().
RSSAtomParser::RSSAtomParser()
{
}
bool RSSAtomParser::parseBuffer(const char* buffer, int length, const char* url, const char* encoding)
{
m_url = KURL(blankURL(), url);
return parseXmlDoc(xmlReadMemory(buffer, length, url, encoding, XML_PARSE_NOBLANKS | XML_PARSE_NONET));
}
// Locates the <feed> element among the root element and its following
// siblings, and parses it into m_root.
//
// A non-null |doc| is always released with xmlFreeDoc() before returning.
// Returns false when |doc| is null or has no root element; otherwise returns
// m_root converted to bool.
bool RSSAtomParser::parseXmlDoc(xmlDocPtr doc)
{
    if (!doc)
        return false;

    xmlNode* current = xmlDocGetRootElement(doc);
    if (!current) {
        xmlFreeDoc(doc);
        return false;
    }

    while (current) {
        const String tag = String(reinterpret_cast<const char*>(current->name)).lower();
        if (tag == "feed") {
            // Only the first <feed> element is parsed.
            m_root = parseFeed(current->children);
            break;
        }
        current = current->next;
    }

    xmlFreeDoc(doc);
    return m_root;
}
// Handles the child elements that are stored the same way for feeds and
// entries (title, id, author, updated, content, published).
//
// item - object whose fields are filled in.
// name - element name; callers in this file pass it already lower-cased.
// node - the element itself.
// base - base value forwarded to parseContent() for <content> elements.
//
// Returns true when |name| was recognized and stored, false otherwise.
bool RSSAtomParser::parseItemBaseAttribute(RSSItemBase* item, const String& name, xmlNode* node, const String& base)
{
    if (name == "title") {
        item->m_title = textFromXMLNode(node);
        return true;
    }
    if (name == "id") {
        item->m_id = textFromXMLNode(node);
        return true;
    }
    if (name == "author") {
        item->m_author = parseAuthor(node);
        return true;
    }
    if (name == "updated") {
        item->m_updated = textFromXMLNode(node);
        return true;
    }
    if (name == "content") {
        item->m_description = parseContent(base, node);
        return true;
    }
    if (name == "published") {
        item->m_pubDate = textFromXMLNode(node);
        return true;
    }
    return false;
}
// Builds an RSSItem from an <entry> element.
//
// node - the <entry> element; asserted non-null.
//
// Returns a newly allocated RSSItem.
RSSItem* RSSAtomParser::parseItem(xmlNode* node)
{
    BLACKBERRY_ASSERT(node);

    RSSItem* item = new RSSItem();

    // Pick up the entry's "base" attribute; if several attributes carry that
    // name, the last one wins.
    String base;
    for (xmlAttr* attribute = node->properties; attribute; attribute = attribute->next) {
        const String attributeName = String(reinterpret_cast<const char*>(attribute->name)).lower();
        if (attributeName == "base")
            base = textFromXMLAttr(attribute);
    }

    for (xmlNode* child = node->children; child; child = child->next) {
        const String tag = String(reinterpret_cast<const char*>(child->name)).lower();

        if (parseItemBaseAttribute(item, tag, child, base))
            continue;

        if (tag == "category") {
            item->m_categories.append(parseCategory(child));
            continue;
        }

        if (!(tag == "link"))
            continue;

        RSSAtomLink* link = parseLink(child);
        if (isRelativePath(link->m_href))
            link->m_href = base + "/" + link->m_href;

        const RSSAtomLink::Type relation = link->relType();
        if (relation == RSSAtomLink::TypeAlternate)
            item->m_link = link->m_href;
        else if (relation == RSSAtomLink::TypeEnclosure) {
            // Only the first enclosure link is kept.
            BLACKBERRY_ASSERT(!item->m_enclosure);
            if (!item->m_enclosure)
                item->m_enclosure = enclosureFromLink(link);
        }
        delete link;
    }

    return item;
}
// Builds an RSSFeed from the children of a <feed> element.
//
// node - first child of the <feed> element; its following siblings are
//        visited too. Asserted non-null.
//
// Returns a newly allocated RSSFeed.
RSSFeed* RSSAtomParser::parseFeed(xmlNode* node)
{
    BLACKBERRY_ASSERT(node);

    RSSFeed* feed = new RSSFeed();
    for (xmlNode* sibling = node; sibling; sibling = sibling->next) {
        const String tag = String(reinterpret_cast<const char*>(sibling->name)).lower();

        // Feed-level elements carry no base, hence the empty string.
        if (parseItemBaseAttribute(feed, tag, sibling, emptyString()))
            continue;

        if (tag == "entry") {
            feed->m_items.append(parseItem(sibling));
            continue;
        }

        if (tag == "link") {
            // Only "alternate" links supply the feed's link.
            RSSAtomLink* link = parseLink(sibling);
            if (link->relType() == RSSAtomLink::TypeAlternate)
                feed->m_link = link->m_href;
            delete link;
        }
    }
    return feed;
}
// Reads the attributes of a <link> element into a new RSSAtomLink.
//
// node - the <link> element; asserted non-null.
//
// Returns a newly allocated RSSAtomLink; the callers in this file delete it.
// Attributes other than href, rel, type, hreflang, title and length are ignored.
RSSAtomLink* RSSAtomParser::parseLink(xmlNode* node)
{
    BLACKBERRY_ASSERT(node);

    RSSAtomLink* result = new RSSAtomLink();
    for (xmlAttr* attribute = node->properties; attribute; attribute = attribute->next) {
        const String key = String(reinterpret_cast<const char*>(attribute->name)).lower();

        if (key == "href") {
            result->m_href = textFromXMLAttr(attribute);
            continue;
        }
        if (key == "rel") {
            result->m_rel = textFromXMLAttr(attribute);
            continue;
        }
        if (key == "type") {
            result->m_type = textFromXMLAttr(attribute);
            continue;
        }
        if (key == "hreflang") {
            result->m_hreflang = textFromXMLAttr(attribute);
            continue;
        }
        if (key == "title") {
            result->m_title = textFromXMLAttr(attribute);
            continue;
        }
        if (key == "length")
            result->m_length = textFromXMLAttr(attribute);
    }
    return result;
}
// Creates an RSSEnclosure carrying the href, type and length of |link|.
//
// link - asserted non-null and asserted to be of type TypeEnclosure.
//
// Returns a newly allocated RSSEnclosure.
RSSEnclosure* RSSAtomParser::enclosureFromLink(RSSAtomLink* link)
{
    BLACKBERRY_ASSERT(link);
    BLACKBERRY_ASSERT(link->relType() == RSSAtomLink::TypeEnclosure);

    RSSEnclosure* result = new RSSEnclosure();
    result->m_length = link->m_length;
    result->m_type = link->m_type;
    result->m_url = link->m_href;
    return result;
}
// Converts an Atom <content> element into the string stored as a description.
// See: http://tools.ietf.org/html/rfc4287#page-16
//
// base - value of the enclosing element's "base" attribute (may be empty).
// node - the <content> element; asserted non-null.
//
// Result, in order of precedence:
//  - a "src" attribute is present: an HTML anchor whose target and label are
//    that URL (prefixed with |base| + "/" when the URL is relative);
//  - type="xhtml": the first child, if it is an element, serialized as markup
//    and preceded by a <base> tag when |base| is not empty; an empty string
//    when there is no such child or the serialization buffer cannot be
//    allocated;
//  - any other type: the result of textFromXMLNode(node).
String RSSAtomParser::parseContent(const String& base, xmlNode* node)
{
    BLACKBERRY_ASSERT(node);

    String type = "default";
    String src;
    for (xmlAttr* attr = node->properties; attr; attr = attr->next) {
        String name(reinterpret_cast<const char*>(attr->name));
        name.makeLower();
        if (name == "type")
            type = textFromXMLAttr(attr);
        else if (name == "src")
            src = textFromXMLAttr(attr);
    }

    if (!src.isEmpty()) {
        if (isRelativePath(src))
            src = base + "/" + src;
        // NOTE(review): |src| comes from the feed and is inserted into the
        // markup without HTML escaping -- confirm this is acceptable.
        StringBuilder builder;
        builder.appendLiteral("<a href=\"");
        builder.append(src + "\">" + src + "</a>");
        return builder.toString();
    }

    if (type == "xhtml") {
        String content;
        xmlBufferPtr buffer = xmlBufferCreate();
        // Without a buffer there is nothing to serialize into; bail out
        // instead of appending a null pointer below.
        if (!buffer)
            return content;

        xmlNode* cur = node->children;
        if (cur && cur->type == XML_ELEMENT_NODE) {
            xmlNodeDump(buffer, cur->doc, cur, 0, 0);
            StringBuilder builder;
            if (!base.isEmpty()) {
                builder.appendLiteral("<base href='");
                builder.append(m_url.baseAsString());
                builder.appendLiteral("/");
                builder.append(base);
                builder.appendLiteral("/' />");
            }
            // The buffer holds UTF-8. Decode it explicitly: appending the raw
            // bytes would treat every byte as one Latin-1 character and
            // garble non-ASCII text.
            builder.append(String::fromUTF8(reinterpret_cast<const char*>(xmlBufferContent(buffer))));
            content = builder.toString();
        }
        xmlBufferFree(buffer);
        return content;
    }

    // "text", "text/*", "html", XML media types and unknown types are all
    // returned as the node's text.
    // FIXME: XML media types ("+xml", "/xml") should follow atom spec 4.1.3.3.4.
    return textFromXMLNode(node);
}
// Formats an <author> element as "name" or, when an <email> child with text
// is present, as "name (email)".
//
// node - the <author> element; asserted non-null.
//
// If <name> or <email> occurs more than once, the last occurrence is used.
String RSSAtomParser::parseAuthor(xmlNode* node)
{
    BLACKBERRY_ASSERT(node);

    String displayName;
    String address;
    xmlNode* child = node->children;
    while (child) {
        const String tag = String(reinterpret_cast<const char*>(child->name)).lower();
        if (tag == "name")
            displayName = textFromXMLNode(child);
        else if (tag == "email")
            address = textFromXMLNode(child);
        child = child->next;
    }

    if (address.isEmpty())
        return displayName;

    String combined = displayName + " (" + address + ")";
    return combined;
}
// Returns the display text for a <category> element.
//
// node - the <category> element; asserted non-null.
//
// The "label" attribute is optional while "term" is mandatory, so the label
// is preferred: the first "label" found ends the scan, and "term" is only a
// fallback. An empty string is returned when neither attribute is present.
String RSSAtomParser::parseCategory(xmlNode* node)
{
    BLACKBERRY_ASSERT(node);

    String fallback;
    xmlAttr* attribute = node->properties;
    while (attribute) {
        const String key = String(reinterpret_cast<const char*>(attribute->name)).lower();
        if (key == "label")
            return textFromXMLAttr(attribute);
        if (key == "term")
            fallback = textFromXMLAttr(attribute);
        attribute = attribute->next;
    }
    return fallback;
}
} // namespace WebCore