[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    [kdepimlibs] syndication: In proper RSS 1.0, the items in the feed must be listed in the "items" seq
From:       Frank Osterfeld <frank.osterfeld () kdab ! com>
Date:       2012-03-27 19:34:33
Message-ID: 20120327193433.7087DA60A9 () git ! kde ! org
[Download RAW message or body]

Git commit 503d2e94d8fdd357bf723441035d4b0c49a20ec4 by Frank Osterfeld.
Committed on 27/03/2012 at 20:11.
Pushed by osterfeld into branch 'master'.

In proper RSS 1.0, the items in the feed must be listed in the "items" sequence, with \
the same URIs used in the item descriptions afterwards. If the URIs don't match, the \
sequence and items are not linked in the RDF, thus no item descriptions are found. \
This is a common error in RDF feeds out there, and W3C's feed validator doesn't \
complain. Nobody groks RDF anyway, so don't rely on matching URIs. Instead, just \
ignore <items> and get all item resources by type as they occur in the feed.

BUG:295354

M  +52   -22   syndication/rdf/document.cpp
M  +6    -0    syndication/rdf/parser.cpp
A  +102  -0    syndication/tests/rdf/javaworld.xml
A  +86   -0    syndication/tests/rdf/javaworld.xml.expected
M  +2    -2    syndication/tests/rdf/tagesschau.de.xml

http://commits.kde.org/kdepimlibs/503d2e94d8fdd357bf723441035d4b0c49a20ec4

diff --git a/syndication/rdf/document.cpp b/syndication/rdf/document.cpp
index e336955..997e3c9 100644
--- a/syndication/rdf/document.cpp
+++ b/syndication/rdf/document.cpp
@@ -38,6 +38,8 @@
 
 #include <QtCore/QList>
 #include <QtCore/QString>
+#include <QtCore/QStringList>
+#include <QtCore/QVector>
 
 using namespace boost;
 
@@ -138,37 +140,65 @@ SyndicationInfo Document::syn() const
     return SyndicationInfo(resource());
 }
 
+
+struct SortItem {
+    Item item;
+    int index;
+};
+
+struct LessThanByIndex {
+    bool operator()(const SortItem& lhs, const SortItem& rhs) const {
+        return lhs.index < rhs.index;
+    }
+};
+
+static QList<Item> sortListToMatchSequence(QList<Item> items, const QStringList& \
uriSequence) { +    QVector<SortItem> toSort;
+    toSort.reserve(items.size());
+    Q_FOREACH(const Item& i, items) {
+        SortItem item;
+        item.item = i;
+        item.index = uriSequence.indexOf(i.resource()->uri());
+        toSort.append(item);
+    }
+    qSort(toSort.begin(), toSort.end(), LessThanByIndex());
+
+    int i = 0;
+    Q_FOREACH(const SortItem& sortItem, toSort) {
+        items[i] = sortItem.item;
+        i++;
+    }
+
+    return items;
+}
+
 QList<Item> Document::items() const
 {
     QList<Item> list;
-    if (!resource()->hasProperty(RSSVocab::self()->items()))
-        return list;
 
-    NodePtr n = resource()->property(RSSVocab::self()->items())->object();
-    if (n->isSequence())
-    {
-        Sequence* seq = static_cast<Sequence*>(n.get());
+    const QList<ResourcePtr> items = \
resource()->model().resourcesWithType(RSSVocab::self()->item()); +    DocumentPtr \
doccpy(new Document(*this)); +    Q_FOREACH (const ResourcePtr& i, items)
+        list.append(Item(i, doccpy));
 
-        const QList<NodePtr> items = seq->items();
-        QList<NodePtr>::ConstIterator it = items.begin();
-        QList<NodePtr>::ConstIterator end = items.end();
+    if (resource()->hasProperty(RSSVocab::self()->items())) {
+        NodePtr n = resource()->property(RSSVocab::self()->items())->object();
+        if (n->isSequence())
+        {
+            Sequence* seq = static_cast<Sequence*>(n.get());
 
-        DocumentPtr doccpy(new Document(*this));
+            const QList<NodePtr> seqItems = seq->items();
 
-        for ( ; it != end; ++it)
-        {
-            if ((*it)->isResource())
-            {
-                // well, we need it as ResourcePtr
-                // maybe this should go to the node
-                // interface ResourcePtr asResource()?
-                ResourcePtr ptr = \
                resource()->model().createResource((static_cast<Resource*>((*it).get()))->uri());
                
-
-                list.append(Item(ptr, doccpy));
-            }
-        }
+            QStringList uriSequence;
+            uriSequence.reserve(seqItems.size());
 
+            Q_FOREACH(const NodePtr& i, seqItems)
+                if (i->isResource())
+                    uriSequence.append(static_cast<Resource*>(i.get())->uri());
+           list = sortListToMatchSequence(list, uriSequence);
+        }
     }
+
     return list;
 }
 
diff --git a/syndication/rdf/parser.cpp b/syndication/rdf/parser.cpp
index 12b4c1c..f675236 100644
--- a/syndication/rdf/parser.cpp
+++ b/syndication/rdf/parser.cpp
@@ -164,6 +164,8 @@ void Parser::ParserPrivate::map09to10(Model model)
 
 void Parser::ParserPrivate::addSequenceFor09(Model model)
 {
+    //RDF 0.9 doesn't contain an item sequence, and the items don't have rdf:about, \
so add both +
     const QList<ResourcePtr> items = \
model.resourcesWithType(RSS09Vocab::self()->item());  
     if (items.isEmpty())
@@ -196,6 +198,10 @@ void Parser::ParserPrivate::addSequenceFor09(Model model)
     foreach (const ResourcePtr &i, sorted)
     {
         seq->append(i);
+        // add rdf:about (type)
+        model.addStatement(i, RDFVocab::self()->type(), RSSVocab::self()->item());
+
+        //add to items sequence
         model.addStatement(seq, RDFVocab::self()->li(), i);
     }
 }
diff --git a/syndication/tests/rdf/javaworld.xml \
b/syndication/tests/rdf/javaworld.xml new file mode 100644
index 0000000..dbbc6b0
--- /dev/null
+++ b/syndication/tests/rdf/javaworld.xml
@@ -0,0 +1,102 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+RDF 1.0 with the URIs <items> not matching the actual <item>s
+As not matching URIs is a common error, <items> should be ignored and the actual \
item descriptions should be returned (even if that doesn't make sense on RDF level) \
+bug 294354 +-->
+
+
+
+						    <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" \
xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" \
xmlns:rss="http://purl.org/rss/1.0/" \
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"> +       <channel \
rdf:about="http://www.javaworld.com/features/index.html"> +          <title>Featured \
Articles</title> +          <link>http://www.javaworld.com/features/index.html</link>
+          <description>JW RSS feed for Featured Articles</description>
+          <dc:publisher>Javaworld</dc:publisher>
+          <dc:rights>Copyright(C) 1994 - 2012 Javaworld</dc:rights>
+                    <items>
+             <rdf:Seq>
+															<rdf:li \
resource="http://www.javaworld.com/javaworld/jw-02-2012/120228-modernizing-it.html"/> \
+							<rdf:li resource="http://www.javaworld.com/javaworld/jw-02-2012/120224-bjarne-stroustrup-interview.html"/>
 +							<rdf:li resource="http://www.javaworld.com/javaworld/jw-02-2012/120222-node-js-tools-remake-server-side.html"/>
 +							<rdf:li resource="http://www.javaworld.com/javaworld/jw-02-2012/120216-fatal-exception.html"/>
 +							<rdf:li resource="http://www.javaworld.com/javaworld/jw-02-2012/120214-jtip-rss-for-android.html"/>
 +							<rdf:li resource="http://www.javaworld.com/javaworld/jw-02-2012/120208-mongodb-review.html"/>
 +							<rdf:li resource="http://www.javaworld.com/javaworld/jw-02-2012/120206-programmer-personality-types.html"/>
 +							<rdf:li resource="http://www.javaworld.com/javaworld/jw-02-2012/120202-environment-aware-maven-build.html"/>
 +							<rdf:li resource="http://www.javaworld.com/javaworld/jw-02-2012/120201-opa-programming.html"/>
 +							<rdf:li resource="http://www.javaworld.com/javaworld/jw-01-2012/120126-fatal-exception.html"/>
 +							             </rdf:Seq>
+          </items>
+       </channel>
+											<item rdf:about="http://www.javaworld.com//javaworld/jw-02-2012/120228-modernizing-it.html">
 +					<title>Electric Cloud CEO: We automate agile development</title>
+					<link>http://www.javaworld.com/javaworld/jw-02-2012/120228-modernizing-it.html</link>
 +					<description>An explosion of tools and platforms is making app dev much more \
complex. Mike Maciag, CEO of Electric Cloud, offers cross-platform workflow and \
automation to rationalize it all.</description> +					<dc:creator/>
+										<dc:date>2012-02-28T00:00:00Z</dc:date>
+				</item>
+							<item rdf:about="http://www.javaworld.com//javaworld/jw-02-2012/120224-bjarne-stroustrup-interview.html">
 +					<title>Stroustrup reveals what&apos;s new in C++ 11</title>
+					<link>http://www.javaworld.com/javaworld/jw-02-2012/120224-bjarne-stroustrup-interview.html</link>
 +					<description>Latest version of language isn&apos;t a major overhaul, but \
offers many small upgrades to appeal to different areas of development.</description> \
+					<dc:creator/> +										<dc:date>2012-02-24T00:00:00Z</dc:date>
+				</item>
+							<item rdf:about="http://www.javaworld.com//javaworld/jw-02-2012/120222-node-js-tools-remake-server-side.html">
 +					<title>Node.js tools: Server-side JavaScript comes of age</title>
+					<link>http://www.javaworld.com/javaworld/jw-02-2012/120222-node-js-tools-remake-server-side.html</link>
 +					<description>Node&apos;s ecosystem is growing at an exponential rate, which is \
only natural when you start from scratch. Still, the plethora of Node-inspired tools \
amount to a diverse and sustainable infrastructure, which could ultimatey redefine \
the web development stack. </description> +					<dc:creator/>
+										<dc:date>2012-02-22T00:00:00Z</dc:date>
+				</item>
+							<item rdf:about="http://www.javaworld.com//javaworld/jw-02-2012/120216-fatal-exception.html">
 +					<title>Google Chrome, HTML5, and the new Web platform</title>
+					<link>http://www.javaworld.com/javaworld/jw-02-2012/120216-fatal-exception.html</link>
 +					<description>The Chrome dev team is working toward a vision of Web apps that \
offers a clean break from traditional websites.</description> +					<dc:creator/>
+										<dc:date>2012-02-16T00:00:00Z</dc:date>
+				</item>
+							<item rdf:about="http://www.javaworld.com//javaworld/jw-02-2012/120214-jtip-rss-for-android.html">
 +					<title>Java tip: Set up an RSS feed for your Android application</title>
+					<link>http://www.javaworld.com/javaworld/jw-02-2012/120214-jtip-rss-for-android.html</link>
 +					<description>Get a hands-on introduction to using \
&lt;code&gt;javax.xml.parsers.SAXParser&lt;/code&gt; to parse an RSS feed in XML \
format. While some familiarity with Java application development is assumed, this \
Java tip is suitable for developers new to Java mobile development.</description> \
+					<dc:creator/> +										<dc:date>2012-02-14T00:00:00Z</dc:date>
+				</item>
+							<item rdf:about="http://www.javaworld.com//javaworld/jw-02-2012/120208-mongodb-review.html">
 +					<title>Flexing NoSQL: MongoDB in review</title>
+					<link>http://www.javaworld.com/javaworld/jw-02-2012/120208-mongodb-review.html</link>
 +					<description>MongoDB shines with broad programming language support, SQL-like \
queries, and out-of-the-box scaling. Check the InfoWorld Review score card to see how \
MongoDB stacks up among NoSQL data stores.</description> +					<dc:creator/>
+										<dc:date>2012-02-08T00:00:00Z</dc:date>
+				</item>
+							<item rdf:about="http://www.javaworld.com//javaworld/jw-02-2012/120206-programmer-personality-types.html">
 +					<title>Programmer personality types: 13 profiles in code</title>
+					<link>http://www.javaworld.com/javaworld/jw-02-2012/120206-programmer-personality-types.html</link>
 +					<description>From Underdocumenters to flat-out Fakers, the world of software \
development is full of characters. Here, InfoWorld&apos;s Peter Wayner offers 13 \
developer personality profiles based on coding style.</description> \
+					<dc:creator/> +										<dc:date>2012-02-06T00:00:00Z</dc:date>
+				</item>
+							<item rdf:about="http://www.javaworld.com//javaworld/jw-02-2012/120202-environment-aware-maven-build.html">
 +					<title>Develop an environment-aware Maven build process</title>
+					<link>http://www.javaworld.com/javaworld/jw-02-2012/120202-environment-aware-maven-build.html</link>
 +					<description>Including environment variables in your Maven build process could \
boost your team&apos;s  efficiency at every stage of the software development \
lifecycle. Java developer Paul Spinelli demonstrates his custom approach to \
environment-aware Maven builds.</description> +					<dc:creator/>
+										<dc:date>2012-02-02T00:00:00Z</dc:date>
+				</item>
+							<item rdf:about="http://www.javaworld.com//javaworld/jw-02-2012/120201-opa-programming.html">
 +					<title>Programming Opa: Web development, reimagined</title>
+					<link>http://www.javaworld.com/javaworld/jw-02-2012/120201-opa-programming.html</link>
 +					<description>MLstate&apos;s Opa streamlines Web app development with a single \
language for client and server, but the bright promise is not without \
pitfalls.</description> +					<dc:creator/>
+										<dc:date>2012-02-01T00:00:00Z</dc:date>
+				</item>
+							<item rdf:about="http://www.javaworld.com//javaworld/jw-01-2012/120126-fatal-exception.html">
 +					<title>Here&apos;s how to solve America&apos;s developer shortage</title>
+					<link>http://www.javaworld.com/javaworld/jw-01-2012/120126-fatal-exception.html</link>
 +					<description>Employers say they can&apos;t find enough workers to staff IT \
jobs. Maybe they&apos;ve been going about it all wrong.</description> \
+					<dc:creator/> +										<dc:date>2012-01-26T00:00:00Z</dc:date>
+				</item>
+							    </rdf:RDF>
diff --git a/syndication/tests/rdf/javaworld.xml.expected \
b/syndication/tests/rdf/javaworld.xml.expected new file mode 100644
index 0000000..5d3e0aa
--- /dev/null
+++ b/syndication/tests/rdf/javaworld.xml.expected
@@ -0,0 +1,86 @@
+# Feed begin ######################
+title: #Featured Articles#
+link: #http://www.javaworld.com/features/index.html#
+description: #JW RSS feed for Featured Articles#
+copyright: #Copyright(C) 1994 - 2012 Javaworld#
+# Item begin ######################
+id: #http://www.javaworld.com//javaworld/jw-02-2012/120201-opa-programming.html#
+title: #Programming Opa: Web development, reimagined#
+link: #http://www.javaworld.com/javaworld/jw-02-2012/120201-opa-programming.html#
+description: #MLstate's Opa streamlines Web app development with a single language \
for client and server, but the bright promise is not without pitfalls.# \
+datePublished: #Wed Feb 1 01:00:00 2012# +dateUpdated: #Wed Feb 1 01:00:00 2012#
+# Item end ########################
+# Item begin ######################
+id: #http://www.javaworld.com//javaworld/jw-02-2012/120222-node-js-tools-remake-server-side.html#
 +title: #Node.js tools: Server-side JavaScript comes of age#
+link: #http://www.javaworld.com/javaworld/jw-02-2012/120222-node-js-tools-remake-server-side.html#
 +description: #Node's ecosystem is growing at an exponential rate, which is only \
natural when you start from scratch. Still, the plethora of Node-inspired tools \
amount to a diverse and sustainable infrastructure, which could ultimatey redefine \
the web development stack.# +datePublished: #Wed Feb 22 01:00:00 2012#
+dateUpdated: #Wed Feb 22 01:00:00 2012#
+# Item end ########################
+# Item begin ######################
+id: #http://www.javaworld.com//javaworld/jw-02-2012/120216-fatal-exception.html#
+title: #Google Chrome, HTML5, and the new Web platform#
+link: #http://www.javaworld.com/javaworld/jw-02-2012/120216-fatal-exception.html#
+description: #The Chrome dev team is working toward a vision of Web apps that offers \
a clean break from traditional websites.# +datePublished: #Thu Feb 16 01:00:00 2012#
+dateUpdated: #Thu Feb 16 01:00:00 2012#
+# Item end ########################
+# Item begin ######################
+id: #http://www.javaworld.com//javaworld/jw-02-2012/120208-mongodb-review.html#
+title: #Flexing NoSQL: MongoDB in review#
+link: #http://www.javaworld.com/javaworld/jw-02-2012/120208-mongodb-review.html#
+description: #MongoDB shines with broad programming language support, SQL-like \
queries, and out-of-the-box scaling. Check the InfoWorld Review score card to see how \
MongoDB stacks up among NoSQL data stores.# +datePublished: #Wed Feb 8 01:00:00 2012#
+dateUpdated: #Wed Feb 8 01:00:00 2012#
+# Item end ########################
+# Item begin ######################
+id: #http://www.javaworld.com//javaworld/jw-02-2012/120224-bjarne-stroustrup-interview.html#
 +title: #Stroustrup reveals what&apos;s new in C++ 11#
+link: #http://www.javaworld.com/javaworld/jw-02-2012/120224-bjarne-stroustrup-interview.html#
 +description: #Latest version of language isn't a major overhaul, but offers many \
small upgrades to appeal to different areas of development.# +datePublished: #Fri Feb \
24 01:00:00 2012# +dateUpdated: #Fri Feb 24 01:00:00 2012#
+# Item end ########################
+# Item begin ######################
+id: #http://www.javaworld.com//javaworld/jw-02-2012/120206-programmer-personality-types.html#
 +title: #Programmer personality types: 13 profiles in code#
+link: #http://www.javaworld.com/javaworld/jw-02-2012/120206-programmer-personality-types.html#
 +description: #From Underdocumenters to flat-out Fakers, the world of software \
development is full of characters. Here, InfoWorld's Peter Wayner offers 13 developer \
personality profiles based on coding style.# +datePublished: #Mon Feb 6 01:00:00 \
2012# +dateUpdated: #Mon Feb 6 01:00:00 2012#
+# Item end ########################
+# Item begin ######################
+id: #http://www.javaworld.com//javaworld/jw-02-2012/120202-environment-aware-maven-build.html#
 +title: #Develop an environment-aware Maven build process#
+link: #http://www.javaworld.com/javaworld/jw-02-2012/120202-environment-aware-maven-build.html#
 +description: #Including environment variables in your Maven build process could \
boost your team's  efficiency at every stage of the software development lifecycle. \
Java developer Paul Spinelli demonstrates his custom approach to environment-aware \
Maven builds.# +datePublished: #Thu Feb 2 01:00:00 2012#
+dateUpdated: #Thu Feb 2 01:00:00 2012#
+# Item end ########################
+# Item begin ######################
+id: #http://www.javaworld.com//javaworld/jw-02-2012/120228-modernizing-it.html#
+title: #Electric Cloud CEO: We automate agile development#
+link: #http://www.javaworld.com/javaworld/jw-02-2012/120228-modernizing-it.html#
+description: #An explosion of tools and platforms is making app dev much more \
complex. Mike Maciag, CEO of Electric Cloud, offers cross-platform workflow and \
automation to rationalize it all.# +datePublished: #Tue Feb 28 01:00:00 2012#
+dateUpdated: #Tue Feb 28 01:00:00 2012#
+# Item end ########################
+# Item begin ######################
+id: #http://www.javaworld.com//javaworld/jw-02-2012/120214-jtip-rss-for-android.html#
 +title: #Java tip: Set up an RSS feed for your Android application#
+link: #http://www.javaworld.com/javaworld/jw-02-2012/120214-jtip-rss-for-android.html#
 +description: #Get a hands-on introduction to using \
<code>javax.xml.parsers.SAXParser</code> to parse an RSS feed in XML format. While \
some familiarity with Java application development is assumed, this Java tip is \
suitable for developers new to Java mobile development.# +datePublished: #Tue Feb 14 \
01:00:00 2012# +dateUpdated: #Tue Feb 14 01:00:00 2012#
+# Item end ########################
+# Item begin ######################
+id: #http://www.javaworld.com//javaworld/jw-01-2012/120126-fatal-exception.html#
+title: #Here&apos;s how to solve America&apos;s developer shortage#
+link: #http://www.javaworld.com/javaworld/jw-01-2012/120126-fatal-exception.html#
+description: #Employers say they can't find enough workers to staff IT jobs. Maybe \
they've been going about it all wrong.# +datePublished: #Thu Jan 26 01:00:00 2012#
+dateUpdated: #Thu Jan 26 01:00:00 2012#
+# Item end ########################
+# Feed end ########################
diff --git a/syndication/tests/rdf/tagesschau.de.xml \
b/syndication/tests/rdf/tagesschau.de.xml index 27b9ea1..fbf6906 100644
--- a/syndication/tests/rdf/tagesschau.de.xml
+++ b/syndication/tests/rdf/tagesschau.de.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="ISO-8859-1"?><rdf:RDF \
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" \
xmlns="http://my.netscape.com/rdf/simple/0.9/">  <!-- Source: www.tagesschau.de
      Properties:
-       - no-thrills RDF 1.0 feed
+       - no-thrills RDF 0.9 feed
        - unescaped quotes in text
 -->
 <!--
@@ -41,4 +41,4 @@
 
 	
 	
-</rdf:RDF>
\ No newline at end of file
+</rdf:RDF>


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic